feat(utf-8 string): implemented validation of utf-8 string
This commit is contained in:
parent
e9d8cdd8a3
commit
13a95d9027
4 changed files with 77 additions and 6 deletions
|
|
@ -23,7 +23,6 @@
|
|||
* Author: Mineplay
|
||||
* -----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
|
@ -38,7 +37,7 @@ typedef struct {
|
|||
FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, unsigned char *character_string, size_t character_string_length);
|
||||
FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string);
|
||||
|
||||
FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size);
|
||||
FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, const size_t size);
|
||||
uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length);
|
||||
|
||||
FledastyError fledasty_utf8_string_append(FledastyUtf8String *current_string, unsigned char *character_string);
|
||||
bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_length);
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ typedef enum {
|
|||
FLEDASTY_ERROR_INDEX_OUT_OF_RANGE = 3,
|
||||
FLEDASTY_ERROR_VALUE_NOT_FOUND = 4,
|
||||
FLEDASTY_ERROR_KEY_NOT_FOUND = 5,
|
||||
FLEDASTY_ERROR_INVALID_VALUE = 6,
|
||||
} FledastyError;
|
||||
|
||||
#endif
|
||||
|
|
@ -35,12 +35,16 @@ FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, un
|
|||
return FLEDASTY_ERROR_INVALID_POINTER;
|
||||
}
|
||||
|
||||
if (character_string == NULL) {
|
||||
if (character_string == NULL || character_string_length == 0) {
|
||||
new_string->size = 0;
|
||||
new_string->capacity = 10;
|
||||
|
||||
new_string->character_string = hallocy_malloc(new_string->capacity);
|
||||
} else {
|
||||
if (!fledasty_utf8_string_validate(character_string, character_string_length)) {
|
||||
return FLEDASTY_ERROR_INVALID_VALUE;
|
||||
}
|
||||
|
||||
new_string->size = character_string_length;
|
||||
new_string->capacity = new_string->size + new_string->size;
|
||||
|
||||
|
|
@ -68,7 +72,7 @@ FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string) {
|
|||
return FLEDASTY_ERROR_NONE;
|
||||
}
|
||||
|
||||
FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size) {
|
||||
FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, const size_t size) {
|
||||
FledastyUtf8String utf8_string;
|
||||
fledasty_utf8_string_initialize(&utf8_string, NULL, 0);
|
||||
|
||||
|
|
@ -130,7 +134,7 @@ FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size) {
|
|||
}
|
||||
|
||||
uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length) {
|
||||
if (current_string == NULL) {
|
||||
if (current_string == NULL || unicode_string_length == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -157,3 +161,54 @@ uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t
|
|||
|
||||
return unicode_string;
|
||||
}
|
||||
|
||||
/*
 * Checks whether a byte buffer contains only well-formed UTF-8.
 *
 * Parameters:
 *   character_string         Bytes to inspect; no terminator is required.
 *   character_string_length  Number of bytes to inspect.
 *
 * Returns:
 *   true  when every byte belongs to a well-formed sequence (an empty,
 *         non-NULL buffer is considered valid),
 *   false when character_string is NULL or the buffer contains a truncated
 *         sequence, a stray continuation byte, an illegal lead byte
 *         (0xC0, 0xC1, 0xF5..0xFF), an overlong encoding, a UTF-16 surrogate
 *         (U+D800..U+DFFF) or a code point above U+10FFFF.
 */
bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_length) {
    if (character_string == NULL) {
        return false;
    }

    size_t index = 0;
    while (index < character_string_length) {
        const unsigned char lead = character_string[index];

        /* Single-byte (ASCII) code point. */
        if (lead < 0x80) {
            index += 1;
            continue;
        }

        /*
         * Classify the lead byte. The first continuation byte has a narrower
         * legal range for some leads; that is what excludes overlong forms,
         * surrogates and values beyond U+10FFFF.
         */
        size_t sequence_length;
        unsigned char second_minimum = 0x80;
        unsigned char second_maximum = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            /* 110xxxxx; 0xC0 and 0xC1 could only encode overlong ASCII. */
            sequence_length = 2;
        } else if ((lead & 0xF0) == 0xE0) {
            /* 1110xxxx */
            sequence_length = 3;
            if (lead == 0xE0) {
                second_minimum = 0xA0; /* below this is an overlong form */
            } else if (lead == 0xED) {
                second_maximum = 0x9F; /* above this is a surrogate */
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            /* 11110xxx, limited to code points up to U+10FFFF. */
            sequence_length = 4;
            if (lead == 0xF0) {
                second_minimum = 0x90; /* below this is an overlong form */
            } else if (lead == 0xF4) {
                second_maximum = 0x8F; /* above this exceeds U+10FFFF */
            }
        } else {
            /* Stray continuation byte (0x80..0xBF) or illegal lead byte. */
            return false;
        }

        /* Compare against the remaining byte count so that the check cannot wrap. */
        if (character_string_length - index < sequence_length) {
            return false;
        }

        const unsigned char second = character_string[index + 1];
        if (second < second_minimum || second > second_maximum) {
            return false;
        }

        /* Any remaining bytes must be plain continuation bytes (10xxxxxx). */
        for (size_t offset = 2; offset < sequence_length; offset++) {
            if ((character_string[index + offset] & 0xC0) != 0x80) {
                return false;
            }
        }

        index += sequence_length;
    }

    return true;
}
|
||||
|
|
|
|||
16
Tests/Main.c
16
Tests/Main.c
|
|
@ -235,6 +235,22 @@ int main() {
|
|||
|
||||
printf("%s\n", encoded_string.character_string);
|
||||
|
||||
if (fledasty_utf8_string_validate(test_utf8_string.character_string, encoded_string.size)) {
|
||||
printf("UTF-8 test string is valid!\n");
|
||||
}
|
||||
|
||||
if (fledasty_utf8_string_validate(encoded_string.character_string, encoded_string.size)) {
|
||||
printf("UTF-8 encoded string is valid!\n");
|
||||
}
|
||||
|
||||
unsigned char *invalid_utf8 = (unsigned char*)hallocy_malloc(2 * sizeof(unsigned char));
|
||||
invalid_utf8[0] = 0xDF;
|
||||
invalid_utf8[1] = 0xFF;
|
||||
if (!fledasty_utf8_string_validate(invalid_utf8, 2)) {
|
||||
printf("UTF-8 invalid string is invalid!\n");
|
||||
}
|
||||
|
||||
hallocy_free(invalid_utf8);
|
||||
hallocy_free(unicode);
|
||||
fledasty_utf8_string_destroy(&encoded_string);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue