diff --git a/Include/Fledasty/Strings/UTF8String.h b/Include/Fledasty/Strings/UTF8String.h
index d46ee75..797385e 100644
--- a/Include/Fledasty/Strings/UTF8String.h
+++ b/Include/Fledasty/Strings/UTF8String.h
@@ -23,7 +23,6 @@
  * Author: Mineplay
  * -----------------------------------------------------------------------------
  */
-
 #include
 #include
 #include
@@ -38,7 +37,7 @@ typedef struct {
 FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, unsigned char *character_string, size_t character_string_length);
 FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string);
 
-FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size);
+FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, const size_t size);
 uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length);
 
-FledastyError fledasty_utf8_string_append(FledastyUtf8String *current_string, unsigned char *character_string);
\ No newline at end of file
+bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_length);
diff --git a/Include/Fledasty/Utils/Error.h b/Include/Fledasty/Utils/Error.h
index e52af8f..e9841fd 100644
--- a/Include/Fledasty/Utils/Error.h
+++ b/Include/Fledasty/Utils/Error.h
@@ -30,6 +30,7 @@ typedef enum {
     FLEDASTY_ERROR_INDEX_OUT_OF_RANGE = 3,
     FLEDASTY_ERROR_VALUE_NOT_FOUND = 4,
     FLEDASTY_ERROR_KEY_NOT_FOUND = 5,
+    FLEDASTY_ERROR_INVALID_VALUE = 6,
 } FledastyError;
 
 #endif
\ No newline at end of file
diff --git a/Src/Strings/UTF8String.c b/Src/Strings/UTF8String.c
index 147c26c..2a7bec6 100644
--- a/Src/Strings/UTF8String.c
+++ b/Src/Strings/UTF8String.c
@@ -35,12 +35,16 @@ FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, un
         return FLEDASTY_ERROR_INVALID_POINTER;
     }
 
-    if (character_string == NULL) {
+    if (character_string == NULL || character_string_length == 0) {
         new_string->size = 0;
         new_string->capacity = 10;
 
         new_string->character_string = hallocy_malloc(new_string->capacity);
     } else {
+        if (!fledasty_utf8_string_validate(character_string, character_string_length)) {
+            return FLEDASTY_ERROR_INVALID_VALUE;
+        }
+
         new_string->size = character_string_length;
         new_string->capacity = new_string->size + new_string->size;
 
@@ -68,7 +72,7 @@ FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string) {
     return FLEDASTY_ERROR_NONE;
 }
 
-FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size) {
+FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, const size_t size) {
     FledastyUtf8String utf8_string;
     fledasty_utf8_string_initialize(&utf8_string, NULL, 0);
 
@@ -130,7 +134,7 @@ FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size) {
 }
 
 uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length) {
-    if (current_string == NULL) {
+    if (current_string == NULL || unicode_string_length == NULL) {
         return NULL;
     }
 
@@ -157,3 +161,56 @@ uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t
 
     return unicode_string;
 }
+
+/*
+ * Checks that a byte buffer is structurally well-formed UTF-8: every lead byte
+ * announces a 1 to 4 byte sequence, the whole sequence fits inside the buffer,
+ * and every trailing byte has the 10xxxxxx continuation form.
+ *
+ * Overlong encodings, UTF-16 surrogates and code points above U+10FFFF are not
+ * rejected here.
+ *
+ * character_string:        bytes to inspect; NULL is reported as invalid.
+ * character_string_length: number of bytes to inspect; 0 is reported as valid.
+ *
+ * Returns true when every sequence is well-formed, false otherwise.
+ */
+bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_length) {
+    if (character_string == NULL) {
+        return false;
+    }
+
+    size_t index = 0;
+    while (index < character_string_length) {
+        const unsigned char lead_byte = character_string[index];
+
+        size_t sequence_length = 0;
+        if ((lead_byte & 0x80) == 0x00) {
+            sequence_length = 1;
+        } else if ((lead_byte & 0xE0) == 0xC0) {
+            sequence_length = 2;
+        } else if ((lead_byte & 0xF0) == 0xE0) {
+            sequence_length = 3;
+        } else if ((lead_byte & 0xF8) == 0xF0) {
+            sequence_length = 4;
+        } else {
+            /* Stray continuation byte (10xxxxxx) or a byte that never starts a sequence (11111xxx). */
+            return false;
+        }
+
+        /* Compare against the remaining byte count so index + sequence_length never has to be formed. */
+        if (sequence_length > character_string_length - index) {
+            return false;
+        }
+
+        for (size_t offset = 1; offset < sequence_length; offset++) {
+            if ((character_string[index + offset] & 0xC0) != 0x80) {
+                return false;
+            }
+        }
+
+        index += sequence_length;
+    }
+
+    return true;
+}
diff --git a/Tests/Main.c b/Tests/Main.c
index 01895d8..0c7a3e6 100644
--- a/Tests/Main.c
+++ b/Tests/Main.c
@@ -235,6 +235,22 @@ int main() {
 
     printf("%s\n", encoded_string.character_string);
 
+    if (fledasty_utf8_string_validate(test_utf8_string.character_string, test_utf8_string.size)) {
+        printf("UTF-8 test string is valid!\n");
+    }
+
+    if (fledasty_utf8_string_validate(encoded_string.character_string, encoded_string.size)) {
+        printf("UTF-8 encoded string is valid!\n");
+    }
+
+    unsigned char *invalid_utf8 = (unsigned char*)hallocy_malloc(2 * sizeof(unsigned char));
+    invalid_utf8[0] = 0xDF;
+    invalid_utf8[1] = 0xFF;
+    if (!fledasty_utf8_string_validate(invalid_utf8, 2)) {
+        printf("UTF-8 invalid string is invalid!\n");
+    }
+
+    hallocy_free(invalid_utf8);
     hallocy_free(unicode);
     fledasty_utf8_string_destroy(&encoded_string);
 