diff --git a/Include/Fledasty/Strings/UTF8String.h b/Include/Fledasty/Strings/UTF8String.h
index 1c7f962..d46ee75 100644
--- a/Include/Fledasty/Strings/UTF8String.h
+++ b/Include/Fledasty/Strings/UTF8String.h
@@ -25,6 +25,7 @@
  */
 
 #include
+#include
 #include
 
 #include "../Utils/Error.h"
@@ -37,4 +38,7 @@ typedef struct {
 FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, unsigned char *character_string, size_t character_string_length);
 FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string);
 
+FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size);
+uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length);
+
 FledastyError fledasty_utf8_string_append(FledastyUtf8String *current_string, unsigned char *character_string);
\ No newline at end of file
diff --git a/Src/Strings/UTF8String.c b/Src/Strings/UTF8String.c
index ecdb15e..147c26c 100644
--- a/Src/Strings/UTF8String.c
+++ b/Src/Strings/UTF8String.c
@@ -28,13 +28,14 @@
 #include
 #include
 #include
+#include
 
 FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, unsigned char *character_string, size_t character_string_length) {
     if (new_string == NULL) {
         return FLEDASTY_ERROR_INVALID_POINTER;
     }
 
-    if (character_string == NULL || character_string_length == 0) {
+    if (character_string == NULL) {
         new_string->size = 0;
         new_string->capacity = 10;
 
@@ -66,3 +67,190 @@ FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string) {
     current_string->character_string = NULL;
     return FLEDASTY_ERROR_NONE;
 }
+
+/*
+ * Grows the buffer of `string` until it holds at least `needed` bytes by
+ * doubling its capacity. Returns 1 on success and 0 on failure, in which case
+ * the string is left unchanged.
+ * NOTE(review): assumes hallocy_realloc behaves like realloc (returns NULL on
+ * failure and keeps the original block) - confirm against Hallocy.
+ */
+static int fledasty_utf8_string_reserve(FledastyUtf8String *string, size_t needed) {
+    size_t capacity = string->capacity > 0 ? string->capacity : 1;
+    while (capacity < needed) {
+        if (capacity > (size_t)-1 / 2) {
+            return 0;
+        }
+
+        capacity += capacity;
+    }
+
+    if (capacity != string->capacity) {
+        void *grown = hallocy_realloc(string->character_string, capacity);
+        if (grown == NULL) {
+            return 0;
+        }
+
+        string->character_string = grown;
+        string->capacity = capacity;
+    }
+
+    return 1;
+}
+
+/*
+ * Returns the byte count of the UTF-8 sequence introduced by `lead`, or 0 when
+ * `lead` cannot start a sequence (continuation byte or 0xF8-0xFF).
+ */
+static size_t fledasty_utf8_sequence_length(unsigned char lead) {
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    if ((lead & 0xE0) == 0xC0) {
+        return 2;
+    }
+
+    if ((lead & 0xF0) == 0xE0) {
+        return 3;
+    }
+
+    if ((lead & 0xF8) == 0xF0) {
+        return 4;
+    }
+
+    return 0;
+}
+
+/*
+ * Encodes `size` Unicode code points from `unicode` into a new NUL-terminated
+ * UTF-8 string whose `size` field counts the encoded bytes without the
+ * terminator. Surrogates (U+D800-U+DFFF) and values above U+10FFFF have no
+ * UTF-8 form and are skipped. When `unicode` is NULL the freshly initialized
+ * string is returned as is. If the buffer cannot grow, encoding stops and the
+ * bytes produced so far are returned. The caller releases the result with
+ * fledasty_utf8_string_destroy.
+ */
+FledastyUtf8String fledasty_utf8_string_encode(uint32_t *unicode, size_t size) {
+    FledastyUtf8String utf8_string = { 0 };
+    if (fledasty_utf8_string_initialize(&utf8_string, NULL, 0) != FLEDASTY_ERROR_NONE) {
+        return utf8_string;
+    }
+
+    if (unicode == NULL || utf8_string.character_string == NULL) {
+        return utf8_string;
+    }
+
+    /* Lead-byte markers for sequences of 1, 2, 3 and 4 bytes. */
+    static const unsigned char lead_markers[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+
+    size_t string_index = 0;
+    for (size_t index = 0; index < size; index += 1) {
+        const uint32_t code_point = unicode[index];
+        const int is_surrogate = code_point >= 0xD800 && code_point <= 0xDFFF;
+        if (is_surrogate || code_point > 0x10FFFF) {
+            continue;
+        }
+
+        size_t length = 4;
+        if (code_point <= 0x7F) {
+            length = 1;
+        } else if (code_point <= 0x7FF) {
+            length = 2;
+        } else if (code_point <= 0xFFFF) {
+            length = 3;
+        }
+
+        /* Reserve one byte beyond the sequence so the terminator always fits. */
+        if (!fledasty_utf8_string_reserve(&utf8_string, string_index + length + 1)) {
+            break;
+        }
+
+        /* The range checks above guarantee the shifted value fits the lead byte. */
+        const size_t lead_shift = 6 * (length - 1);
+        utf8_string.character_string[string_index] = (unsigned char)(lead_markers[length - 1] | (code_point >> lead_shift));
+        for (size_t offset = 1; offset < length; offset += 1) {
+            const size_t shift = 6 * (length - 1 - offset);
+            utf8_string.character_string[string_index + offset] = (unsigned char)(0x80 | ((code_point >> shift) & 0x3F));
+        }
+
+        string_index += length;
+    }
+
+    utf8_string.size = string_index;
+    if (fledasty_utf8_string_reserve(&utf8_string, string_index + 1)) {
+        utf8_string.character_string[string_index] = '\0';
+    }
+
+    return utf8_string;
+}
+
+/*
+ * Decodes the UTF-8 bytes of `current_string` into a newly allocated array of
+ * code points and stores the element count in `unicode_string_length`. Every
+ * byte that does not belong to a complete sequence (stray continuation byte,
+ * invalid lead byte, sequence cut off by the end of the string) decodes to
+ * U+FFFD. Overlong forms and encoded surrogates are not rejected. Returns NULL
+ * when an argument or the string buffer is NULL or the allocation fails; the
+ * length is set to 0 whenever `unicode_string_length` is not NULL. The caller
+ * frees the returned array with hallocy_free.
+ */
+uint32_t *fledasty_utf8_string_decode(FledastyUtf8String *current_string, size_t *unicode_string_length) {
+    if (unicode_string_length == NULL) {
+        return NULL;
+    }
+
+    (*unicode_string_length) = 0;
+    if (current_string == NULL || current_string->character_string == NULL) {
+        return NULL;
+    }
+
+    const unsigned char *bytes = (const unsigned char*)current_string->character_string;
+    const size_t byte_count = current_string->size;
+
+    /*
+     * Each decoded element consumes at least one byte, so byte_count elements
+     * always suffice. Ask for at least one element so an empty string does not
+     * turn into a zero-sized allocation request.
+     */
+    const size_t max_code_points = byte_count > 0 ? byte_count : 1;
+    if (max_code_points > (size_t)-1 / sizeof(uint32_t)) {
+        return NULL;
+    }
+
+    uint32_t *unicode_string = hallocy_malloc(max_code_points * sizeof(uint32_t));
+    if (unicode_string == NULL) {
+        return NULL;
+    }
+
+    size_t count = 0;
+    size_t index = 0;
+    while (index < byte_count) {
+        const size_t length = fledasty_utf8_sequence_length(bytes[index]);
+        int valid = length != 0 && length <= byte_count - index;
+        for (size_t offset = 1; valid && offset < length; offset += 1) {
+            valid = (bytes[index + offset] & 0xC0) == 0x80;
+        }
+
+        if (!valid) {
+            unicode_string[count] = 0xFFFD;
+            count += 1;
+            index += 1;
+            continue;
+        }
+
+        /* Payload mask of the lead byte: 7, 5, 4 or 3 bits for 1 to 4 bytes. */
+        const uint32_t lead_mask = length == 1 ? 0x7F : (0xFFu >> (length + 1));
+        uint32_t code_point = bytes[index] & lead_mask;
+        for (size_t offset = 1; offset < length; offset += 1) {
+            code_point = (code_point << 6) | (bytes[index + offset] & 0x3Fu);
+        }
+
+        unicode_string[count] = code_point;
+        count += 1;
+        index += length;
+    }
+
+    (*unicode_string_length) = count;
+    return unicode_string;
+}
diff --git a/Tests/Main.c b/Tests/Main.c
index 0c34757..01895d8 100644
--- a/Tests/Main.c
+++ b/Tests/Main.c
@@ -20,6 +20,7 @@
  * -----------------------------------------------------------------------------
  */
 #include
+#include
 #include
 #include
 #include
@@ -224,10 +225,19 @@ int main() {
     fledasty_hash_table_destroy(&test_hash_table);
     FledastyUtf8String test_utf8_string;
     unsigned char *test_string = (unsigned char*)"đŸ˜€â‚¬Testing";
-    fledasty_utf8_string_initialize(&test_utf8_string, test_string, 11);
+    fledasty_utf8_string_initialize(&test_utf8_string, test_string, 15);
     printf("%s\n", test_string);
     printf("%s\n", test_utf8_string.character_string);
 
+    size_t unicode_length = 0;
+    uint32_t *unicode = fledasty_utf8_string_decode(&test_utf8_string, &unicode_length);
+    FledastyUtf8String encoded_string = fledasty_utf8_string_encode(unicode, unicode_length);
+
+    printf("%s\n", encoded_string.character_string);
+
+    hallocy_free(unicode);
+    fledasty_utf8_string_destroy(&encoded_string);
+
     fledasty_utf8_string_destroy(&test_utf8_string);
     printf("Done\n");
     return 0;