Fledasty/Src/Strings/UTF8String.c

452 lines
20 KiB
C

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* -----------------------------------------------------------------------------
* File: UTF8String.c
* Description:
* This file contains the functions for modifying the UTF-8 String. It includes
* functions to append, insert at index, insert before character,
* insert before string, insert after character, insert after string, replace,
* copy, pop, remove, remove range, clear, check if contains string, check if
* empty.
*
* Author: Mineplay
* -----------------------------------------------------------------------------
*/
#include "../../Include/Fledasty/Strings/UTF8String.h"
#include <Hallocy/Core/Allocator.h>
#include <Hallocy/Core/Memory.h>
#include <Hallocy/Utils/Error.h>
/**
 * Initializes a UTF-8 string, optionally copying an initial byte sequence.
 *
 * When character_string is NULL or character_string_size is 0 the string starts
 * empty with a capacity of 10 bytes. Otherwise the input is checked with
 * fledasty_utf8_string_validate and copied into a buffer of twice its size.
 * On success the stored bytes are followed by a '\0' terminator.
 *
 * @param new_string            String object to initialize.
 * @param character_string      Bytes to copy, or NULL for an empty string.
 * @param character_string_size Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when new_string is NULL,
 *         FLEDASTY_ERROR_INVALID_VALUE when the input fails validation
 *         (new_string is left untouched in that case),
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be allocated.
 */
FledastyError fledasty_utf8_string_initialize(FledastyUtf8String *new_string, unsigned char *character_string, const size_t character_string_size) {
    if (new_string == NULL) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    size_t initial_size = 0;
    size_t initial_capacity = 10;
    if (character_string != NULL && character_string_size != 0) {
        if (!fledasty_utf8_string_validate(character_string, character_string_size)) {
            return FLEDASTY_ERROR_INVALID_VALUE;
        }

        initial_size = character_string_size;
        initial_capacity = character_string_size + character_string_size;
    }

    unsigned char *buffer = (unsigned char*)hallocy_malloc(initial_capacity);
    if (buffer == NULL) {
        /* Leave the object in a recognizable empty state instead of writing
         * the terminator through a NULL pointer. */
        new_string->size = 0;
        new_string->capacity = 0;
        new_string->character_string = NULL;
        return FLEDASTY_ERROR_FAILED_ALLOCATION;
    }

    if (initial_size > 0) {
        hallocy_copy_memory(buffer, character_string, initial_size);
    }

    buffer[initial_size] = '\0';
    new_string->size = initial_size;
    new_string->capacity = initial_capacity;
    new_string->character_string = buffer;
    return FLEDASTY_ERROR_NONE;
}
/**
 * Releases the byte buffer owned by a UTF-8 string.
 *
 * The buffer is handed to hallocy_free; only when that call reports
 * HALLOCY_ERROR_NONE is the stored pointer reset to NULL.
 *
 * @param current_string String whose buffer is released.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when current_string is NULL,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when hallocy_free reports an error.
 */
FledastyError fledasty_utf8_string_destroy(FledastyUtf8String *current_string) {
    if (!current_string) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (hallocy_free(current_string->character_string) == HALLOCY_ERROR_NONE) {
        current_string->character_string = NULL;
        return FLEDASTY_ERROR_NONE;
    }

    return FLEDASTY_ERROR_FAILED_ALLOCATION;
}
/**
 * Appends a validated UTF-8 byte sequence to the end of the string.
 *
 * The buffer grows when the appended bytes plus the terminator would not fit:
 * the capacity is doubled, or increased by character_string_size + 1 when the
 * appended data is at least as large as the current capacity.
 *
 * @param current_string        String to extend.
 * @param character_string      Bytes to append.
 * @param character_string_size Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or the size is 0,
 *         FLEDASTY_ERROR_INVALID_VALUE when the input fails validation,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be grown.
 */
FledastyError fledasty_utf8_string_append(FledastyUtf8String *current_string, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (!fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    if (current_string->capacity <= current_string->size + character_string_size) {
        const size_t growth = (current_string->capacity > character_string_size) ? current_string->capacity : character_string_size + 1;
        const size_t new_capacity = current_string->capacity + growth;

        /* Keep the old pointer until the reallocation is known to have worked;
         * assumes hallocy_realloc returns NULL on failure and leaves the
         * original allocation valid, like realloc - TODO confirm. */
        unsigned char *new_buffer = (unsigned char*)hallocy_realloc(current_string->character_string, new_capacity * sizeof(unsigned char));
        if (new_buffer == NULL) {
            return FLEDASTY_ERROR_FAILED_ALLOCATION;
        }

        current_string->character_string = new_buffer;
        current_string->capacity = new_capacity;
    }

    hallocy_copy_memory(current_string->character_string + current_string->size, character_string, character_string_size);
    current_string->size += character_string_size;
    current_string->character_string[current_string->size] = '\0';
    return FLEDASTY_ERROR_NONE;
}
/**
 * Inserts a validated UTF-8 byte sequence at a byte offset of the string.
 *
 * Bytes from index onwards are shifted right by character_string_size and the
 * terminator is rewritten after the new end of the string.
 *
 * NOTE(review): index is a byte offset and is not checked against character
 * boundaries, so an offset inside a multi-byte character splits it.
 *
 * @param current_string        String to modify.
 * @param index                 Byte offset of the insertion; may equal the size.
 * @param character_string      Bytes to insert.
 * @param character_string_size Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or the size is 0,
 *         FLEDASTY_ERROR_INDEX_OUT_OF_RANGE when index is past the end,
 *         FLEDASTY_ERROR_INVALID_VALUE when the input fails validation,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be grown.
 */
FledastyError fledasty_utf8_string_insert_at_index(FledastyUtf8String *current_string, const size_t index, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (index > current_string->size) {
        return FLEDASTY_ERROR_INDEX_OUT_OF_RANGE;
    }

    if (!fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    if (current_string->capacity <= current_string->size + character_string_size) {
        const size_t growth = (current_string->capacity > character_string_size) ? current_string->capacity : character_string_size;
        const size_t new_capacity = current_string->capacity + growth;

        /* Only replace the stored pointer once the reallocation succeeded;
         * assumes hallocy_realloc returns NULL on failure and keeps the old
         * block valid, like realloc - TODO confirm. */
        unsigned char *new_buffer = (unsigned char*)hallocy_realloc(current_string->character_string, new_capacity * sizeof(unsigned char));
        if (new_buffer == NULL) {
            return FLEDASTY_ERROR_FAILED_ALLOCATION;
        }

        current_string->character_string = new_buffer;
        current_string->capacity = new_capacity;
    }

    /* Open a gap of character_string_size bytes at index, then fill it. */
    hallocy_move_memory(current_string->character_string + (index + character_string_size), current_string->character_string + index, current_string->size - index);
    hallocy_copy_memory(current_string->character_string + index, character_string, character_string_size);

    current_string->size += character_string_size;
    current_string->character_string[current_string->size] = '\0';
    return FLEDASTY_ERROR_NONE;
}
/**
 * Inserts a byte sequence directly before the first occurrence of another one.
 *
 * The string is scanned from the start; a position counts as a match when
 * hallocy_compare_memory reports true for before_character_string. Every start
 * position is examined, including the last one where the searched sequence
 * still fits.
 *
 * @param current_string               String to modify.
 * @param before_character_string      Sequence to search for.
 * @param before_character_string_size Number of bytes in before_character_string.
 * @param character_string             Bytes to insert.
 * @param character_string_size        Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or a size is 0,
 *         FLEDASTY_ERROR_INVALID_VALUE when either input fails validation,
 *         FLEDASTY_ERROR_VALUE_NOT_FOUND when the searched sequence is absent,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be grown.
 */
FledastyError fledasty_utf8_string_insert_before_string(FledastyUtf8String *current_string, unsigned char *before_character_string, const size_t before_character_string_size, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || before_character_string == NULL || before_character_string_size == 0 || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (!fledasty_utf8_string_validate(before_character_string, before_character_string_size) || !fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    /* A needle longer than the string cannot match; checking this first also
     * keeps the unsigned subtraction below from wrapping around. */
    if (before_character_string_size > current_string->size) {
        return FLEDASTY_ERROR_VALUE_NOT_FOUND;
    }

    const size_t last_start = current_string->size - before_character_string_size;
    for (size_t index = 0; index <= last_start; index += 1) {
        if (!hallocy_compare_memory(current_string->character_string + index, before_character_string, before_character_string_size)) {
            continue;
        }

        if (current_string->capacity <= current_string->size + character_string_size) {
            const size_t growth = (current_string->capacity > character_string_size) ? current_string->capacity : character_string_size + 1;
            const size_t new_capacity = current_string->capacity + growth;

            /* Assumes hallocy_realloc returns NULL on failure and keeps the
             * old block valid, like realloc - TODO confirm. */
            unsigned char *new_buffer = (unsigned char*)hallocy_realloc(current_string->character_string, new_capacity * sizeof(unsigned char));
            if (new_buffer == NULL) {
                return FLEDASTY_ERROR_FAILED_ALLOCATION;
            }

            current_string->character_string = new_buffer;
            current_string->capacity = new_capacity;
        }

        /* Shift the match and everything after it to the right, then fill the gap. */
        hallocy_move_memory(current_string->character_string + (index + character_string_size), current_string->character_string + index, current_string->size - index);
        hallocy_copy_memory(current_string->character_string + index, character_string, character_string_size);

        current_string->size += character_string_size;
        current_string->character_string[current_string->size] = '\0';
        return FLEDASTY_ERROR_NONE;
    }

    return FLEDASTY_ERROR_VALUE_NOT_FOUND;
}
/**
 * Inserts a byte sequence directly after the first occurrence of another one.
 *
 * The string is scanned from the start; a position counts as a match when
 * hallocy_compare_memory reports true for after_character_string. Every start
 * position is examined, including a match that ends exactly at the end of the
 * string.
 *
 * @param current_string              String to modify.
 * @param after_character_string      Sequence to search for.
 * @param after_character_string_size Number of bytes in after_character_string.
 * @param character_string            Bytes to insert.
 * @param character_string_size       Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or a size is 0,
 *         FLEDASTY_ERROR_INVALID_VALUE when either input fails validation,
 *         FLEDASTY_ERROR_VALUE_NOT_FOUND when the searched sequence is absent,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be grown.
 */
FledastyError fledasty_utf8_string_insert_after_string(FledastyUtf8String *current_string, unsigned char *after_character_string, const size_t after_character_string_size, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || after_character_string == NULL || after_character_string_size == 0 || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (!fledasty_utf8_string_validate(after_character_string, after_character_string_size) || !fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    /* Guard the unsigned subtraction below: a needle longer than the string
     * can never match. */
    if (after_character_string_size > current_string->size) {
        return FLEDASTY_ERROR_VALUE_NOT_FOUND;
    }

    const size_t last_start = current_string->size - after_character_string_size;
    for (size_t index = 0; index <= last_start; index += 1) {
        if (!hallocy_compare_memory(current_string->character_string + index, after_character_string, after_character_string_size)) {
            continue;
        }

        if (current_string->capacity <= current_string->size + character_string_size) {
            const size_t growth = (current_string->capacity > character_string_size) ? current_string->capacity : character_string_size + 1;
            const size_t new_capacity = current_string->capacity + growth;

            /* Assumes hallocy_realloc returns NULL on failure and keeps the
             * old block valid, like realloc - TODO confirm. */
            unsigned char *new_buffer = (unsigned char*)hallocy_realloc(current_string->character_string, new_capacity * sizeof(unsigned char));
            if (new_buffer == NULL) {
                return FLEDASTY_ERROR_FAILED_ALLOCATION;
            }

            current_string->character_string = new_buffer;
            current_string->capacity = new_capacity;
        }

        /* The new bytes go right behind the matched sequence. */
        const size_t insert_index = index + after_character_string_size;
        hallocy_move_memory(current_string->character_string + (insert_index + character_string_size), current_string->character_string + insert_index, current_string->size - insert_index);
        hallocy_copy_memory(current_string->character_string + insert_index, character_string, character_string_size);

        current_string->size += character_string_size;
        current_string->character_string[current_string->size] = '\0';
        return FLEDASTY_ERROR_NONE;
    }

    return FLEDASTY_ERROR_VALUE_NOT_FOUND;
}
/**
 * Removes the last character (one to four bytes) from the string.
 *
 * Starting at the final byte, the function steps back over at most three
 * continuation bytes (bit pattern 10xxxxxx) to reach the lead byte of the last
 * character, truncates the string there and rewrites the terminator. It never
 * reads before the start of the buffer.
 *
 * @param current_string String to shorten.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when current_string or its buffer is NULL,
 *         FLEDASTY_ERROR_INDEX_OUT_OF_RANGE when the string is empty.
 */
FledastyError fledasty_utf8_string_pop(FledastyUtf8String *current_string) {
    if (current_string == NULL || current_string->character_string == NULL) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (current_string->size == 0) {
        return FLEDASTY_ERROR_INDEX_OUT_OF_RANGE;
    }

    size_t new_size = current_string->size - 1;
    size_t continuation_bytes = 0;

    /* A UTF-8 character has at most three continuation bytes after its lead. */
    while (new_size > 0 && continuation_bytes < 3 && (current_string->character_string[new_size] & 0xC0) == 0x80) {
        new_size -= 1;
        continuation_bytes += 1;
    }

    current_string->size = new_size;
    current_string->character_string[current_string->size] = '\0';
    return FLEDASTY_ERROR_NONE;
}
/**
 * Removes the first occurrence of a byte sequence from the string.
 *
 * A position counts as a match when hallocy_compare_memory reports true.
 * Every start position is examined, including a match at the very end of the
 * string. The bytes behind the match are moved left and the terminator is
 * rewritten.
 *
 * @param current_string        String to modify.
 * @param character_string      Sequence to remove.
 * @param character_string_size Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or the size is 0,
 *         FLEDASTY_ERROR_INVALID_VALUE when the input fails validation,
 *         FLEDASTY_ERROR_VALUE_NOT_FOUND when the sequence is absent.
 */
FledastyError fledasty_utf8_string_remove(FledastyUtf8String *current_string, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (!fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    /* Guard the unsigned subtraction below: a needle longer than the string
     * can never match. */
    if (character_string_size > current_string->size) {
        return FLEDASTY_ERROR_VALUE_NOT_FOUND;
    }

    const size_t last_start = current_string->size - character_string_size;
    for (size_t index = 0; index <= last_start; index += 1) {
        if (!hallocy_compare_memory(current_string->character_string + index, character_string, character_string_size)) {
            continue;
        }

        const size_t tail_start = index + character_string_size;
        hallocy_move_memory(current_string->character_string + index, current_string->character_string + tail_start, current_string->size - tail_start);

        current_string->size -= character_string_size;
        current_string->character_string[current_string->size] = '\0';
        return FLEDASTY_ERROR_NONE;
    }

    return FLEDASTY_ERROR_VALUE_NOT_FOUND;
}
/**
 * Removes the bytes in the half-open range [start_index, end_index).
 *
 * end_index is exclusive: the string shrinks by end_index - start_index bytes.
 * end_index may equal the string size, which removes everything from
 * start_index to the end.
 *
 * NOTE(review): both indices are byte offsets and are not checked against
 * character boundaries.
 *
 * @param current_string String to modify.
 * @param start_index    Offset of the first byte to remove.
 * @param end_index      Offset one past the last byte to remove.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when current_string is NULL,
 *         FLEDASTY_ERROR_INDEX_OUT_OF_RANGE when the range is empty, reversed
 *         or extends past the end of the string.
 */
FledastyError fledasty_utf8_string_remove_range(FledastyUtf8String *current_string, const size_t start_index, const size_t end_index) {
    if (current_string == NULL) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (start_index >= end_index || end_index > current_string->size) {
        return FLEDASTY_ERROR_INDEX_OUT_OF_RANGE;
    }

    /* Close the gap by pulling the tail (possibly empty) down to start_index. */
    hallocy_move_memory(current_string->character_string + start_index, current_string->character_string + end_index, current_string->size - end_index);

    current_string->size -= end_index - start_index;
    current_string->character_string[current_string->size] = '\0';
    return FLEDASTY_ERROR_NONE;
}
/**
 * Empties the string without releasing or shrinking its buffer.
 *
 * The size is set to 0 and the first byte becomes the terminator; the capacity
 * is left unchanged.
 *
 * @param current_string String to clear.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when current_string or its buffer is
 *         NULL (fledasty_utf8_string_destroy leaves the buffer pointer NULL).
 */
FledastyError fledasty_utf8_string_clear(FledastyUtf8String *current_string) {
    if (current_string == NULL || current_string->character_string == NULL) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    current_string->size = 0;
    current_string->character_string[0] = '\0';
    return FLEDASTY_ERROR_NONE;
}
/**
 * Replaces the first occurrence of one byte sequence with another.
 *
 * A position counts as a match when hallocy_compare_memory reports true.
 * Every start position is examined, including a match at the very end of the
 * string. The replacement may be shorter or longer than the replaced sequence;
 * the tail of the string is moved accordingly and the terminator is rewritten.
 *
 * @param current_string                String to modify.
 * @param replace_character_string      Sequence to search for and replace.
 * @param replace_character_string_size Number of bytes in replace_character_string.
 * @param character_string              Replacement bytes.
 * @param character_string_size         Number of bytes in character_string.
 * @return FLEDASTY_ERROR_NONE on success,
 *         FLEDASTY_ERROR_INVALID_POINTER when a pointer is NULL or a size is 0,
 *         FLEDASTY_ERROR_INVALID_VALUE when either input fails validation,
 *         FLEDASTY_ERROR_VALUE_NOT_FOUND when the searched sequence is absent,
 *         FLEDASTY_ERROR_FAILED_ALLOCATION when the buffer cannot be grown.
 */
FledastyError fledasty_utf8_string_replace_string(FledastyUtf8String *current_string, unsigned char *replace_character_string, const size_t replace_character_string_size, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || replace_character_string == NULL || replace_character_string_size == 0 || character_string == NULL || character_string_size == 0) {
        return FLEDASTY_ERROR_INVALID_POINTER;
    }

    if (!fledasty_utf8_string_validate(replace_character_string, replace_character_string_size) || !fledasty_utf8_string_validate(character_string, character_string_size)) {
        return FLEDASTY_ERROR_INVALID_VALUE;
    }

    /* Guard the unsigned subtraction below: a needle longer than the string
     * can never match. */
    if (replace_character_string_size > current_string->size) {
        return FLEDASTY_ERROR_VALUE_NOT_FOUND;
    }

    const size_t last_start = current_string->size - replace_character_string_size;
    for (size_t index = 0; index <= last_start; index += 1) {
        if (!hallocy_compare_memory(current_string->character_string + index, replace_character_string, replace_character_string_size)) {
            continue;
        }

        /* Subtract first: the match guarantees size >= replaced length, so
         * this cannot wrap even when the replacement is shorter. */
        const size_t new_size = current_string->size - replace_character_string_size + character_string_size;
        if (current_string->capacity <= new_size) {
            const size_t growth = (current_string->capacity > character_string_size) ? current_string->capacity : character_string_size;
            const size_t new_capacity = current_string->capacity + growth;

            /* Assumes hallocy_realloc returns NULL on failure and keeps the
             * old block valid, like realloc - TODO confirm. */
            unsigned char *new_buffer = (unsigned char*)hallocy_realloc(current_string->character_string, new_capacity * sizeof(unsigned char));
            if (new_buffer == NULL) {
                return FLEDASTY_ERROR_FAILED_ALLOCATION;
            }

            current_string->character_string = new_buffer;
            current_string->capacity = new_capacity;
        }

        /* Move the tail behind the match to its final place, then overwrite
         * the match with the replacement. */
        const size_t tail_start = index + replace_character_string_size;
        hallocy_move_memory(current_string->character_string + index + character_string_size, current_string->character_string + tail_start, current_string->size - tail_start);
        hallocy_copy_memory(current_string->character_string + index, character_string, character_string_size);

        current_string->size = new_size;
        current_string->character_string[current_string->size] = '\0';
        return FLEDASTY_ERROR_NONE;
    }

    return FLEDASTY_ERROR_VALUE_NOT_FOUND;
}
/**
 * Reports whether the string contains a given byte sequence.
 *
 * A position counts as a match when hallocy_compare_memory reports true.
 * Every start position is examined, so a sequence located at the very end of
 * the string (or equal to the whole string) is found as well.
 *
 * @param current_string        String to search in.
 * @param character_string      Sequence to search for.
 * @param character_string_size Number of bytes in character_string.
 * @return true when the sequence occurs in the string; false when it does not,
 *         when a pointer is NULL, when the size is 0 or when the sequence
 *         fails validation.
 */
bool fledasty_utf8_string_has_string(const FledastyUtf8String *current_string, unsigned char *character_string, const size_t character_string_size) {
    if (current_string == NULL || character_string == NULL || character_string_size == 0) {
        return false;
    }

    if (!fledasty_utf8_string_validate(character_string, character_string_size)) {
        return false;
    }

    /* A needle longer than the string cannot occur; this check also keeps the
     * unsigned subtraction below from wrapping around. */
    if (character_string_size > current_string->size) {
        return false;
    }

    const size_t last_start = current_string->size - character_string_size;
    for (size_t index = 0; index <= last_start; index += 1) {
        if (hallocy_compare_memory(current_string->character_string + index, character_string, character_string_size)) {
            return true;
        }
    }

    return false;
}
FledastyUtf8String fledasty_utf8_string_encode(const uint32_t *unicode, const size_t size) {
FledastyUtf8String utf8_string;
fledasty_utf8_string_initialize(&utf8_string, NULL, 0);
if (unicode == NULL) {
return utf8_string;
}
size_t string_index = 0;
for (size_t index = 0; index < size; index += 1) {
if (unicode[index] <= 0x00007F) {
if (utf8_string.capacity <= string_index) {
utf8_string.capacity += utf8_string.capacity;
utf8_string.character_string = (unsigned char*)hallocy_realloc(utf8_string.character_string, utf8_string.capacity);
}
utf8_string.character_string[string_index] = unicode[index];
string_index += 1;
} else if (unicode[index] <= 0x0007FF) {
if (utf8_string.capacity <= string_index + 2) {
utf8_string.capacity += utf8_string.capacity;
utf8_string.character_string = (unsigned char*)hallocy_realloc(utf8_string.character_string, utf8_string.capacity);
}
utf8_string.character_string[string_index] = 0xC0 | ((unicode[index] >> 6) & 0x07);
utf8_string.character_string[string_index + 1] = 0x80 | (unicode[index] & 0x3F);
string_index += 2;
} else if (unicode[index] <= 0x00FFFF) {
if (utf8_string.capacity <= string_index + 3) {
utf8_string.capacity += utf8_string.capacity;
utf8_string.character_string = (unsigned char*)hallocy_realloc(utf8_string.character_string, utf8_string.capacity);
}
utf8_string.character_string[string_index] = 0xE0 | ((unicode[index] >> 12) & 0x07);
utf8_string.character_string[string_index + 1] = 0x80 | ((unicode[index] >> 6) & 0x3F);
utf8_string.character_string[string_index + 2] = 0x80 | (unicode[index] & 0x3F);
string_index += 3;
} else if (unicode[index] <= 0x10FFFF) {
if (utf8_string.capacity <= string_index + 4) {
utf8_string.capacity += utf8_string.capacity;
utf8_string.character_string = (unsigned char*)hallocy_realloc(utf8_string.character_string, utf8_string.capacity);
}
utf8_string.character_string[string_index] = 0xF0 | ((unicode[index] >> 18) & 0x07);
utf8_string.character_string[string_index + 1] = 0x80 | ((unicode[index] >> 12) & 0x3F);
utf8_string.character_string[string_index + 2] = 0x80 | ((unicode[index] >> 6) & 0x3F);
utf8_string.character_string[string_index + 3] = 0x80 | (unicode[index] & 0x3F);
string_index += 4;
}
}
utf8_string.size = string_index;
if (utf8_string.capacity <= utf8_string.size) {
utf8_string.capacity += utf8_string.capacity;
utf8_string.character_string = (unsigned char*)hallocy_realloc(utf8_string.character_string, utf8_string.capacity);
}
utf8_string.character_string[utf8_string.size] = '\0';
return utf8_string;
}
/**
 * Decodes the string into an array of Unicode code points.
 *
 * The result is allocated with hallocy_malloc with room for one code point per
 * byte of the string and is returned to the caller. The lead byte of each
 * character selects its length (one to four bytes). A multi-byte sequence that
 * would run past the end of the string is decoded as a single U+FFFD and ends
 * the decoding, so no byte outside the string is read.
 *
 * @param current_string      String to decode.
 * @param unicode_string_size Receives the number of code points written; set
 *                            to 0 on failure.
 * @return The code point array, or NULL when a pointer argument or the string
 *         buffer is NULL or the allocation fails.
 */
uint32_t *fledasty_utf8_string_decode(const FledastyUtf8String *current_string, size_t *unicode_string_size) {
    if (current_string == NULL || unicode_string_size == NULL) {
        return NULL;
    }

    (*unicode_string_size) = 0;
    if (current_string->character_string == NULL) {
        return NULL;
    }

    const unsigned char *bytes = current_string->character_string;
    const size_t byte_count = current_string->size;

    /* Reject sizes whose byte count would overflow the multiplication. */
    if (byte_count > ((size_t)-1) / sizeof(uint32_t)) {
        return NULL;
    }

    uint32_t *unicode_string = (uint32_t*)hallocy_malloc(byte_count * sizeof(uint32_t));
    if (unicode_string == NULL) {
        return NULL;
    }

    size_t count = 0;
    size_t index = 0;
    while (index < byte_count) {
        const unsigned char lead = bytes[index];

        size_t length = 1;
        if ((lead & 0xF0) == 0xF0) {
            length = 4;
        } else if ((lead & 0xE0) == 0xE0) {
            length = 3;
        } else if ((lead & 0xC0) == 0xC0) {
            length = 2;
        }

        if (length > byte_count - index) {
            /* Truncated sequence: everything left belongs to it. */
            unicode_string[count] = 0xFFFD;
            count += 1;
            break;
        }

        uint32_t code_point;
        if (length == 4) {
            code_point = ((uint32_t)(lead & 0x07) << 18) | ((uint32_t)(bytes[index + 1] & 0x3F) << 12) | ((uint32_t)(bytes[index + 2] & 0x3F) << 6) | (uint32_t)(bytes[index + 3] & 0x3F);
        } else if (length == 3) {
            code_point = ((uint32_t)(lead & 0x0F) << 12) | ((uint32_t)(bytes[index + 1] & 0x3F) << 6) | (uint32_t)(bytes[index + 2] & 0x3F);
        } else if (length == 2) {
            code_point = ((uint32_t)(lead & 0x1F) << 6) | (uint32_t)(bytes[index + 1] & 0x3F);
        } else {
            code_point = lead;
        }

        unicode_string[count] = code_point;
        count += 1;
        index += length;
    }

    (*unicode_string_size) = count;
    return unicode_string;
}
/**
 * Checks whether a byte sequence is well-formed UTF-8.
 *
 * The lead byte of every character determines how many continuation bytes
 * (bit pattern 10xxxxxx) must follow:
 *   0x00-0x7F  single byte
 *   0xC2-0xDF  one continuation byte
 *   0xE0-0xEF  two continuation bytes
 *   0xF0-0xF4  three continuation bytes
 * Any other lead byte is rejected: a stray continuation byte, 0xC0/0xC1
 * (overlong two-byte forms) and 0xF5-0xFF (beyond U+10FFFF). The first
 * continuation byte is range-checked further to reject overlong three- and
 * four-byte forms, surrogates (U+D800-U+DFFF) and values above U+10FFFF.
 *
 * @param character_string      Bytes to check.
 * @param character_string_size Number of bytes in character_string.
 * @return true when every byte belongs to a complete, well-formed character
 *         (an empty non-NULL input is valid); false otherwise or when
 *         character_string is NULL.
 */
bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_size) {
    if (character_string == NULL) {
        return false;
    }

    size_t index = 0;
    while (index < character_string_size) {
        const unsigned char lead = character_string[index];
        if (lead <= 0x7F) {
            index += 1;
            continue;
        }

        size_t continuation_count;
        unsigned char second_minimum = 0x80;
        unsigned char second_maximum = 0xBF;
        if (lead >= 0xC2 && lead <= 0xDF) {
            continuation_count = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            continuation_count = 2;
            if (lead == 0xE0) {
                second_minimum = 0xA0; /* below this the form is overlong */
            } else if (lead == 0xED) {
                second_maximum = 0x9F; /* above this lies the surrogate range */
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            continuation_count = 3;
            if (lead == 0xF0) {
                second_minimum = 0x90; /* below this the form is overlong */
            } else if (lead == 0xF4) {
                second_maximum = 0x8F; /* above this exceeds U+10FFFF */
            }
        } else {
            return false;
        }

        /* The whole character must fit inside the remaining input. */
        if (continuation_count > character_string_size - index - 1) {
            return false;
        }

        const unsigned char second = character_string[index + 1];
        if (second < second_minimum || second > second_maximum) {
            return false;
        }

        for (size_t offset = 2; offset <= continuation_count; offset += 1) {
            if ((character_string[index + offset] & 0xC0) != 0x80) {
                return false;
            }
        }

        index += continuation_count + 1;
    }

    return true;
}
/**
 * Counts the bytes of a '\0'-terminated byte string.
 *
 * The terminator itself is not counted. The result is a number of bytes, not
 * of characters.
 *
 * @param character_string Terminated byte string, or NULL.
 * @return Number of bytes before the first '\0'; 0 when character_string is NULL.
 */
size_t fledasty_utf8_string_get_size(const unsigned char *character_string) {
    if (character_string == NULL) {
        return 0;
    }

    size_t size = 0;
    while (character_string[size] != '\0') {
        size += 1;
    }

    return size;
}