diff --git a/Include/Fledasty/Algorithms/Hashing.h b/Include/Fledasty/Algorithms/Hashing.h
new file mode 100644
index 0000000..6ed5606
--- /dev/null
+++ b/Include/Fledasty/Algorithms/Hashing.h
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * -----------------------------------------------------------------------------
+ * File: Hashing.h
+ * Description:
+ *  This file contains the functions for hashing common data structures and types
+ *  for hash tables.
+ *
+ * Author: Mineplay
+ * -----------------------------------------------------------------------------
+ */
+#ifndef FLEDASTY_HASHING
+#define FLEDASTY_HASHING
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* 128-bit hash value, stored as two 64-bit halves. */
+typedef struct {
+    uint64_t low, high;
+} FledastyHash128;
+
+/*
+ * Hashes the size bytes starting at bytes into a 32-bit value.
+ * seed is used as the initial hash state.
+ */
+uint32_t fledasty_mur_mur_3_hash_x32(const void *bytes, const size_t size, const uint32_t seed);
+
+/*
+ * Hashes the size bytes starting at bytes into a 128-bit value.
+ * seed is used as the initial state of both 64-bit halves.
+ */
+FledastyHash128 fledasty_mur_mur_3_hash_x64_128(const void *bytes, const size_t size, const uint32_t seed);
+
+#endif
diff --git a/Include/Fledasty/Strings/UTF8String.h b/Include/Fledasty/Strings/UTF8String.h
index 1d07d55..cf7915c 100644
--- a/Include/Fledasty/Strings/UTF8String.h
+++ b/Include/Fledasty/Strings/UTF8String.h
@@ -23,6 +23,9 @@
  * Author: Mineplay
  * -----------------------------------------------------------------------------
  */
+#ifndef FLEDASTY_UTF8_STRING
+#define FLEDASTY_UTF8_STRING
+
 #include
 #include
 #include
@@ -59,3 +62,5 @@ uint32_t *fledasty_utf8_string_decode(const FledastyUtf8String *current_string,
 
 bool fledasty_utf8_string_validate(unsigned char *character_string, const size_t character_string_size);
 size_t fledasty_utf8_string_get_size(const unsigned char *character_string);
+
+#endif
diff --git a/Src/Algorithms copy/Hashing.h b/Src/Algorithms copy/Hashing.h
new file mode 100644
index 0000000..e69de29
diff --git a/Src/Algorithms/Hashing.c b/Src/Algorithms/Hashing.c
new file mode 100644
index 0000000..4c9706a
--- /dev/null
+++ b/Src/Algorithms/Hashing.c
@@ -0,0 +1,188 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * -----------------------------------------------------------------------------
+ * File: Hashing.c
+ * Description:
+ *  This file contains the functions for hashing common data structures and types
+ *  for hash tables.
+ *
+ * Author: Mineplay
+ * -----------------------------------------------------------------------------
+ */
+#include "../../Include/Fledasty/Algorithms/Hashing.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Rotates key left by rotation bits. Both shift counts are masked so that a
+ * rotation of 0 never shifts by the full width of the type (undefined behavior).
+ */
+static inline uint32_t fledasty_rolt_32(const uint32_t key, const uint32_t rotation) { return (key << (rotation & 31U)) | (key >> ((32U - rotation) & 31U)); }
+/* 64-bit counterpart of fledasty_rolt_32. */
+static inline uint64_t fledasty_rolt_64(const uint64_t key, const uint64_t rotation) { return (key << (rotation & 63U)) | (key >> ((64U - rotation) & 63U)); }
+
+/*
+ * Assemble little-endian words byte by byte, so the blocks are read the same
+ * way on every host byte order and unaligned input is never dereferenced as a word.
+ */
+static inline uint32_t fledasty_load_le_32(const unsigned char *bytes) {
+    return (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8) | ((uint32_t)bytes[2] << 16) | ((uint32_t)bytes[3] << 24);
+}
+
+static inline uint64_t fledasty_load_le_64(const unsigned char *bytes) {
+    return (uint64_t)fledasty_load_le_32(bytes) | ((uint64_t)fledasty_load_le_32(bytes + 4) << 32);
+}
+
+/* Final avalanche of one 64-bit half (the same steps are applied to low and high). */
+static inline uint64_t fledasty_final_mix_64(uint64_t hash) {
+    hash ^= hash >> 33;
+    hash *= 0xff51afd7ed558ccdULL;
+    hash ^= hash >> 33;
+    hash *= 0xc4ceb9fe1a85ec53ULL;
+    hash ^= hash >> 33;
+
+    return hash;
+}
+
+/*
+ * 32-bit MurmurHash3 (x86_32 variant) of the size bytes starting at bytes.
+ *
+ * bytes is only read when size is not 0. seed is the initial hash state.
+ * Input is consumed as little-endian 4-byte blocks; the 1-3 bytes left after
+ * the last block are folded in once. size is mixed in modulo 2^32.
+ */
+uint32_t fledasty_mur_mur_3_hash_x32(const void *bytes, const size_t size, const uint32_t seed) {
+    const unsigned char *hashing_bytes = (const unsigned char *)bytes;
+
+    uint32_t hash = seed;
+    for (size_t index = size >> 2; index; index -= 1) {
+        uint32_t key = fledasty_load_le_32(hashing_bytes);
+        hashing_bytes += sizeof(uint32_t);
+
+        key *= 0xcc9e2d51U;
+        key = fledasty_rolt_32(key, 15);
+        key *= 0x1b873593U;
+
+        hash ^= key;
+        hash = fledasty_rolt_32(hash, 13);
+        hash = (hash * 5U) + 0xe6546b64U;
+    }
+
+    /* hashing_bytes now points at the tail; only indices below (size & 3) are read. */
+    uint32_t tail_key = 0;
+    switch (size & 3) {
+        case 3:
+            tail_key ^= (uint32_t)hashing_bytes[2] << 16;
+            /* fallthrough */
+        case 2:
+            tail_key ^= (uint32_t)hashing_bytes[1] << 8;
+            /* fallthrough */
+        case 1:
+            tail_key ^= (uint32_t)hashing_bytes[0];
+
+            tail_key *= 0xcc9e2d51U;
+            tail_key = fledasty_rolt_32(tail_key, 15);
+            tail_key *= 0x1b873593U;
+
+            hash ^= tail_key;
+            break;
+        default:
+            break;
+    }
+
+    hash ^= (uint32_t)size;
+
+    hash ^= hash >> 16;
+    hash *= 0x85ebca6bU;
+    hash ^= hash >> 13;
+    hash *= 0xc2b2ae35U;
+    hash ^= hash >> 16;
+
+    return hash;
+}
+
+/*
+ * 128-bit MurmurHash3 (x64_128 variant) of the size bytes starting at bytes.
+ *
+ * bytes is only read when size is not 0. seed initializes both halves of the
+ * state. Input is consumed as little-endian 16-byte blocks (low key first,
+ * high key second); up to 15 tail bytes are folded in without the per-block
+ * state mixing, as in the reference algorithm.
+ */
+FledastyHash128 fledasty_mur_mur_3_hash_x64_128(const void *bytes, const size_t size, const uint32_t seed) {
+    const unsigned char *hashing_bytes = (const unsigned char *)bytes;
+
+    FledastyHash128 hash = { .low = seed, .high = seed };
+
+    for (size_t index = size >> 4; index; index -= 1) {
+        uint64_t low_key = fledasty_load_le_64(hashing_bytes);
+        uint64_t high_key = fledasty_load_le_64(hashing_bytes + sizeof(uint64_t));
+        hashing_bytes += 2 * sizeof(uint64_t);
+
+        low_key *= 0x87c37b91114253d5ULL;
+        low_key = fledasty_rolt_64(low_key, 31);
+        low_key *= 0x4cf5ad432745937fULL;
+
+        hash.low ^= low_key;
+        hash.low = fledasty_rolt_64(hash.low, 27);
+        hash.low += hash.high;
+        hash.low = hash.low * 5 + 0x52dce729;
+
+        high_key *= 0x4cf5ad432745937fULL;
+        high_key = fledasty_rolt_64(high_key, 33);
+        high_key *= 0x87c37b91114253d5ULL;
+
+        hash.high ^= high_key;
+        hash.high = fledasty_rolt_64(hash.high, 31);
+        hash.high += hash.low;
+        hash.high = hash.high * 5 + 0x38495ab5;
+    }
+
+    /* Tail bytes 0-7 build the low key and bytes 8-14 the high key, least significant byte first. */
+    uint64_t low_key = 0;
+    uint64_t high_key = 0;
+    for (size_t index = 0; index < (size & 15); index += 1) {
+        if (index < 8) {
+            low_key |= (uint64_t)hashing_bytes[index] << (8 * index);
+        } else {
+            high_key |= (uint64_t)hashing_bytes[index] << (8 * (index - 8));
+        }
+    }
+
+    /* A key of 0 stays 0 through the multiplications and rotation, so an absent tail half changes nothing. */
+    low_key *= 0x87c37b91114253d5ULL;
+    low_key = fledasty_rolt_64(low_key, 31);
+    low_key *= 0x4cf5ad432745937fULL;
+    hash.low ^= low_key;
+
+    high_key *= 0x4cf5ad432745937fULL;
+    high_key = fledasty_rolt_64(high_key, 33);
+    high_key *= 0x87c37b91114253d5ULL;
+    hash.high ^= high_key;
+
+    hash.low ^= size;
+    hash.high ^= size;
+
+    hash.low += hash.high;
+    hash.high += hash.low;
+
+    hash.low = fledasty_final_mix_64(hash.low);
+    hash.high = fledasty_final_mix_64(hash.high);
+
+    hash.low += hash.high;
+    hash.high += hash.low;
+
+    return hash;
+}
diff --git a/Tests/Main.c b/Tests/Main.c
index b928c5a..43c0a75 100644
--- a/Tests/Main.c
+++ b/Tests/Main.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <Fledasty/Algorithms/Hashing.h>
 
 static inline size_t integer_hash_function(const void *key) { return *(int*)key; }
 
@@ -263,6 +264,12 @@ int main() {
         printf("String contains 😀!\n");
     }
 
+    uint32_t hash_x32 = fledasty_mur_mur_3_hash_x32(test_utf8_string.character_string, test_utf8_string.size, 0);
+    printf("UTF-8 String hash using murmur 32 is: %u\n", (unsigned int)hash_x32);
+
+    FledastyHash128 hash_x64 = fledasty_mur_mur_3_hash_x64_128(test_utf8_string.character_string, test_utf8_string.size, 0);
+    printf("UTF-8 String hash using murmur 64 is: low = %llu, high = %llu\n", (unsigned long long)hash_x64.low, (unsigned long long)hash_x64.high);
+
     size_t unicode_length = 0;
     uint32_t *unicode = fledasty_utf8_string_decode(&test_utf8_string, &unicode_length);
     FledastyUtf8String encoded_string = fledasty_utf8_string_encode(unicode, unicode_length);