From 64a3f602288d3e4f1db2838d1ffd80f849a7e83e Mon Sep 17 00:00:00 2001 From: Mineplay Date: Wed, 16 Apr 2025 17:18:07 -0500 Subject: [PATCH 1/4] feat(memory management): implemented memory set function --- Include/Hallocy/Core/Allocator.h | 1 - Include/Hallocy/Core/Memory.h | 33 ++++++++ Src/Core/Allocator.c | 2 + Src/Core/Memory.c | 133 +++++++++++++++++++++++++++++++ Tests/Main.c | 17 ++-- 5 files changed, 180 insertions(+), 6 deletions(-) create mode 100644 Include/Hallocy/Core/Memory.h create mode 100644 Src/Core/Memory.c diff --git a/Include/Hallocy/Core/Allocator.h b/Include/Hallocy/Core/Allocator.h index ed2fc87..825bad7 100644 --- a/Include/Hallocy/Core/Allocator.h +++ b/Include/Hallocy/Core/Allocator.h @@ -24,7 +24,6 @@ #define HALLOCY_ALLOCATOR #include -#include #include "../Utils/Error.h" diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h new file mode 100644 index 0000000..910dc15 --- /dev/null +++ b/Include/Hallocy/Core/Memory.h @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ----------------------------------------------------------------------------- + * File: Memory.h + * Description: + * This file implements the functions for managing memory. It includes functions + * to copy, move, compare and set memory. + * + * Author: Mineplay + * ----------------------------------------------------------------------------- + */ +#ifndef HALLOCY_MEMORY +#define HALLOCY_MEMORY + +#include +#include + +#include "../Utils/Error.h" + +HallocyError hallocy_set_memory(void *destination, int value, const size_t count); + +#endif \ No newline at end of file diff --git a/Src/Core/Allocator.c b/Src/Core/Allocator.c index 89bd271..9ff04ba 100644 --- a/Src/Core/Allocator.c +++ b/Src/Core/Allocator.c @@ -22,6 +22,8 @@ */ #include "../../Include/Hallocy/Core/Allocator.h" +#include + #if defined(_WIN32) #include diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c new file mode 100644 index 0000000..2935d07 --- /dev/null +++ b/Src/Core/Memory.c @@ -0,0 +1,133 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ----------------------------------------------------------------------------- + * File: Memory.c + * Description: + * This file implements the functions for managing memory. It includes functions + * to copy, move, compare and set memory. + * + * Author: Mineplay + * ----------------------------------------------------------------------------- + */ +#include "../../Include/Hallocy/Core/Memory.h" +#include "../../Include/Hallocy/Utils/Simd.h" + +#include +#include + +HallocyError hallocy_set_memory(void *destination, int value, const size_t count) { + if (destination == NULL) { + return HALLOCY_ERROR_INVALID_POINTER; + } + + unsigned char *destination_bytes = (unsigned char*)destination; + unsigned char *end_address = destination_bytes + count; + + unsigned char value_bytes = (unsigned char)value; + + switch (hallocy_is_simd_supported()) { + #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + case HALLOCY_SIMD_NEON: { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + uint8x16_t simd_value = vdupq_n_u8(value_bytes); + while (destination_bytes - end_address >= 16) { + vst1q_u8(destination_bytes, simd_value); + destination_bytes += 16; + } + break; + } + #else + case HALLOCY_SIMD_AVX512: { + while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + __m512i simd_value = _mm512_set1_epi8(value_bytes); + while (destination_bytes - end_address >= 64) { + _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); + destination_bytes += 64; + } + + break; + } + + case HALLOCY_SIMD_AVX2: { + while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + __m256i simd_value = _mm256_set1_epi8(value_bytes); + while (destination_bytes - end_address >= 32) { + _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); + destination_bytes += 32; + } + + break; + } + + case HALLOCY_SIMD_AVX: + case HALLOCY_SIMD_SSE2: + case HALLOCY_SIMD_SSE: { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + __m128i simd_value = _mm_set1_epi8(value_bytes); + while (destination_bytes - end_address >= 16) { + _mm_storeu_si128((__m128i*)destination_bytes, simd_value); + destination_bytes += 16; + } + + break; + } + + + #endif + default: { + size_t word_size = sizeof(size_t); + while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + size_t value_word = 0; + for (size_t i = 0; i < word_size; i++) { + value_word |= (size_t)value_bytes << (i * 8); + } + + size_t *destination_word = (size_t*)destination_bytes; + while ((unsigned char*)(destination_word + 1) < end_address) { + *destination_word = value_word; + destination_word += 1; + } + + destination_bytes = (unsigned char*)destination_word; + break; + } + } + + while (destination_bytes != end_address) { + *destination_bytes = value_bytes; + destination_bytes += 1; + } + + return HALLOCY_ERROR_NONE; +} \ No newline at end of file diff --git a/Tests/Main.c b/Tests/Main.c index f345485..8d43a56 100644 --- a/Tests/Main.c +++ b/Tests/Main.c @@ -19,12 +19,13 @@ * Author: Mineplay * ----------------------------------------------------------------------------- */ - #include - #include - #include +#include +#include +#include +#include int main() { - char *memory = (char *)hallocy_malloc(12288); + char *memory = (char*)hallocy_malloc(12288); if (memory == NULL) { printf("Failed it allocate memory!"); return -1; @@ -41,7 +42,13 @@ return -1; } - printf("Supported simd version: %d\n", hallocy_is_simd_supported()); + char *text = (char*)hallocy_malloc(36); + hallocy_set_memory(text, 'T', 35); + + text[100] = '\0'; + printf("%s\n", text); + + hallocy_free(text); return 0; } \ No newline at end of file From 3ae91ad59c51c39ceb6d2aae78dc20ff5949774f Mon Sep 17 00:00:00 2001 From: Mineplay Date: Fri, 18 Apr 2025 10:05:29 -0500 Subject: [PATCH 2/4] feat(memory management): implemented memeory copy function --- Include/Hallocy/Core/Memory.h | 3 +- Src/Core/Memory.c | 166 ++++++++++++++++++++++++++++++++-- Tests/Main.c | 6 +- 3 files changed, 166 insertions(+), 9 deletions(-) diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h index 910dc15..b41c05c 100644 --- a/Include/Hallocy/Core/Memory.h +++ b/Include/Hallocy/Core/Memory.h @@ -28,6 +28,7 @@ #include "../Utils/Error.h" -HallocyError hallocy_set_memory(void *destination, int value, const size_t count); +HallocyError hallocy_set_memory(void *destination, int value, const size_t size); +HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size); #endif \ No newline at end of file diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index 2935d07..0710701 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -26,13 +26,13 @@ #include #include -HallocyError hallocy_set_memory(void *destination, int value, const size_t count) { +HallocyError hallocy_set_memory(void *destination, int value, const size_t size) { if (destination == NULL) { return HALLOCY_ERROR_INVALID_POINTER; } unsigned char *destination_bytes = (unsigned char*)destination; - unsigned char *end_address = destination_bytes + count; + unsigned char *end_address = destination_bytes + size; unsigned char value_bytes = (unsigned char)value; @@ -60,7 +60,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count __m512i simd_value = _mm512_set1_epi8(value_bytes); while (destination_bytes - end_address >= 64) { - _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); + _mm512_store_si512((__m512i*)destination_bytes, simd_value); destination_bytes += 64; } @@ -75,7 +75,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count __m256i simd_value = _mm256_set1_epi8(value_bytes); while (destination_bytes - end_address >= 32) { - _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); + _mm256_store_si256((__m256i*)destination_bytes, simd_value); destination_bytes += 32; } @@ -92,14 +92,12 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count __m128i simd_value = _mm_set1_epi8(value_bytes); while (destination_bytes - end_address >= 16) { - _mm_storeu_si128((__m128i*)destination_bytes, simd_value); + _mm_store_si128((__m128i*)destination_bytes, simd_value); destination_bytes += 16; } break; } - - #endif default: { size_t word_size = sizeof(size_t); @@ -129,5 +127,159 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count destination_bytes += 1; } + return HALLOCY_ERROR_NONE; +} + +HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size) { + if (destination == NULL || source == NULL) { + return HALLOCY_ERROR_INVALID_POINTER; + } + + unsigned char *destination_bytes = (unsigned char*)destination; + unsigned char *source_bytes = (unsigned char*)source; + + unsigned char *end_address = destination_bytes + size; + + switch (hallocy_is_simd_supported()) { + #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + case HALLOCY_SIMD_NEON: { + if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + } + + uint8x16_t simd_value; + while (destination_bytes - end_address >= 16) { + simd_value = vdupq_n_u8(source_bytes); + vst1q_u8(destination_bytes, simd_value); + destination_bytes += 16; + source_bytes += 16; + } + break; + } + #else + case HALLOCY_SIMD_AVX512: { + if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + + __m512i simd_value; + while (destination_bytes - end_address >= 64) { + simd_value = _mm512_load_si512((__m512i*)source_bytes); + _mm512_store_si512((__m512i*)destination, simd_value); + + destination_bytes += 64; + source_bytes += 64; + } + } else { + __m512i simd_value; + while (destination_bytes - end_address >= 64) { + simd_value = _mm512_loadu_si512((__m512i*)source_bytes); + _mm512_storeu_si512((__m512i*)destination, simd_value); + + destination_bytes += 64; + source_bytes += 64; + } + } + break; + } + + case HALLOCY_SIMD_AVX2: + case HALLOCY_SIMD_AVX: { + if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + + __m256i simd_value; + while (destination_bytes - end_address >= 32) { + simd_value = _mm256_load_si256((__m256i*)source_bytes); + _mm256_store_si256((__m256i*)destination, simd_value); + + destination_bytes += 32; + source_bytes += 32; + } + } else { + __m256i simd_value; + while (destination_bytes - end_address >= 32) { + simd_value = _mm256_loadu_si256((__m256i*)source_bytes); + _mm256_storeu_si256((__m256i*)destination, simd_value); + + destination_bytes += 32; + source_bytes += 32; + } + } + break; + } + + case HALLOCY_SIMD_SSE2: + case HALLOCY_SIMD_SSE: { + if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + + __m128i simd_value; + while (destination_bytes - end_address >= 16) { + simd_value = _mm_load_si128((__m128i*)source_bytes); + _mm_store_si128((__m128i*)destination, simd_value); + + destination_bytes += 16; + source_bytes += 16; + } + } else { + __m128i simd_value; + while (destination_bytes - end_address >= 16) { + simd_value = _mm_loadu_si128((__m128i*)source_bytes); + _mm_storeu_si128((__m128i*)destination, simd_value); + + destination_bytes += 16; + source_bytes += 16; + } + } + break; + } + #endif + default: { + size_t word_size = sizeof(size_t); + if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) { + while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + + size_t *destination_word = (size_t*)destination_bytes; + size_t *source_word = (size_t*)source_bytes; + while ((unsigned char*)(destination_word + 1) < end_address) { + *destination_word = *source_word; + destination_word += 1; + source_word += 1; + } + + source_bytes = (unsigned char*)source_word; + destination_bytes = (unsigned char*)destination_word; + } + + break; + } + } + + while (destination_bytes != end_address) { + *destination_bytes = *source_bytes; + destination_bytes += 1; + source_bytes += 1; + } + return HALLOCY_ERROR_NONE; } \ No newline at end of file diff --git a/Tests/Main.c b/Tests/Main.c index 8d43a56..4967b57 100644 --- a/Tests/Main.c +++ b/Tests/Main.c @@ -45,9 +45,13 @@ char *text = (char*)hallocy_malloc(36); hallocy_set_memory(text, 'T', 35); - text[100] = '\0'; + text[35] = '\0'; printf("%s\n", text); + char *text_copy = (char*)hallocy_malloc(36); + hallocy_copy_memory(text_copy, text, 36); + printf("%s\n", text_copy); + hallocy_free(text); return 0; From 4aa1913006e7c4a20942cabdaf021db0cd078fd4 Mon Sep 17 00:00:00 2001 From: Mineplay Date: Fri, 18 Apr 2025 12:13:11 -0500 Subject: [PATCH 3/4] feat(memory management): implemented memory move function --- Include/Hallocy/Core/Memory.h | 1 + Src/Core/Memory.c | 160 ++++++++++++++++++++++++++++++++++ Tests/Main.c | 9 +- 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h index b41c05c..b001d22 100644 --- a/Include/Hallocy/Core/Memory.h +++ b/Include/Hallocy/Core/Memory.h @@ -30,5 +30,6 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size); HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size); +HallocyError hallocy_move_memory(void *destination, void *source, const size_t size); #endif \ No newline at end of file diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index 0710701..fd2e7e6 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -281,5 +281,165 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s source_bytes += 1; } + return HALLOCY_ERROR_NONE; +} + +HallocyError hallocy_move_memory(void *destination, void *source, const size_t size) { + if (!((void*)((size_t)source + size) > destination && source < destination)) { + return hallocy_copy_memory(destination, source, size); + } + + if (destination == NULL || source == NULL) { + return HALLOCY_ERROR_INVALID_POINTER; + } + + unsigned char *end_address = (unsigned char*)destination; + + unsigned char *destination_bytes = (unsigned char*)destination + size; + unsigned char *source_bytes = (unsigned char*)source + size; + + switch (hallocy_is_simd_supported()) { + #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + case HALLOCY_SIMD_NEON: { + if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + *destination_bytes = *source_bytes; + } + } + + uint8x16_t simd_value; + while (end_address - destination_bytes >= 16) { + destination_bytes -= 16; + source_bytes -= 16; + + simd_value = vdupq_n_u8(source_bytes); + vst1q_u8(destination_bytes, simd_value); + } + break; + } + #else + case HALLOCY_SIMD_AVX512: { + if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + *destination_bytes = *source_bytes; + } + + __m512i simd_value; + while (end_address - destination_bytes >= 64) { + destination_bytes -= 64; + source_bytes -= 64; + + simd_value = _mm512_load_si512((__m512i*)source_bytes); + _mm512_store_si512((__m512i*)destination, simd_value); + } + } else { + __m512i simd_value; + while (end_address - destination_bytes >= 64) { + destination_bytes -= 64; + source_bytes -= 64; + + simd_value = _mm512_loadu_si512((__m512i*)source_bytes); + _mm512_storeu_si512((__m512i*)destination, simd_value); + } + } + break; + } + + case HALLOCY_SIMD_AVX2: + case HALLOCY_SIMD_AVX: { + if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + + *destination_bytes = *source_bytes; + } + + __m256i simd_value; + while (end_address - destination_bytes >= 32) { + destination_bytes -= 32; + source_bytes -= 32; + + simd_value = _mm256_load_si256((__m256i*)source_bytes); + _mm256_store_si256((__m256i*)destination, simd_value); + } + } else { + __m256i simd_value; + while (end_address - destination_bytes >= 32) { + destination_bytes -= 32; + source_bytes -= 32; + + simd_value = _mm256_loadu_si256((__m256i*)source_bytes); + _mm256_storeu_si256((__m256i*)destination, simd_value); + } + } + break; + } + + case HALLOCY_SIMD_SSE2: + case HALLOCY_SIMD_SSE: { + if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { + while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + *destination_bytes = *source_bytes; + } + + __m128i simd_value; + while (end_address - destination_bytes >= 16) { + destination_bytes -= 16; + source_bytes -= 16; + + simd_value = _mm_load_si128((__m128i*)source_bytes); + _mm_store_si128((__m128i*)destination, simd_value); + } + } else { + __m128i simd_value; + while (end_address - destination_bytes >= 16) { + destination_bytes -= 16; + source_bytes -= 16; + + simd_value = _mm_loadu_si128((__m128i*)source_bytes); + _mm_storeu_si128((__m128i*)destination, simd_value); + } + } + break; + } + #endif + default: { + size_t word_size = sizeof(size_t); + if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) { + while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + *destination_bytes = *source_bytes; + } + + size_t *destination_word = (size_t*)destination_bytes; + size_t *source_word = (size_t*)source_bytes; + while ((unsigned char*)(destination_word - 1) > end_address) { + destination_word -= 1; + source_word -= 1; + *destination_word = *source_word; + } + + source_bytes = (unsigned char*)source_word; + destination_bytes = (unsigned char*)destination_word; + } + + break; + } + } + + while (destination_bytes != end_address) { + destination_bytes -= 1; + source_bytes -= 1; + *destination_bytes = *source_bytes; + } + return HALLOCY_ERROR_NONE; } \ No newline at end of file diff --git a/Tests/Main.c b/Tests/Main.c index 4967b57..09645e3 100644 --- a/Tests/Main.c +++ b/Tests/Main.c @@ -49,7 +49,14 @@ printf("%s\n", text); char *text_copy = (char*)hallocy_malloc(36); - hallocy_copy_memory(text_copy, text, 36); + hallocy_copy_memory(text_copy, text, 35); + printf("%s\n", text_copy); + + hallocy_set_memory(text_copy + 10, 'L', 25); + text_copy[14] = 'r'; + text_copy[24] = 'r'; + printf("%s\n", text_copy); + hallocy_move_memory(text_copy + 5, text_copy + 10, 15); printf("%s\n", text_copy); hallocy_free(text); From 4c72d22e9cc4c56c416907a3c1104add6b31b116 Mon Sep 17 00:00:00 2001 From: Mineplay Date: Sat, 19 Apr 2025 05:05:12 -0500 Subject: [PATCH 4/4] feat(memory management): implemented memory compare function --- Include/Hallocy/Core/Memory.h | 1 + Src/Core/Memory.c | 225 ++++++++++++++++++++++++++++++++-- Tests/Main.c | 13 ++ 3 files changed, 227 insertions(+), 12 deletions(-) diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h index b001d22..29e78d3 100644 --- a/Include/Hallocy/Core/Memory.h +++ b/Include/Hallocy/Core/Memory.h @@ -31,5 +31,6 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size); HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size); HallocyError hallocy_move_memory(void *destination, void *source, const size_t size); +bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size); #endif \ No newline at end of file diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index fd2e7e6..3fc4417 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -172,7 +172,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m512i simd_value; while (destination_bytes - end_address >= 64) { simd_value = _mm512_load_si512((__m512i*)source_bytes); - _mm512_store_si512((__m512i*)destination, simd_value); + _mm512_store_si512((__m512i*)destination_bytes, simd_value); destination_bytes += 64; source_bytes += 64; @@ -181,7 +181,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m512i simd_value; while (destination_bytes - end_address >= 64) { simd_value = _mm512_loadu_si512((__m512i*)source_bytes); - _mm512_storeu_si512((__m512i*)destination, simd_value); + _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); destination_bytes += 64; source_bytes += 64; @@ -202,7 +202,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m256i simd_value; while (destination_bytes - end_address >= 32) { simd_value = _mm256_load_si256((__m256i*)source_bytes); - _mm256_store_si256((__m256i*)destination, simd_value); + _mm256_store_si256((__m256i*)destination_bytes, simd_value); destination_bytes += 32; source_bytes += 32; @@ -211,7 +211,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m256i simd_value; while (destination_bytes - end_address >= 32) { simd_value = _mm256_loadu_si256((__m256i*)source_bytes); - _mm256_storeu_si256((__m256i*)destination, simd_value); + _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); destination_bytes += 32; source_bytes += 32; @@ -232,7 +232,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m128i simd_value; while (destination_bytes - end_address >= 16) { simd_value = _mm_load_si128((__m128i*)source_bytes); - _mm_store_si128((__m128i*)destination, simd_value); + _mm_store_si128((__m128i*)destination_bytes, simd_value); destination_bytes += 16; source_bytes += 16; @@ -241,7 +241,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s __m128i simd_value; while (destination_bytes - end_address >= 16) { simd_value = _mm_loadu_si128((__m128i*)source_bytes); - _mm_storeu_si128((__m128i*)destination, simd_value); + _mm_storeu_si128((__m128i*)destination_bytes, simd_value); destination_bytes += 16; source_bytes += 16; @@ -334,7 +334,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 64; simd_value = _mm512_load_si512((__m512i*)source_bytes); - _mm512_store_si512((__m512i*)destination, simd_value); + _mm512_store_si512((__m512i*)destination_bytes, simd_value); } } else { __m512i simd_value; @@ -343,7 +343,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 64; simd_value = _mm512_loadu_si512((__m512i*)source_bytes); - _mm512_storeu_si512((__m512i*)destination, simd_value); + _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); } } break; @@ -365,7 +365,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 32; simd_value = _mm256_load_si256((__m256i*)source_bytes); - _mm256_store_si256((__m256i*)destination, simd_value); + _mm256_store_si256((__m256i*)destination_bytes, simd_value); } } else { __m256i simd_value; @@ -374,7 +374,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 32; simd_value = _mm256_loadu_si256((__m256i*)source_bytes); - _mm256_storeu_si256((__m256i*)destination, simd_value); + _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); } } break; @@ -395,7 +395,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 16; simd_value = _mm_load_si128((__m128i*)source_bytes); - _mm_store_si128((__m128i*)destination, simd_value); + _mm_store_si128((__m128i*)destination_bytes, simd_value); } } else { __m128i simd_value; @@ -404,7 +404,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s source_bytes -= 16; simd_value = _mm_loadu_si128((__m128i*)source_bytes); - _mm_storeu_si128((__m128i*)destination, simd_value); + _mm_storeu_si128((__m128i*)destination_bytes, simd_value); } } break; @@ -442,4 +442,205 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s } return HALLOCY_ERROR_NONE; +} + +bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size) { + if (left_side == NULL || right_side == NULL) { + return false; + } + + unsigned char *left_side_bytes = (unsigned char*)left_side; + unsigned char *right_side_bytes = (unsigned char*)right_side; + + unsigned char *end_address = left_side_bytes + size; + + switch (hallocy_is_simd_supported()) { + #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + case HALLOCY_SIMD_NEON: { + if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { + while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { + if (*left_side_bytes != *right_side_bytes) { + return false; + } + + left_side_bytes += 1; + right_side_bytes += 1; + } + } + + while (end_address - destination_bytes >= 16) { + uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes); + uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes); + + uint8x16_t result = vceqq_u8(simd_left_side_value, simd_right_side_value); + if (vmaxvq_u8(result) != 0xFF) { + return false; + } + + left_side_bytes += 16; + right_side_bytes += 16 + } + break; + } + #else + case HALLOCY_SIMD_AVX512: { + if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) { + while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) { + if (*left_side_bytes != *right_side_bytes) { + return false; + } + + left_side_bytes += 1; + right_side_bytes += 1; + } + + while (left_side_bytes - end_address >= 64) { + __m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes); + __m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes); + + __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value); + if (_mm512_test_epi64_mask(result, result) != 0) { + return false; + } + + left_side_bytes += 64; + right_side_bytes += 64; + } + } else { + while (left_side_bytes - end_address >= 64) { + __m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes); + __m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes); + + __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value); + if (_mm512_test_epi64_mask(result, result) != 0) { + return false; + } + + left_side_bytes += 64; + right_side_bytes += 64; + } + } + break; + } + + case HALLOCY_SIMD_AVX2: + case HALLOCY_SIMD_AVX: { + if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) { + while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) { + if (*left_side_bytes != *right_side_bytes) { + return false; + } + + left_side_bytes += 1; + right_side_bytes += 1; + } + + while (left_side_bytes - end_address >= 32) { + __m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes); + __m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes); + + __m256i result = _mm256_xor_si256(simd_left_side_value, simd_right_side_value); + if (_mm256_testz_si256(result, result) == 0) { + return false; + } + + left_side_bytes += 32; + right_side_bytes += 32; + } + } else { + while (left_side_bytes - end_address >= 32) { + __m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes); + __m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes); + + __m256i result = _mm256_xor_si256(simd_left_side_value, simd_right_side_value); + if (_mm256_testz_si256(result, result) == 0) { + return false; + } + + left_side_bytes += 32; + right_side_bytes += 32; + } + } + break; + } + + case HALLOCY_SIMD_SSE2: + case HALLOCY_SIMD_SSE: { + if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { + while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { + if (*left_side_bytes != *right_side_bytes) { + return false; + } + + left_side_bytes += 1; + right_side_bytes += 1; + } + + while (left_side_bytes - end_address >= 16) { + __m128i simd_left_side_value = _mm_load_si128((__m128i*)left_side_bytes); + __m128i simd_right_side_value = _mm_load_si128((__m128i*)right_side_bytes); + + __m128i result = _mm_xor_si128(simd_left_side_value, simd_right_side_value); + if (_mm_testz_si128(result, result) == 0) { + return false; + } + + left_side_bytes += 16; + right_side_bytes += 16; + } + } else { + while (left_side_bytes - end_address >= 16) { + __m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes); + __m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes); + + __m128i result = _mm_xor_si128(simd_left_side_value, simd_right_side_value); + if (_mm_testz_si128(result, result) == 0) { + return false; + } + + left_side_bytes += 16; + right_side_bytes += 16; + } + } + break; + } + #endif + default: { + size_t word_size = sizeof(size_t); + if ((size_t)left_side_bytes % word_size == (size_t)right_side_bytes % word_size) { + while (((size_t)left_side_bytes % word_size) != 0 && left_side_bytes != end_address) { + *left_side_bytes = *right_side_bytes; + left_side_bytes += 1; + right_side_bytes += 1; + } + + size_t *left_side_word = (size_t*)left_side_bytes; + size_t *right_side_word = (size_t*)right_side_bytes; + while ((unsigned char*)(left_side_word + 1) < end_address) { + if (*left_side_word != *right_side_word) { + return false; + } + + left_side_word += 1; + right_side_word += 1; + } + + right_side_bytes = (unsigned char*)right_side_word; + left_side_bytes = (unsigned char*)left_side_word; + } + + break; + } + } + + while (left_side_bytes != end_address) { + if (*left_side_bytes != *right_side_bytes) { + return false; + } + + left_side_bytes += 1; + right_side_bytes += 1; + } + + return true; } \ No newline at end of file diff --git a/Tests/Main.c b/Tests/Main.c index 09645e3..934c082 100644 --- a/Tests/Main.c +++ b/Tests/Main.c @@ -59,6 +59,19 @@ hallocy_move_memory(text_copy + 5, text_copy + 10, 15); printf("%s\n", text_copy); + if (!hallocy_compare_memory(text, text_copy, 36)) { + printf("text and text_copy are not equal\n"); + } + + char *text_copy_copy = (char*)hallocy_malloc(36); + hallocy_copy_memory(text_copy_copy, text_copy, 36); + + if (hallocy_compare_memory(text_copy_copy, text_copy, 36)) { + printf("text_copy_copy and text_copy are equal\n"); + } + + hallocy_free(text_copy_copy); + hallocy_free(text_copy); hallocy_free(text); return 0;