From 3ae91ad59c51c39ceb6d2aae78dc20ff5949774f Mon Sep 17 00:00:00 2001
From: Mineplay
Date: Fri, 18 Apr 2025 10:05:29 -0500
Subject: [PATCH] feat(memory management): implemented memory copy function

---
 Include/Hallocy/Core/Memory.h |   3 +-
 Src/Core/Memory.c             | 182 ++++++++++++++++++++++++++++++++--
 Tests/Main.c                  |   7 +-
 3 files changed, 183 insertions(+), 9 deletions(-)

diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h
index 910dc15..b41c05c 100644
--- a/Include/Hallocy/Core/Memory.h
+++ b/Include/Hallocy/Core/Memory.h
@@ -28,6 +28,7 @@
 
 #include "../Utils/Error.h"
 
-HallocyError hallocy_set_memory(void *destination, int value, const size_t count);
+HallocyError hallocy_set_memory(void *destination, int value, const size_t size);
+HallocyError hallocy_copy_memory(void *destination, const void *source, const size_t size);
 
 #endif
\ No newline at end of file
diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c
index 2935d07..0710701 100644
--- a/Src/Core/Memory.c
+++ b/Src/Core/Memory.c
@@ -26,13 +26,13 @@
 #include
 #include
 
-HallocyError hallocy_set_memory(void *destination, int value, const size_t count) {
+HallocyError hallocy_set_memory(void *destination, int value, const size_t size) {
     if (destination == NULL) {
         return HALLOCY_ERROR_INVALID_POINTER;
     }
 
     unsigned char *destination_bytes = (unsigned char*)destination;
-    unsigned char *end_address = destination_bytes + count;
+    unsigned char *end_address = destination_bytes + size;
 
     unsigned char value_bytes = (unsigned char)value;
 
@@ -60,7 +60,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
 
             __m512i simd_value = _mm512_set1_epi8(value_bytes);
             while (destination_bytes - end_address >= 64) {
-                _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
+                _mm512_store_si512((__m512i*)destination_bytes, simd_value);
                 destination_bytes += 64;
             }
 
@@ -75,7 +75,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
 
             __m256i simd_value = _mm256_set1_epi8(value_bytes);
             while (destination_bytes - end_address >= 32) {
-                _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
+                _mm256_store_si256((__m256i*)destination_bytes, simd_value);
                 destination_bytes += 32;
             }
 
@@ -92,14 +92,12 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
 
             __m128i simd_value = _mm_set1_epi8(value_bytes);
             while (destination_bytes - end_address >= 16) {
-                _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
+                _mm_store_si128((__m128i*)destination_bytes, simd_value);
                 destination_bytes += 16;
             }
 
             break;
         }
-
-
         #endif
         default: {
             size_t word_size = sizeof(size_t);
@@ -129,5 +127,175 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
         destination_bytes += 1;
     }
 
+    return HALLOCY_ERROR_NONE;
+}
+
+/**
+ * Copies `size` bytes from `source` to `destination`.
+ *
+ * Bytes are copied from the lowest address upwards, so overlapping regions in
+ * which `destination` starts inside `source` are not copied correctly.
+ *
+ * @param destination Start of the region to write; must not be NULL.
+ * @param source      Start of the region to read; must not be NULL.
+ * @param size        Number of bytes to copy.
+ * @return HALLOCY_ERROR_INVALID_POINTER when either pointer is NULL,
+ *         HALLOCY_ERROR_NONE otherwise.
+ */
+HallocyError hallocy_copy_memory(void *destination, const void *source, const size_t size) {
+    if (destination == NULL || source == NULL) {
+        return HALLOCY_ERROR_INVALID_POINTER;
+    }
+
+    unsigned char *destination_bytes = (unsigned char*)destination;
+    const unsigned char *source_bytes = (const unsigned char*)source;
+
+    unsigned char *end_address = destination_bytes + size;
+
+    switch (hallocy_is_simd_supported()) {
+        #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
+        case HALLOCY_SIMD_NEON: {
+            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
+                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
+                    *destination_bytes = *source_bytes;
+                    destination_bytes += 1;
+                    source_bytes += 1;
+                }
+            }
+
+            uint8x16_t simd_value;
+            while (end_address - destination_bytes >= 16) {
+                simd_value = vld1q_u8(source_bytes);
+                vst1q_u8(destination_bytes, simd_value);
+                destination_bytes += 16;
+                source_bytes += 16;
+            }
+            break;
+        }
+        #else
+        case HALLOCY_SIMD_AVX512: {
+            /* Equal misalignment: align both pointers once, then use aligned accesses. */
+            if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
+                while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
+                    *destination_bytes = *source_bytes;
+                    destination_bytes += 1;
+                    source_bytes += 1;
+                }
+
+                __m512i simd_value;
+                while (end_address - destination_bytes >= 64) {
+                    simd_value = _mm512_load_si512((const __m512i*)source_bytes);
+                    _mm512_store_si512((__m512i*)destination_bytes, simd_value);
+
+                    destination_bytes += 64;
+                    source_bytes += 64;
+                }
+            } else {
+                /* The pointers can never be aligned together; use unaligned accesses. */
+                __m512i simd_value;
+                while (end_address - destination_bytes >= 64) {
+                    simd_value = _mm512_loadu_si512((const __m512i*)source_bytes);
+                    _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
+
+                    destination_bytes += 64;
+                    source_bytes += 64;
+                }
+            }
+            break;
+        }
+
+        case HALLOCY_SIMD_AVX2:
+        case HALLOCY_SIMD_AVX: {
+            if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 32) {
+                while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
+                    *destination_bytes = *source_bytes;
+                    destination_bytes += 1;
+                    source_bytes += 1;
+                }
+
+                __m256i simd_value;
+                while (end_address - destination_bytes >= 32) {
+                    simd_value = _mm256_load_si256((const __m256i*)source_bytes);
+                    _mm256_store_si256((__m256i*)destination_bytes, simd_value);
+
+                    destination_bytes += 32;
+                    source_bytes += 32;
+                }
+            } else {
+                __m256i simd_value;
+                while (end_address - destination_bytes >= 32) {
+                    simd_value = _mm256_loadu_si256((const __m256i*)source_bytes);
+                    _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
+
+                    destination_bytes += 32;
+                    source_bytes += 32;
+                }
+            }
+            break;
+        }
+
+        case HALLOCY_SIMD_SSE2:
+        case HALLOCY_SIMD_SSE: {
+            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
+                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
+                    *destination_bytes = *source_bytes;
+                    destination_bytes += 1;
+                    source_bytes += 1;
+                }
+
+                __m128i simd_value;
+                while (end_address - destination_bytes >= 16) {
+                    simd_value = _mm_load_si128((const __m128i*)source_bytes);
+                    _mm_store_si128((__m128i*)destination_bytes, simd_value);
+
+                    destination_bytes += 16;
+                    source_bytes += 16;
+                }
+            } else {
+                __m128i simd_value;
+                while (end_address - destination_bytes >= 16) {
+                    simd_value = _mm_loadu_si128((const __m128i*)source_bytes);
+                    _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
+
+                    destination_bytes += 16;
+                    source_bytes += 16;
+                }
+            }
+            break;
+        }
+        #endif
+        default: {
+            size_t word_size = sizeof(size_t);
+            /* Word copies are only used when both pointers share the same alignment. */
+            if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) {
+                while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) {
+                    *destination_bytes = *source_bytes;
+                    destination_bytes += 1;
+                    source_bytes += 1;
+                }
+
+                size_t *destination_word = (size_t*)destination_bytes;
+                const size_t *source_word = (const size_t*)source_bytes;
+                while ((size_t)(end_address - (unsigned char*)destination_word) >= word_size) {
+                    *destination_word = *source_word;
+                    destination_word += 1;
+                    source_word += 1;
+                }
+
+                source_bytes = (const unsigned char*)source_word;
+                destination_bytes = (unsigned char*)destination_word;
+            }
+
+            break;
+        }
+    }
+
+    /* Copy the bytes the wider loops above did not cover. */
+    while (destination_bytes != end_address) {
+        *destination_bytes = *source_bytes;
+        destination_bytes += 1;
+        source_bytes += 1;
+    }
+
     return HALLOCY_ERROR_NONE;
 }
\ No newline at end of file
diff --git a/Tests/Main.c b/Tests/Main.c
index 8d43a56..4967b57 100644
--- a/Tests/Main.c
+++ b/Tests/Main.c
@@ -45,9 +45,14 @@
 
     char *text = (char*)hallocy_malloc(36);
     hallocy_set_memory(text, 'T', 35);
-    text[100] = '\0';
+    text[35] = '\0';
     printf("%s\n", text);
 
+    char *text_copy = (char*)hallocy_malloc(36);
+    hallocy_copy_memory(text_copy, text, 36);
+    printf("%s\n", text_copy);
+
     hallocy_free(text);
+    hallocy_free(text_copy);
 
     return 0;