feat(memory management): implemented memory compare function

This commit is contained in:
Mineplay 2025-04-19 05:05:12 -05:00
parent 4aa1913006
commit 4c72d22e9c
3 changed files with 227 additions and 12 deletions

View file

@ -31,5 +31,6 @@
HallocyError hallocy_set_memory(void *destination, int value, const size_t size);
HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size);
HallocyError hallocy_move_memory(void *destination, void *source, const size_t size);
/*
 * Compares the first `size` bytes at `left_side` with the first `size` bytes
 * at `right_side`.
 *
 * Returns false when either pointer is NULL or when a differing byte is
 * found; returns true otherwise (including when `size` is 0).
 */
bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size);
#endif

View file

@ -172,7 +172,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m512i simd_value;
while (destination_bytes - end_address >= 64) {
simd_value = _mm512_load_si512((__m512i*)source_bytes);
_mm512_store_si512((__m512i*)destination, simd_value);
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
destination_bytes += 64;
source_bytes += 64;
@ -181,7 +181,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m512i simd_value;
while (destination_bytes - end_address >= 64) {
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
_mm512_storeu_si512((__m512i*)destination, simd_value);
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
destination_bytes += 64;
source_bytes += 64;
@ -202,7 +202,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m256i simd_value;
while (destination_bytes - end_address >= 32) {
simd_value = _mm256_load_si256((__m256i*)source_bytes);
_mm256_store_si256((__m256i*)destination, simd_value);
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
destination_bytes += 32;
source_bytes += 32;
@ -211,7 +211,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m256i simd_value;
while (destination_bytes - end_address >= 32) {
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
_mm256_storeu_si256((__m256i*)destination, simd_value);
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
destination_bytes += 32;
source_bytes += 32;
@ -232,7 +232,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m128i simd_value;
while (destination_bytes - end_address >= 16) {
simd_value = _mm_load_si128((__m128i*)source_bytes);
_mm_store_si128((__m128i*)destination, simd_value);
_mm_store_si128((__m128i*)destination_bytes, simd_value);
destination_bytes += 16;
source_bytes += 16;
@ -241,7 +241,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
__m128i simd_value;
while (destination_bytes - end_address >= 16) {
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
_mm_storeu_si128((__m128i*)destination, simd_value);
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
destination_bytes += 16;
source_bytes += 16;
@ -334,7 +334,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 64;
simd_value = _mm512_load_si512((__m512i*)source_bytes);
_mm512_store_si512((__m512i*)destination, simd_value);
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
}
} else {
__m512i simd_value;
@ -343,7 +343,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 64;
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
_mm512_storeu_si512((__m512i*)destination, simd_value);
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
}
}
break;
@ -365,7 +365,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 32;
simd_value = _mm256_load_si256((__m256i*)source_bytes);
_mm256_store_si256((__m256i*)destination, simd_value);
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
}
} else {
__m256i simd_value;
@ -374,7 +374,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 32;
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
_mm256_storeu_si256((__m256i*)destination, simd_value);
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
}
}
break;
@ -395,7 +395,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 16;
simd_value = _mm_load_si128((__m128i*)source_bytes);
_mm_store_si128((__m128i*)destination, simd_value);
_mm_store_si128((__m128i*)destination_bytes, simd_value);
}
} else {
__m128i simd_value;
@ -404,7 +404,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
source_bytes -= 16;
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
_mm_storeu_si128((__m128i*)destination, simd_value);
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
}
}
break;
@ -443,3 +443,204 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
return HALLOCY_ERROR_NONE;
}
/*
 * Compares the first `size` bytes of two memory regions for equality.
 *
 * Neither region is modified. The widest SIMD path reported by
 * hallocy_is_simd_supported() handles the bulk of the data; whatever does not
 * fill a whole vector (or machine word) is finished by the byte loop at the
 * end of the function.
 *
 * left_side  - first region, at least `size` readable bytes.
 * right_side - second region, at least `size` readable bytes.
 * size       - number of bytes to compare.
 *
 * Returns true when all `size` bytes match (always true for size == 0),
 * false on the first mismatch or when either pointer is NULL.
 */
bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size) {
    if (left_side == NULL || right_side == NULL) {
        return false;
    }

    const unsigned char *left_side_bytes = (const unsigned char*)left_side;
    const unsigned char *right_side_bytes = (const unsigned char*)right_side;
    const unsigned char *end_address = left_side_bytes + size;

    switch (hallocy_is_simd_supported()) {
        #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
        case HALLOCY_SIMD_NEON: {
            /* When both pointers share the same misalignment, walk bytes until
             * the vector loads below start on a 16-byte boundary. */
            if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
                while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }
            }

            while (end_address - left_side_bytes >= 16) {
                /* vld1q_u8 loads 16 bytes from memory (no alignment required). */
                uint8x16_t simd_left_side_value = vld1q_u8(left_side_bytes);
                uint8x16_t simd_right_side_value = vld1q_u8(right_side_bytes);

                /* Each lane becomes 0xFF where the bytes match, 0x00 otherwise. */
                uint8x16_t result = vceqq_u8(simd_left_side_value, simd_right_side_value);

                #if defined(_M_ARM64) || defined(__aarch64__)
                /* All lanes matched only if the smallest lane is still 0xFF. */
                if (vminvq_u8(result) != 0xFF) {
                    return false;
                }
                #else
                /* 32-bit NEON has no across-vector minimum: AND both halves
                 * together and check that all 64 bits are still set. */
                uint8x8_t folded = vand_u8(vget_low_u8(result), vget_high_u8(result));
                if (vget_lane_u64(vreinterpret_u64_u8(folded), 0) != ~(uint64_t)0) {
                    return false;
                }
                #endif

                left_side_bytes += 16;
                right_side_bytes += 16;
            }
            break;
        }
        #else
        case HALLOCY_SIMD_AVX512: {
            if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) {
                /* Same misalignment on both sides: consume bytes until both
                 * pointers are 64-byte aligned, then use aligned loads. */
                while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 64) {
                    __m512i simd_left_side_value = _mm512_load_si512((const __m512i*)left_side_bytes);
                    __m512i simd_right_side_value = _mm512_load_si512((const __m512i*)right_side_bytes);

                    /* XOR is zero exactly where the inputs agree. */
                    __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value);
                    if (_mm512_test_epi64_mask(result, result) != 0) {
                        return false;
                    }

                    left_side_bytes += 64;
                    right_side_bytes += 64;
                }
            } else {
                while (end_address - left_side_bytes >= 64) {
                    __m512i simd_left_side_value = _mm512_loadu_si512((const __m512i*)left_side_bytes);
                    __m512i simd_right_side_value = _mm512_loadu_si512((const __m512i*)right_side_bytes);

                    __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value);
                    if (_mm512_test_epi64_mask(result, result) != 0) {
                        return false;
                    }

                    left_side_bytes += 64;
                    right_side_bytes += 64;
                }
            }
            break;
        }
        case HALLOCY_SIMD_AVX2:
        case HALLOCY_SIMD_AVX: {
            /* This case also serves AVX-only CPUs, so the XOR is done with the
             * AVX float-domain instruction; the integer 256-bit XOR needs AVX2.
             * The bit pattern of the result is identical. */
            if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) {
                while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 32) {
                    __m256i simd_left_side_value = _mm256_load_si256((const __m256i*)left_side_bytes);
                    __m256i simd_right_side_value = _mm256_load_si256((const __m256i*)right_side_bytes);

                    __m256i result = _mm256_castps_si256(_mm256_xor_ps(
                        _mm256_castsi256_ps(simd_left_side_value),
                        _mm256_castsi256_ps(simd_right_side_value)
                    ));

                    /* testz returns 1 only when every bit of the XOR is zero. */
                    if (_mm256_testz_si256(result, result) == 0) {
                        return false;
                    }

                    left_side_bytes += 32;
                    right_side_bytes += 32;
                }
            } else {
                while (end_address - left_side_bytes >= 32) {
                    __m256i simd_left_side_value = _mm256_loadu_si256((const __m256i*)left_side_bytes);
                    __m256i simd_right_side_value = _mm256_loadu_si256((const __m256i*)right_side_bytes);

                    __m256i result = _mm256_castps_si256(_mm256_xor_ps(
                        _mm256_castsi256_ps(simd_left_side_value),
                        _mm256_castsi256_ps(simd_right_side_value)
                    ));

                    if (_mm256_testz_si256(result, result) == 0) {
                        return false;
                    }

                    left_side_bytes += 32;
                    right_side_bytes += 32;
                }
            }
            break;
        }
        case HALLOCY_SIMD_SSE2:
        case HALLOCY_SIMD_SSE: {
            /* Only SSE2 instructions are used here (byte compare + movemask);
             * the PTEST-based check would require SSE4.1. */
            if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
                while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 16) {
                    __m128i simd_left_side_value = _mm_load_si128((const __m128i*)left_side_bytes);
                    __m128i simd_right_side_value = _mm_load_si128((const __m128i*)right_side_bytes);

                    /* One mask bit per byte; 0xFFFF means all 16 bytes matched. */
                    __m128i result = _mm_cmpeq_epi8(simd_left_side_value, simd_right_side_value);
                    if (_mm_movemask_epi8(result) != 0xFFFF) {
                        return false;
                    }

                    left_side_bytes += 16;
                    right_side_bytes += 16;
                }
            } else {
                while (end_address - left_side_bytes >= 16) {
                    __m128i simd_left_side_value = _mm_loadu_si128((const __m128i*)left_side_bytes);
                    __m128i simd_right_side_value = _mm_loadu_si128((const __m128i*)right_side_bytes);

                    __m128i result = _mm_cmpeq_epi8(simd_left_side_value, simd_right_side_value);
                    if (_mm_movemask_epi8(result) != 0xFFFF) {
                        return false;
                    }

                    left_side_bytes += 16;
                    right_side_bytes += 16;
                }
            }
            break;
        }
        #endif
        default: {
            size_t word_size = sizeof(size_t);

            /* Word-at-a-time comparison is only attempted when both pointers
             * can be word-aligned simultaneously. */
            if ((size_t)left_side_bytes % word_size == (size_t)right_side_bytes % word_size) {
                while (((size_t)left_side_bytes % word_size) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                const size_t *left_side_word = (const size_t*)left_side_bytes;
                const size_t *right_side_word = (const size_t*)right_side_bytes;

                /* Bound the loop by the bytes remaining so no pointer beyond
                 * the end of the region is ever formed. */
                while ((size_t)(end_address - (const unsigned char*)left_side_word) >= word_size) {
                    if (*left_side_word != *right_side_word) {
                        return false;
                    }

                    left_side_word += 1;
                    right_side_word += 1;
                }

                right_side_bytes = (const unsigned char*)right_side_word;
                left_side_bytes = (const unsigned char*)left_side_word;
            }
            break;
        }
    }

    /* Tail: whatever the vector/word loops above did not consume. */
    while (left_side_bytes != end_address) {
        if (*left_side_bytes != *right_side_bytes) {
            return false;
        }

        left_side_bytes += 1;
        right_side_bytes += 1;
    }

    return true;
}

View file

@ -59,6 +59,19 @@
hallocy_move_memory(text_copy + 5, text_copy + 10, 15);
printf("%s\n", text_copy);
if (!hallocy_compare_memory(text, text_copy, 36)) {
printf("text and text_copy are not equal\n");
}
char *text_copy_copy = (char*)hallocy_malloc(36);
hallocy_copy_memory(text_copy_copy, text_copy, 36);
if (hallocy_compare_memory(text_copy_copy, text_copy, 36)) {
printf("text_copy_copy and text_copy are equal\n");
}
hallocy_free(text_copy_copy);
hallocy_free(text_copy);
hallocy_free(text);
return 0;