refactor(linting): removed magical numbers from memory functions

This commit is contained in:
Mineplay 2025-10-05 10:02:50 -05:00
parent 7e2b9e5045
commit 73eeb4ef70
3 changed files with 105 additions and 101 deletions

View file

@ -39,6 +39,11 @@
#endif #endif
#endif #endif
#define SIMD_64_WIDTH 8
#define SIMD_128_WIDTH 16
#define SIMD_256_WIDTH 32
#define SIMD_512_WIDTH 64
typedef enum { typedef enum {
HALLOCY_SIMD_UNDEFINED = 0, HALLOCY_SIMD_UNDEFINED = 0,
HALLOCY_SIMD_NONE = 1, HALLOCY_SIMD_NONE = 1,

View file

@ -39,44 +39,44 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
switch (hallocy_is_simd_supported()) { switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
case HALLOCY_SIMD_NEON: { case HALLOCY_SIMD_NEON: {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes; *destination_bytes = value_bytes;
destination_bytes += 1; destination_bytes += 1;
} }
uint8x16_t simd_value = vdupq_n_u8(value_bytes); uint8x16_t simd_value = vdupq_n_u8(value_bytes);
while (destination_bytes - end_address >= 16) { while (destination_bytes - end_address >= SIMD_128_WIDTH) {
vst1q_u8(destination_bytes, simd_value); vst1q_u8(destination_bytes, simd_value);
destination_bytes += 16; destination_bytes += SIMD_128_WIDTH;
} }
break; break;
} }
#else #else
case HALLOCY_SIMD_AVX512: { case HALLOCY_SIMD_AVX512: {
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes; *destination_bytes = value_bytes;
destination_bytes += 1; destination_bytes += 1;
} }
__m512i simd_value = _mm512_set1_epi8(value_bytes); __m512i simd_value = _mm512_set1_epi8(value_bytes);
while (destination_bytes - end_address >= 64) { while (destination_bytes - end_address >= SIMD_512_WIDTH) {
_mm512_store_si512((__m512i*)destination_bytes, simd_value); _mm512_store_si512((__m512i*)destination_bytes, simd_value);
destination_bytes += 64; destination_bytes += SIMD_512_WIDTH;
} }
break; break;
} }
case HALLOCY_SIMD_AVX2: { case HALLOCY_SIMD_AVX2: {
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes; *destination_bytes = value_bytes;
destination_bytes += 1; destination_bytes += 1;
} }
__m256i simd_value = _mm256_set1_epi8(value_bytes); __m256i simd_value = _mm256_set1_epi8(value_bytes);
while (destination_bytes - end_address >= 32) { while (destination_bytes - end_address >= SIMD_256_WIDTH) {
_mm256_store_si256((__m256i*)destination_bytes, simd_value); _mm256_store_si256((__m256i*)destination_bytes, simd_value);
destination_bytes += 32; destination_bytes += SIMD_256_WIDTH;
} }
break; break;
@ -85,15 +85,15 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
case HALLOCY_SIMD_AVX: case HALLOCY_SIMD_AVX:
case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE2:
case HALLOCY_SIMD_SSE: { case HALLOCY_SIMD_SSE: {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes; *destination_bytes = value_bytes;
destination_bytes += 1; destination_bytes += 1;
} }
__m128i simd_value = _mm_set1_epi8(value_bytes); __m128i simd_value = _mm_set1_epi8(value_bytes);
while (destination_bytes - end_address >= 16) { while (destination_bytes - end_address >= SIMD_128_WIDTH) {
_mm_store_si128((__m128i*)destination_bytes, simd_value); _mm_store_si128((__m128i*)destination_bytes, simd_value);
destination_bytes += 16; destination_bytes += SIMD_128_WIDTH;
} }
break; break;
@ -108,7 +108,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
size_t value_word = 0; size_t value_word = 0;
for (size_t i = 0; i < word_size; i++) { for (size_t i = 0; i < word_size; i++) {
value_word |= (size_t)value_bytes << (i * 8); value_word |= (size_t)value_bytes << (i * SIMD_64_WIDTH);
} }
size_t *destination_word = (size_t*)destination_bytes; size_t *destination_word = (size_t*)destination_bytes;
@ -143,8 +143,8 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
switch (hallocy_is_simd_supported()) { switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
case HALLOCY_SIMD_NEON: { case HALLOCY_SIMD_NEON: {
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
destination_bytes += 1; destination_bytes += 1;
source_bytes += 1; source_bytes += 1;
@ -152,39 +152,39 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
} }
uint8x16_t simd_value; uint8x16_t simd_value;
while (destination_bytes - end_address >= 16) { while (destination_bytes - end_address >= SIMD_128_WIDTH) {
simd_value = vdupq_n_u8(source_bytes); simd_value = vdupq_n_u8(source_bytes);
vst1q_u8(destination_bytes, simd_value); vst1q_u8(destination_bytes, simd_value);
destination_bytes += 16; destination_bytes += SIMD_128_WIDTH;
source_bytes += 16; source_bytes += SIMD_128_WIDTH;
} }
break; break;
} }
#else #else
case HALLOCY_SIMD_AVX512: { case HALLOCY_SIMD_AVX512: {
if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
destination_bytes += 1; destination_bytes += 1;
source_bytes += 1; source_bytes += 1;
} }
__m512i simd_value; __m512i simd_value;
while (destination_bytes - end_address >= 64) { while (destination_bytes - end_address >= SIMD_512_WIDTH) {
simd_value = _mm512_load_si512((__m512i*)source_bytes); simd_value = _mm512_load_si512((__m512i*)source_bytes);
_mm512_store_si512((__m512i*)destination_bytes, simd_value); _mm512_store_si512((__m512i*)destination_bytes, simd_value);
destination_bytes += 64; destination_bytes += SIMD_512_WIDTH;
source_bytes += 64; source_bytes += SIMD_512_WIDTH;
} }
} else { } else {
__m512i simd_value; __m512i simd_value;
while (destination_bytes - end_address >= 64) { while (destination_bytes - end_address >= SIMD_512_WIDTH) {
simd_value = _mm512_loadu_si512((__m512i*)source_bytes); simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value); _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
destination_bytes += 64; destination_bytes += SIMD_512_WIDTH;
source_bytes += 64; source_bytes += SIMD_512_WIDTH;
} }
} }
break; break;
@ -192,29 +192,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX2:
case HALLOCY_SIMD_AVX: { case HALLOCY_SIMD_AVX: {
if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_256_WIDTH) {
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
destination_bytes += 1; destination_bytes += 1;
source_bytes += 1; source_bytes += 1;
} }
__m256i simd_value; __m256i simd_value;
while (destination_bytes - end_address >= 32) { while (destination_bytes - end_address >= SIMD_256_WIDTH) {
simd_value = _mm256_load_si256((__m256i*)source_bytes); simd_value = _mm256_load_si256((__m256i*)source_bytes);
_mm256_store_si256((__m256i*)destination_bytes, simd_value); _mm256_store_si256((__m256i*)destination_bytes, simd_value);
destination_bytes += 32; destination_bytes += SIMD_256_WIDTH;
source_bytes += 32; source_bytes += SIMD_256_WIDTH;
} }
} else { } else {
__m256i simd_value; __m256i simd_value;
while (destination_bytes - end_address >= 32) { while (destination_bytes - end_address >= SIMD_256_WIDTH) {
simd_value = _mm256_loadu_si256((__m256i*)source_bytes); simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value); _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
destination_bytes += 32; destination_bytes += SIMD_256_WIDTH;
source_bytes += 32; source_bytes += SIMD_256_WIDTH;
} }
} }
break; break;
@ -222,29 +222,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE2:
case HALLOCY_SIMD_SSE: { case HALLOCY_SIMD_SSE: {
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
destination_bytes += 1; destination_bytes += 1;
source_bytes += 1; source_bytes += 1;
} }
__m128i simd_value; __m128i simd_value;
while (destination_bytes - end_address >= 16) { while (destination_bytes - end_address >= SIMD_128_WIDTH) {
simd_value = _mm_load_si128((__m128i*)source_bytes); simd_value = _mm_load_si128((__m128i*)source_bytes);
_mm_store_si128((__m128i*)destination_bytes, simd_value); _mm_store_si128((__m128i*)destination_bytes, simd_value);
destination_bytes += 16; destination_bytes += SIMD_128_WIDTH;
source_bytes += 16; source_bytes += SIMD_128_WIDTH;
} }
} else { } else {
__m128i simd_value; __m128i simd_value;
while (destination_bytes - end_address >= 16) { while (destination_bytes - end_address >= SIMD_128_WIDTH) {
simd_value = _mm_loadu_si128((__m128i*)source_bytes); simd_value = _mm_loadu_si128((__m128i*)source_bytes);
_mm_storeu_si128((__m128i*)destination_bytes, simd_value); _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
destination_bytes += 16; destination_bytes += SIMD_128_WIDTH;
source_bytes += 16; source_bytes += SIMD_128_WIDTH;
} }
} }
break; break;
@ -301,8 +301,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
switch (hallocy_is_simd_supported()) { switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
case HALLOCY_SIMD_NEON: { case HALLOCY_SIMD_NEON: {
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
destination_bytes -= 1; destination_bytes -= 1;
source_bytes -= 1; source_bytes -= 1;
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
@ -310,9 +310,9 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
} }
uint8x16_t simd_value; uint8x16_t simd_value;
while (end_address - destination_bytes >= 16) { while (end_address - destination_bytes >= SIMD_128_WIDTH) {
destination_bytes -= 16; destination_bytes -= SIMD_128_WIDTH;
source_bytes -= 16; source_bytes -= SIMD_128_WIDTH;
simd_value = vdupq_n_u8(source_bytes); simd_value = vdupq_n_u8(source_bytes);
vst1q_u8(destination_bytes, simd_value); vst1q_u8(destination_bytes, simd_value);
@ -321,26 +321,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
} }
#else #else
case HALLOCY_SIMD_AVX512: { case HALLOCY_SIMD_AVX512: {
if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
destination_bytes -= 1; destination_bytes -= 1;
source_bytes -= 1; source_bytes -= 1;
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
} }
__m512i simd_value; __m512i simd_value;
while (end_address - destination_bytes >= 64) { while (end_address - destination_bytes >= SIMD_512_WIDTH) {
destination_bytes -= 64; destination_bytes -= SIMD_512_WIDTH;
source_bytes -= 64; source_bytes -= SIMD_512_WIDTH;
simd_value = _mm512_load_si512((__m512i*)source_bytes); simd_value = _mm512_load_si512((__m512i*)source_bytes);
_mm512_store_si512((__m512i*)destination_bytes, simd_value); _mm512_store_si512((__m512i*)destination_bytes, simd_value);
} }
} else { } else {
__m512i simd_value; __m512i simd_value;
while (end_address - destination_bytes >= 64) { while (end_address - destination_bytes >= SIMD_512_WIDTH) {
destination_bytes -= 64; destination_bytes -= SIMD_512_WIDTH;
source_bytes -= 64; source_bytes -= SIMD_512_WIDTH;
simd_value = _mm512_loadu_si512((__m512i*)source_bytes); simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value); _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
@ -351,8 +351,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX2:
case HALLOCY_SIMD_AVX: { case HALLOCY_SIMD_AVX: {
if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
destination_bytes -= 1; destination_bytes -= 1;
source_bytes -= 1; source_bytes -= 1;
@ -360,18 +360,18 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
} }
__m256i simd_value; __m256i simd_value;
while (end_address - destination_bytes >= 32) { while (end_address - destination_bytes >= SIMD_256_WIDTH) {
destination_bytes -= 32; destination_bytes -= SIMD_256_WIDTH;
source_bytes -= 32; source_bytes -= SIMD_256_WIDTH;
simd_value = _mm256_load_si256((__m256i*)source_bytes); simd_value = _mm256_load_si256((__m256i*)source_bytes);
_mm256_store_si256((__m256i*)destination_bytes, simd_value); _mm256_store_si256((__m256i*)destination_bytes, simd_value);
} }
} else { } else {
__m256i simd_value; __m256i simd_value;
while (end_address - destination_bytes >= 32) { while (end_address - destination_bytes >= SIMD_256_WIDTH) {
destination_bytes -= 32; destination_bytes -= SIMD_256_WIDTH;
source_bytes -= 32; source_bytes -= SIMD_256_WIDTH;
simd_value = _mm256_loadu_si256((__m256i*)source_bytes); simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value); _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
@ -382,26 +382,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE2:
case HALLOCY_SIMD_SSE: { case HALLOCY_SIMD_SSE: {
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
destination_bytes -= 1; destination_bytes -= 1;
source_bytes -= 1; source_bytes -= 1;
*destination_bytes = *source_bytes; *destination_bytes = *source_bytes;
} }
__m128i simd_value; __m128i simd_value;
while (end_address - destination_bytes >= 16) { while (end_address - destination_bytes >= SIMD_128_WIDTH) {
destination_bytes -= 16; destination_bytes -= SIMD_128_WIDTH;
source_bytes -= 16; source_bytes -= SIMD_128_WIDTH;
simd_value = _mm_load_si128((__m128i*)source_bytes); simd_value = _mm_load_si128((__m128i*)source_bytes);
_mm_store_si128((__m128i*)destination_bytes, simd_value); _mm_store_si128((__m128i*)destination_bytes, simd_value);
} }
} else { } else {
__m128i simd_value; __m128i simd_value;
while (end_address - destination_bytes >= 16) { while (end_address - destination_bytes >= SIMD_128_WIDTH) {
destination_bytes -= 16; destination_bytes -= SIMD_128_WIDTH;
source_bytes -= 16; source_bytes -= SIMD_128_WIDTH;
simd_value = _mm_loadu_si128((__m128i*)source_bytes); simd_value = _mm_loadu_si128((__m128i*)source_bytes);
_mm_storeu_si128((__m128i*)destination_bytes, simd_value); _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
@ -457,8 +457,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
switch (hallocy_is_simd_supported()) { switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
case HALLOCY_SIMD_NEON: { case HALLOCY_SIMD_NEON: {
if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
if (*left_side_bytes != *right_side_bytes) { if (*left_side_bytes != *right_side_bytes) {
return false; return false;
} }
@ -468,7 +468,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
} }
} }
while (end_address - destination_bytes >= 16) { while (end_address - destination_bytes >= SIMD_128_WIDTH) {
uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes); uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes);
uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes); uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes);
@ -477,15 +477,15 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 16; left_side_bytes += SIMD_128_WIDTH;
right_side_bytes += 16 right_side_bytes += SIMD_128_WIDTH;
} }
break; break;
} }
#else #else
case HALLOCY_SIMD_AVX512: { case HALLOCY_SIMD_AVX512: {
if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) { if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) {
while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) { while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) {
if (*left_side_bytes != *right_side_bytes) { if (*left_side_bytes != *right_side_bytes) {
return false; return false;
} }
@ -494,7 +494,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
right_side_bytes += 1; right_side_bytes += 1;
} }
while (left_side_bytes - end_address >= 64) { while (left_side_bytes - end_address >= SIMD_512_WIDTH) {
__m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes); __m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes);
__m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes); __m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes);
@ -503,11 +503,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 64; left_side_bytes += SIMD_512_WIDTH;
right_side_bytes += 64; right_side_bytes += SIMD_512_WIDTH;
} }
} else { } else {
while (left_side_bytes - end_address >= 64) { while (left_side_bytes - end_address >= SIMD_512_WIDTH) {
__m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes); __m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes);
__m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes); __m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes);
@ -516,8 +516,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 64; left_side_bytes += SIMD_512_WIDTH;
right_side_bytes += 64; right_side_bytes += SIMD_512_WIDTH;
} }
} }
break; break;
@ -525,8 +525,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX2:
case HALLOCY_SIMD_AVX: { case HALLOCY_SIMD_AVX: {
if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) { if ((size_t)left_side_bytes % SIMD_256_WIDTH == (size_t)right_side_bytes % SIMD_256_WIDTH) {
while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) { while (((size_t)left_side_bytes % SIMD_256_WIDTH) != 0 && left_side_bytes != end_address) {
if (*left_side_bytes != *right_side_bytes) { if (*left_side_bytes != *right_side_bytes) {
return false; return false;
} }
@ -535,7 +535,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
right_side_bytes += 1; right_side_bytes += 1;
} }
while (left_side_bytes - end_address >= 32) { while (left_side_bytes - end_address >= SIMD_256_WIDTH) {
__m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes); __m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes);
__m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes); __m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes);
@ -544,11 +544,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 32; left_side_bytes += SIMD_256_WIDTH;
right_side_bytes += 32; right_side_bytes += SIMD_256_WIDTH;
} }
} else { } else {
while (left_side_bytes - end_address >= 32) { while (left_side_bytes - end_address >= SIMD_256_WIDTH) {
__m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes); __m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes);
__m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes); __m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes);
@ -557,8 +557,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 32; left_side_bytes += SIMD_256_WIDTH;
right_side_bytes += 32; right_side_bytes += SIMD_256_WIDTH;
} }
} }
break; break;
@ -566,8 +566,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE2:
case HALLOCY_SIMD_SSE: { case HALLOCY_SIMD_SSE: {
if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
if (*left_side_bytes != *right_side_bytes) { if (*left_side_bytes != *right_side_bytes) {
return false; return false;
} }
@ -585,11 +585,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 16; left_side_bytes += SIMD_128_WIDTH;
right_side_bytes += 16; right_side_bytes += SIMD_128_WIDTH;
} }
} else { } else {
while (left_side_bytes - end_address >= 16) { while (left_side_bytes - end_address >= SIMD_128_WIDTH) {
__m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes); __m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes);
__m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes); __m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes);
@ -598,8 +598,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
return false; return false;
} }
left_side_bytes += 16; left_side_bytes += SIMD_128_WIDTH;
right_side_bytes += 16; right_side_bytes += SIMD_128_WIDTH;
} }
} }
break; break;

View file

@ -21,9 +21,8 @@
*/ */
#include "../../Include/Hallocy/Utils/Simd.h" #include "../../Include/Hallocy/Utils/Simd.h"
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
HallocySimdType hallocy_is_simd_supported(void) { HallocySimdType hallocy_is_simd_supported(void) {
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) { if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) {
return hallocy_supported_simd; return hallocy_supported_simd;
} }