diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..4cc39b5 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,11 @@ +Checks: > + clang-analyzer-*, + bugprone-*, + performance-*, + readability-*, + portability-*, + cppcoreguidelines-*, + -clang-analyzer-osx*, + -cppcoreguidelines-pro-type-vararg +WarningsAsErrors: '' +FormatStyle: file diff --git a/.gitignore b/.gitignore index 2d82e85..7ab93c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -Build \ No newline at end of file +Build +compile_commands.json +.cache \ No newline at end of file diff --git a/Include/Hallocy/Core/Allocator.h b/Include/Hallocy/Core/Allocator.h index 3699476..014b4ff 100644 --- a/Include/Hallocy/Core/Allocator.h +++ b/Include/Hallocy/Core/Allocator.h @@ -28,11 +28,11 @@ #include "../Utils/Error.h" -void *hallocy_allocate(const size_t size, const bool zero_memory); +void *hallocy_allocate(size_t size, bool zero_memory); static inline void *hallocy_malloc(const size_t size) { return hallocy_allocate(size, false); } static inline void *hallocy_calloc(const size_t size, size_t count) { return hallocy_allocate(size * count, true); } -void *hallocy_realloc(void *memory_pointer, const size_t size); +void *hallocy_realloc(void *memory_pointer, size_t size); HallocyError hallocy_free(void *pointer); -#endif \ No newline at end of file +#endif diff --git a/Include/Hallocy/Core/Memory.h b/Include/Hallocy/Core/Memory.h index 29e78d3..fad05bf 100644 --- a/Include/Hallocy/Core/Memory.h +++ b/Include/Hallocy/Core/Memory.h @@ -28,9 +28,9 @@ #include "../Utils/Error.h" -HallocyError hallocy_set_memory(void *destination, int value, const size_t size); -HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size); -HallocyError hallocy_move_memory(void *destination, void *source, const size_t size); -bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size); +HallocyError hallocy_set_memory(void *destination, int value, size_t size); +HallocyError hallocy_copy_memory(void *destination, void *source, size_t size); +HallocyError hallocy_move_memory(void *destination, void *source, size_t size); +bool hallocy_compare_memory(void *left_side, void *right_side, size_t size); -#endif \ No newline at end of file +#endif diff --git a/Include/Hallocy/Utils/Error.h b/Include/Hallocy/Utils/Error.h index 3e65784..fc87501 100644 --- a/Include/Hallocy/Utils/Error.h +++ b/Include/Hallocy/Utils/Error.h @@ -31,4 +31,4 @@ typedef enum { HALLOCY_ERROR_INVALID_PARAM = 4 } HallocyError; -#endif \ No newline at end of file +#endif diff --git a/Include/Hallocy/Utils/Simd.h b/Include/Hallocy/Utils/Simd.h index 3860e58..92d1ab6 100644 --- a/Include/Hallocy/Utils/Simd.h +++ b/Include/Hallocy/Utils/Simd.h @@ -39,6 +39,11 @@ #endif #endif +#define SIMD_64_WIDTH 8 +#define SIMD_128_WIDTH 16 +#define SIMD_256_WIDTH 32 +#define SIMD_512_WIDTH 64 + typedef enum { HALLOCY_SIMD_UNDEFINED = 0, HALLOCY_SIMD_NONE = 1, @@ -50,6 +55,6 @@ typedef enum { HALLOCY_SIMD_NEON = 7 } HallocySimdType; -HallocySimdType hallocy_is_simd_supported(); +HallocySimdType hallocy_is_simd_supported(void); -#endif \ No newline at end of file +#endif diff --git a/Src/Core/Allocator.c b/Src/Core/Allocator.c index d703421..a4f4ea9 100644 --- a/Src/Core/Allocator.c +++ b/Src/Core/Allocator.c @@ -55,7 +55,6 @@ static _Thread_local size_t hallocy_small_memory_freed = 0; static _Thread_local size_t hallocy_small_memory_allocated = 0; static _Thread_local HallocyMemoryHeader *hallocy_small_memory_bin = NULL; -static size_t page_size = 0; static size_t hallocy_small_allocation_size = 0; static size_t hallocy_medium_allocation_size = 0; @@ -70,6 +69,7 @@ static BOOL CALLBACK hallocy_initialize_mutex(PINIT_ONCE init_once, PVOID parame #endif void *hallocy_allocate(const size_t size, const bool zero_memory) { + static size_t page_size = 0; if (page_size == 0) { #if defined(_WIN32) SYSTEM_INFO system_info; @@ -431,4 +431,4 @@ HallocyError hallocy_free(void *pointer) { } return HALLOCY_ERROR_NONE; -} \ No newline at end of file +} diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index 3fc4417..7c633d3 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -39,44 +39,44 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size) switch (hallocy_is_simd_supported()) { #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) case HALLOCY_SIMD_NEON: { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; destination_bytes += 1; } uint8x16_t simd_value = vdupq_n_u8(value_bytes); - while (destination_bytes - end_address >= 16) { + while (destination_bytes - end_address >= SIMD_128_WIDTH) { vst1q_u8(destination_bytes, simd_value); - destination_bytes += 16; + destination_bytes += SIMD_128_WIDTH; } break; } #else case HALLOCY_SIMD_AVX512: { - while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; destination_bytes += 1; } - __m512i simd_value = _mm512_set1_epi8(value_bytes); - while (destination_bytes - end_address >= 64) { + __m512i simd_value = _mm512_set1_epi8((char)value_bytes); + while (destination_bytes - end_address >= SIMD_512_WIDTH) { _mm512_store_si512((__m512i*)destination_bytes, simd_value); - destination_bytes += 64; + destination_bytes += SIMD_512_WIDTH; } break; } case HALLOCY_SIMD_AVX2: { - while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; destination_bytes += 1; } - __m256i simd_value = _mm256_set1_epi8(value_bytes); - while (destination_bytes - end_address >= 32) { + __m256i simd_value = _mm256_set1_epi8((char)value_bytes); + while (destination_bytes - end_address >= SIMD_256_WIDTH) { _mm256_store_si256((__m256i*)destination_bytes, simd_value); - destination_bytes += 32; + destination_bytes += SIMD_256_WIDTH; } break; @@ -85,15 +85,15 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size) case HALLOCY_SIMD_AVX: case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE: { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; destination_bytes += 1; } - __m128i simd_value = _mm_set1_epi8(value_bytes); - while (destination_bytes - end_address >= 16) { + __m128i simd_value = _mm_set1_epi8((char)value_bytes); + while (destination_bytes - end_address >= SIMD_128_WIDTH) { _mm_store_si128((__m128i*)destination_bytes, simd_value); - destination_bytes += 16; + destination_bytes += SIMD_128_WIDTH; } break; @@ -108,7 +108,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size) size_t value_word = 0; for (size_t i = 0; i < word_size; i++) { - value_word |= (size_t)value_bytes << (i * 8); + value_word |= (size_t)value_bytes << (i * SIMD_64_WIDTH); } size_t *destination_word = (size_t*)destination_bytes; @@ -143,8 +143,8 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s switch (hallocy_is_simd_supported()) { #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) case HALLOCY_SIMD_NEON: { - if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = *source_bytes; destination_bytes += 1; source_bytes += 1; @@ -152,39 +152,39 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s } uint8x16_t simd_value; - while (destination_bytes - end_address >= 16) { + while (destination_bytes - end_address >= SIMD_128_WIDTH) { simd_value = vdupq_n_u8(source_bytes); vst1q_u8(destination_bytes, simd_value); - destination_bytes += 16; - source_bytes += 16; + destination_bytes += SIMD_128_WIDTH; + source_bytes += SIMD_128_WIDTH; } break; } #else case HALLOCY_SIMD_AVX512: { - if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { + while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = *source_bytes; destination_bytes += 1; source_bytes += 1; } __m512i simd_value; - while (destination_bytes - end_address >= 64) { + while (destination_bytes - end_address >= SIMD_512_WIDTH) { simd_value = _mm512_load_si512((__m512i*)source_bytes); _mm512_store_si512((__m512i*)destination_bytes, simd_value); - destination_bytes += 64; - source_bytes += 64; + destination_bytes += SIMD_512_WIDTH; + source_bytes += SIMD_512_WIDTH; } } else { __m512i simd_value; - while (destination_bytes - end_address >= 64) { + while (destination_bytes - end_address >= SIMD_512_WIDTH) { simd_value = _mm512_loadu_si512((__m512i*)source_bytes); _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); - destination_bytes += 64; - source_bytes += 64; + destination_bytes += SIMD_512_WIDTH; + source_bytes += SIMD_512_WIDTH; } } break; @@ -192,29 +192,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX: { - if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_256_WIDTH) { + while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = *source_bytes; destination_bytes += 1; source_bytes += 1; } __m256i simd_value; - while (destination_bytes - end_address >= 32) { + while (destination_bytes - end_address >= SIMD_256_WIDTH) { simd_value = _mm256_load_si256((__m256i*)source_bytes); _mm256_store_si256((__m256i*)destination_bytes, simd_value); - destination_bytes += 32; - source_bytes += 32; + destination_bytes += SIMD_256_WIDTH; + source_bytes += SIMD_256_WIDTH; } } else { __m256i simd_value; - while (destination_bytes - end_address >= 32) { + while (destination_bytes - end_address >= SIMD_256_WIDTH) { simd_value = _mm256_loadu_si256((__m256i*)source_bytes); _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); - destination_bytes += 32; - source_bytes += 32; + destination_bytes += SIMD_256_WIDTH; + source_bytes += SIMD_256_WIDTH; } } break; @@ -222,29 +222,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE: { - if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = *source_bytes; destination_bytes += 1; source_bytes += 1; } __m128i simd_value; - while (destination_bytes - end_address >= 16) { + while (destination_bytes - end_address >= SIMD_128_WIDTH) { simd_value = _mm_load_si128((__m128i*)source_bytes); _mm_store_si128((__m128i*)destination_bytes, simd_value); - destination_bytes += 16; - source_bytes += 16; + destination_bytes += SIMD_128_WIDTH; + source_bytes += SIMD_128_WIDTH; } } else { __m128i simd_value; - while (destination_bytes - end_address >= 16) { + while (destination_bytes - end_address >= SIMD_128_WIDTH) { simd_value = _mm_loadu_si128((__m128i*)source_bytes); _mm_storeu_si128((__m128i*)destination_bytes, simd_value); - destination_bytes += 16; - source_bytes += 16; + destination_bytes += SIMD_128_WIDTH; + source_bytes += SIMD_128_WIDTH; } } break; @@ -301,8 +301,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s switch (hallocy_is_simd_supported()) { #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) case HALLOCY_SIMD_NEON: { - if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { destination_bytes -= 1; source_bytes -= 1; *destination_bytes = *source_bytes; @@ -310,9 +310,9 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s } uint8x16_t simd_value; - while (end_address - destination_bytes >= 16) { - destination_bytes -= 16; - source_bytes -= 16; + while (end_address - destination_bytes >= SIMD_128_WIDTH) { + destination_bytes -= SIMD_128_WIDTH; + source_bytes -= SIMD_128_WIDTH; simd_value = vdupq_n_u8(source_bytes); vst1q_u8(destination_bytes, simd_value); @@ -321,26 +321,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s } #else case HALLOCY_SIMD_AVX512: { - if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { + while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { destination_bytes -= 1; source_bytes -= 1; *destination_bytes = *source_bytes; } __m512i simd_value; - while (end_address - destination_bytes >= 64) { - destination_bytes -= 64; - source_bytes -= 64; + while (end_address - destination_bytes >= SIMD_512_WIDTH) { + destination_bytes -= SIMD_512_WIDTH; + source_bytes -= SIMD_512_WIDTH; simd_value = _mm512_load_si512((__m512i*)source_bytes); _mm512_store_si512((__m512i*)destination_bytes, simd_value); } } else { __m512i simd_value; - while (end_address - destination_bytes >= 64) { - destination_bytes -= 64; - source_bytes -= 64; + while (end_address - destination_bytes >= SIMD_512_WIDTH) { + destination_bytes -= SIMD_512_WIDTH; + source_bytes -= SIMD_512_WIDTH; simd_value = _mm512_loadu_si512((__m512i*)source_bytes); _mm512_storeu_si512((__m512i*)destination_bytes, simd_value); @@ -351,8 +351,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX: { - if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { + while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) { destination_bytes -= 1; source_bytes -= 1; @@ -360,18 +360,18 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s } __m256i simd_value; - while (end_address - destination_bytes >= 32) { - destination_bytes -= 32; - source_bytes -= 32; + while (end_address - destination_bytes >= SIMD_256_WIDTH) { + destination_bytes -= SIMD_256_WIDTH; + source_bytes -= SIMD_256_WIDTH; simd_value = _mm256_load_si256((__m256i*)source_bytes); _mm256_store_si256((__m256i*)destination_bytes, simd_value); } } else { __m256i simd_value; - while (end_address - destination_bytes >= 32) { - destination_bytes -= 32; - source_bytes -= 32; + while (end_address - destination_bytes >= SIMD_256_WIDTH) { + destination_bytes -= SIMD_256_WIDTH; + source_bytes -= SIMD_256_WIDTH; simd_value = _mm256_loadu_si256((__m256i*)source_bytes); _mm256_storeu_si256((__m256i*)destination_bytes, simd_value); @@ -382,26 +382,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE: { - if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) { - while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) { + if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { + while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { destination_bytes -= 1; source_bytes -= 1; *destination_bytes = *source_bytes; } __m128i simd_value; - while (end_address - destination_bytes >= 16) { - destination_bytes -= 16; - source_bytes -= 16; + while (end_address - destination_bytes >= SIMD_128_WIDTH) { + destination_bytes -= SIMD_128_WIDTH; + source_bytes -= SIMD_128_WIDTH; simd_value = _mm_load_si128((__m128i*)source_bytes); _mm_store_si128((__m128i*)destination_bytes, simd_value); } } else { __m128i simd_value; - while (end_address - destination_bytes >= 16) { - destination_bytes -= 16; - source_bytes -= 16; + while (end_address - destination_bytes >= SIMD_128_WIDTH) { + destination_bytes -= SIMD_128_WIDTH; + source_bytes -= SIMD_128_WIDTH; simd_value = _mm_loadu_si128((__m128i*)source_bytes); _mm_storeu_si128((__m128i*)destination_bytes, simd_value); @@ -457,8 +457,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size switch (hallocy_is_simd_supported()) { #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) case HALLOCY_SIMD_NEON: { - if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { - while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { + if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) { + while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) { if (*left_side_bytes != *right_side_bytes) { return false; } @@ -468,7 +468,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size } } - while (end_address - destination_bytes >= 16) { + while (end_address - destination_bytes >= SIMD_128_WIDTH) { uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes); uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes); @@ -477,15 +477,15 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 16; - right_side_bytes += 16 + left_side_bytes += SIMD_128_WIDTH; + right_side_bytes += SIMD_128_WIDTH; } break; } #else case HALLOCY_SIMD_AVX512: { - if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) { - while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) { + if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) { + while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) { if (*left_side_bytes != *right_side_bytes) { return false; } @@ -494,7 +494,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size right_side_bytes += 1; } - while (left_side_bytes - end_address >= 64) { + while (left_side_bytes - end_address >= SIMD_512_WIDTH) { __m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes); __m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes); @@ -503,11 +503,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 64; - right_side_bytes += 64; + left_side_bytes += SIMD_512_WIDTH; + right_side_bytes += SIMD_512_WIDTH; } } else { - while (left_side_bytes - end_address >= 64) { + while (left_side_bytes - end_address >= SIMD_512_WIDTH) { __m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes); __m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes); @@ -516,8 +516,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 64; - right_side_bytes += 64; + left_side_bytes += SIMD_512_WIDTH; + right_side_bytes += SIMD_512_WIDTH; } } break; @@ -525,8 +525,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size case HALLOCY_SIMD_AVX2: case HALLOCY_SIMD_AVX: { - if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) { - while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) { + if ((size_t)left_side_bytes % SIMD_256_WIDTH == (size_t)right_side_bytes % SIMD_256_WIDTH) { + while (((size_t)left_side_bytes % SIMD_256_WIDTH) != 0 && left_side_bytes != end_address) { if (*left_side_bytes != *right_side_bytes) { return false; } @@ -535,7 +535,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size right_side_bytes += 1; } - while (left_side_bytes - end_address >= 32) { + while (left_side_bytes - end_address >= SIMD_256_WIDTH) { __m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes); __m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes); @@ -544,11 +544,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 32; - right_side_bytes += 32; + left_side_bytes += SIMD_256_WIDTH; + right_side_bytes += SIMD_256_WIDTH; } } else { - while (left_side_bytes - end_address >= 32) { + while (left_side_bytes - end_address >= SIMD_256_WIDTH) { __m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes); __m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes); @@ -557,8 +557,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 32; - right_side_bytes += 32; + left_side_bytes += SIMD_256_WIDTH; + right_side_bytes += SIMD_256_WIDTH; } } break; @@ -566,8 +566,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size case HALLOCY_SIMD_SSE2: case HALLOCY_SIMD_SSE: { - if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) { - while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) { + if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) { + while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) { if (*left_side_bytes != *right_side_bytes) { return false; } @@ -576,7 +576,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size right_side_bytes += 1; } - while (left_side_bytes - end_address >= 16) { + while (left_side_bytes - end_address >= SIMD_128_WIDTH) { __m128i simd_left_side_value = _mm_load_si128((__m128i*)left_side_bytes); __m128i simd_right_side_value = _mm_load_si128((__m128i*)right_side_bytes); @@ -585,11 +585,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 16; - right_side_bytes += 16; + left_side_bytes += SIMD_128_WIDTH; + right_side_bytes += SIMD_128_WIDTH; } } else { - while (left_side_bytes - end_address >= 16) { + while (left_side_bytes - end_address >= SIMD_128_WIDTH) { __m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes); __m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes); @@ -598,8 +598,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size return false; } - left_side_bytes += 16; - right_side_bytes += 16; + left_side_bytes += SIMD_128_WIDTH; + right_side_bytes += SIMD_128_WIDTH; } } break; @@ -643,4 +643,4 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size } return true; -} \ No newline at end of file +} diff --git a/Src/Utils/Simd.c b/Src/Utils/Simd.c index b06de6a..e4be20a 100644 --- a/Src/Utils/Simd.c +++ b/Src/Utils/Simd.c @@ -21,9 +21,8 @@ */ #include "../../Include/Hallocy/Utils/Simd.h" -static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED; - -HallocySimdType hallocy_is_simd_supported() { +HallocySimdType hallocy_is_simd_supported(void) { + static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED; if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) { return hallocy_supported_simd; } @@ -130,4 +129,4 @@ HallocySimdType hallocy_is_simd_supported() { hallocy_supported_simd = HALLOCY_SIMD_NONE; return hallocy_supported_simd; -} \ No newline at end of file +}