Compare commits
10 commits
e75542f6f2
...
db9d974f5e
| Author | SHA1 | Date | |
|---|---|---|---|
| db9d974f5e | |||
| 945137ee09 | |||
| afcdff4c17 | |||
| c642e75f1d | |||
| 4e545e649a | |||
| 73eeb4ef70 | |||
| 7e2b9e5045 | |||
| b985a99618 | |||
| d497356835 | |||
| 8359efa2bc |
9 changed files with 139 additions and 122 deletions
11
.clang-tidy
Normal file
11
.clang-tidy
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
Checks: >
|
||||||
|
clang-analyzer-*,
|
||||||
|
bugprone-*,
|
||||||
|
performance-*,
|
||||||
|
readability-*,
|
||||||
|
portability-*,
|
||||||
|
cppcoreguidelines-*,
|
||||||
|
-clang-analyzer-osx*,
|
||||||
|
-cppcoreguidelines-pro-type-vararg
|
||||||
|
WarningsAsErrors: ''
|
||||||
|
FormatStyle: file
|
||||||
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -1 +1,3 @@
|
||||||
Build
|
Build
|
||||||
|
compile_commands.json
|
||||||
|
.cache
|
||||||
|
|
@ -28,11 +28,11 @@
|
||||||
|
|
||||||
#include "../Utils/Error.h"
|
#include "../Utils/Error.h"
|
||||||
|
|
||||||
void *hallocy_allocate(const size_t size, const bool zero_memory);
|
void *hallocy_allocate(size_t size, bool zero_memory);
|
||||||
|
|
||||||
static inline void *hallocy_malloc(const size_t size) { return hallocy_allocate(size, false); }
|
static inline void *hallocy_malloc(const size_t size) { return hallocy_allocate(size, false); }
|
||||||
static inline void *hallocy_calloc(const size_t size, size_t count) { return hallocy_allocate(size * count, true); }
|
static inline void *hallocy_calloc(const size_t size, size_t count) { return hallocy_allocate(size * count, true); }
|
||||||
void *hallocy_realloc(void *memory_pointer, const size_t size);
|
void *hallocy_realloc(void *memory_pointer, size_t size);
|
||||||
HallocyError hallocy_free(void *pointer);
|
HallocyError hallocy_free(void *pointer);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,9 @@
|
||||||
|
|
||||||
#include "../Utils/Error.h"
|
#include "../Utils/Error.h"
|
||||||
|
|
||||||
HallocyError hallocy_set_memory(void *destination, int value, const size_t size);
|
HallocyError hallocy_set_memory(void *destination, int value, size_t size);
|
||||||
HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size);
|
HallocyError hallocy_copy_memory(void *destination, void *source, size_t size);
|
||||||
HallocyError hallocy_move_memory(void *destination, void *source, const size_t size);
|
HallocyError hallocy_move_memory(void *destination, void *source, size_t size);
|
||||||
bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size);
|
bool hallocy_compare_memory(void *left_side, void *right_side, size_t size);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -31,4 +31,4 @@ typedef enum {
|
||||||
HALLOCY_ERROR_INVALID_PARAM = 4
|
HALLOCY_ERROR_INVALID_PARAM = 4
|
||||||
} HallocyError;
|
} HallocyError;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,11 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define SIMD_64_WIDTH 8
|
||||||
|
#define SIMD_128_WIDTH 16
|
||||||
|
#define SIMD_256_WIDTH 32
|
||||||
|
#define SIMD_512_WIDTH 64
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
HALLOCY_SIMD_UNDEFINED = 0,
|
HALLOCY_SIMD_UNDEFINED = 0,
|
||||||
HALLOCY_SIMD_NONE = 1,
|
HALLOCY_SIMD_NONE = 1,
|
||||||
|
|
@ -50,6 +55,6 @@ typedef enum {
|
||||||
HALLOCY_SIMD_NEON = 7
|
HALLOCY_SIMD_NEON = 7
|
||||||
} HallocySimdType;
|
} HallocySimdType;
|
||||||
|
|
||||||
HallocySimdType hallocy_is_simd_supported();
|
HallocySimdType hallocy_is_simd_supported(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,6 @@ static _Thread_local size_t hallocy_small_memory_freed = 0;
|
||||||
static _Thread_local size_t hallocy_small_memory_allocated = 0;
|
static _Thread_local size_t hallocy_small_memory_allocated = 0;
|
||||||
static _Thread_local HallocyMemoryHeader *hallocy_small_memory_bin = NULL;
|
static _Thread_local HallocyMemoryHeader *hallocy_small_memory_bin = NULL;
|
||||||
|
|
||||||
static size_t page_size = 0;
|
|
||||||
static size_t hallocy_small_allocation_size = 0;
|
static size_t hallocy_small_allocation_size = 0;
|
||||||
static size_t hallocy_medium_allocation_size = 0;
|
static size_t hallocy_medium_allocation_size = 0;
|
||||||
|
|
||||||
|
|
@ -70,6 +69,7 @@ static BOOL CALLBACK hallocy_initialize_mutex(PINIT_ONCE init_once, PVOID parame
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void *hallocy_allocate(const size_t size, const bool zero_memory) {
|
void *hallocy_allocate(const size_t size, const bool zero_memory) {
|
||||||
|
static size_t page_size = 0;
|
||||||
if (page_size == 0) {
|
if (page_size == 0) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
SYSTEM_INFO system_info;
|
SYSTEM_INFO system_info;
|
||||||
|
|
@ -431,4 +431,4 @@ HallocyError hallocy_free(void *pointer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
return HALLOCY_ERROR_NONE;
|
return HALLOCY_ERROR_NONE;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,44 +39,44 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
|
||||||
switch (hallocy_is_simd_supported()) {
|
switch (hallocy_is_simd_supported()) {
|
||||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||||
case HALLOCY_SIMD_NEON: {
|
case HALLOCY_SIMD_NEON: {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = value_bytes;
|
*destination_bytes = value_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8x16_t simd_value = vdupq_n_u8(value_bytes);
|
uint8x16_t simd_value = vdupq_n_u8(value_bytes);
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
vst1q_u8(destination_bytes, simd_value);
|
vst1q_u8(destination_bytes, simd_value);
|
||||||
destination_bytes += 16;
|
destination_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
case HALLOCY_SIMD_AVX512: {
|
case HALLOCY_SIMD_AVX512: {
|
||||||
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = value_bytes;
|
*destination_bytes = value_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m512i simd_value = _mm512_set1_epi8(value_bytes);
|
__m512i simd_value = _mm512_set1_epi8((char)value_bytes);
|
||||||
while (destination_bytes - end_address >= 64) {
|
while (destination_bytes - end_address >= SIMD_512_WIDTH) {
|
||||||
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 64;
|
destination_bytes += SIMD_512_WIDTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case HALLOCY_SIMD_AVX2: {
|
case HALLOCY_SIMD_AVX2: {
|
||||||
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = value_bytes;
|
*destination_bytes = value_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m256i simd_value = _mm256_set1_epi8(value_bytes);
|
__m256i simd_value = _mm256_set1_epi8((char)value_bytes);
|
||||||
while (destination_bytes - end_address >= 32) {
|
while (destination_bytes - end_address >= SIMD_256_WIDTH) {
|
||||||
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 32;
|
destination_bytes += SIMD_256_WIDTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
@ -85,15 +85,15 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
|
||||||
case HALLOCY_SIMD_AVX:
|
case HALLOCY_SIMD_AVX:
|
||||||
case HALLOCY_SIMD_SSE2:
|
case HALLOCY_SIMD_SSE2:
|
||||||
case HALLOCY_SIMD_SSE: {
|
case HALLOCY_SIMD_SSE: {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = value_bytes;
|
*destination_bytes = value_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i simd_value = _mm_set1_epi8(value_bytes);
|
__m128i simd_value = _mm_set1_epi8((char)value_bytes);
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 16;
|
destination_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
@ -108,7 +108,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
|
||||||
|
|
||||||
size_t value_word = 0;
|
size_t value_word = 0;
|
||||||
for (size_t i = 0; i < word_size; i++) {
|
for (size_t i = 0; i < word_size; i++) {
|
||||||
value_word |= (size_t)value_bytes << (i * 8);
|
value_word |= (size_t)value_bytes << (i * SIMD_64_WIDTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t *destination_word = (size_t*)destination_bytes;
|
size_t *destination_word = (size_t*)destination_bytes;
|
||||||
|
|
@ -143,8 +143,8 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
||||||
switch (hallocy_is_simd_supported()) {
|
switch (hallocy_is_simd_supported()) {
|
||||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||||
case HALLOCY_SIMD_NEON: {
|
case HALLOCY_SIMD_NEON: {
|
||||||
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
|
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
source_bytes += 1;
|
source_bytes += 1;
|
||||||
|
|
@ -152,39 +152,39 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8x16_t simd_value;
|
uint8x16_t simd_value;
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
simd_value = vdupq_n_u8(source_bytes);
|
simd_value = vdupq_n_u8(source_bytes);
|
||||||
vst1q_u8(destination_bytes, simd_value);
|
vst1q_u8(destination_bytes, simd_value);
|
||||||
destination_bytes += 16;
|
destination_bytes += SIMD_128_WIDTH;
|
||||||
source_bytes += 16;
|
source_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
case HALLOCY_SIMD_AVX512: {
|
case HALLOCY_SIMD_AVX512: {
|
||||||
if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
source_bytes += 1;
|
source_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m512i simd_value;
|
__m512i simd_value;
|
||||||
while (destination_bytes - end_address >= 64) {
|
while (destination_bytes - end_address >= SIMD_512_WIDTH) {
|
||||||
simd_value = _mm512_load_si512((__m512i*)source_bytes);
|
simd_value = _mm512_load_si512((__m512i*)source_bytes);
|
||||||
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 64;
|
destination_bytes += SIMD_512_WIDTH;
|
||||||
source_bytes += 64;
|
source_bytes += SIMD_512_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m512i simd_value;
|
__m512i simd_value;
|
||||||
while (destination_bytes - end_address >= 64) {
|
while (destination_bytes - end_address >= SIMD_512_WIDTH) {
|
||||||
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
|
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
|
||||||
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 64;
|
destination_bytes += SIMD_512_WIDTH;
|
||||||
source_bytes += 64;
|
source_bytes += SIMD_512_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -192,29 +192,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
||||||
|
|
||||||
case HALLOCY_SIMD_AVX2:
|
case HALLOCY_SIMD_AVX2:
|
||||||
case HALLOCY_SIMD_AVX: {
|
case HALLOCY_SIMD_AVX: {
|
||||||
if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_256_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
source_bytes += 1;
|
source_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m256i simd_value;
|
__m256i simd_value;
|
||||||
while (destination_bytes - end_address >= 32) {
|
while (destination_bytes - end_address >= SIMD_256_WIDTH) {
|
||||||
simd_value = _mm256_load_si256((__m256i*)source_bytes);
|
simd_value = _mm256_load_si256((__m256i*)source_bytes);
|
||||||
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 32;
|
destination_bytes += SIMD_256_WIDTH;
|
||||||
source_bytes += 32;
|
source_bytes += SIMD_256_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m256i simd_value;
|
__m256i simd_value;
|
||||||
while (destination_bytes - end_address >= 32) {
|
while (destination_bytes - end_address >= SIMD_256_WIDTH) {
|
||||||
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
|
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
|
||||||
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 32;
|
destination_bytes += SIMD_256_WIDTH;
|
||||||
source_bytes += 32;
|
source_bytes += SIMD_256_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -222,29 +222,29 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
||||||
|
|
||||||
case HALLOCY_SIMD_SSE2:
|
case HALLOCY_SIMD_SSE2:
|
||||||
case HALLOCY_SIMD_SSE: {
|
case HALLOCY_SIMD_SSE: {
|
||||||
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
source_bytes += 1;
|
source_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i simd_value;
|
__m128i simd_value;
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
simd_value = _mm_load_si128((__m128i*)source_bytes);
|
simd_value = _mm_load_si128((__m128i*)source_bytes);
|
||||||
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 16;
|
destination_bytes += SIMD_128_WIDTH;
|
||||||
source_bytes += 16;
|
source_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m128i simd_value;
|
__m128i simd_value;
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
|
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
|
||||||
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
||||||
|
|
||||||
destination_bytes += 16;
|
destination_bytes += SIMD_128_WIDTH;
|
||||||
source_bytes += 16;
|
source_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -301,8 +301,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
switch (hallocy_is_simd_supported()) {
|
switch (hallocy_is_simd_supported()) {
|
||||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||||
case HALLOCY_SIMD_NEON: {
|
case HALLOCY_SIMD_NEON: {
|
||||||
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
|
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
destination_bytes -= 1;
|
destination_bytes -= 1;
|
||||||
source_bytes -= 1;
|
source_bytes -= 1;
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
|
|
@ -310,9 +310,9 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8x16_t simd_value;
|
uint8x16_t simd_value;
|
||||||
while (end_address - destination_bytes >= 16) {
|
while (end_address - destination_bytes >= SIMD_128_WIDTH) {
|
||||||
destination_bytes -= 16;
|
destination_bytes -= SIMD_128_WIDTH;
|
||||||
source_bytes -= 16;
|
source_bytes -= SIMD_128_WIDTH;
|
||||||
|
|
||||||
simd_value = vdupq_n_u8(source_bytes);
|
simd_value = vdupq_n_u8(source_bytes);
|
||||||
vst1q_u8(destination_bytes, simd_value);
|
vst1q_u8(destination_bytes, simd_value);
|
||||||
|
|
@ -321,26 +321,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
case HALLOCY_SIMD_AVX512: {
|
case HALLOCY_SIMD_AVX512: {
|
||||||
if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
destination_bytes -= 1;
|
destination_bytes -= 1;
|
||||||
source_bytes -= 1;
|
source_bytes -= 1;
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m512i simd_value;
|
__m512i simd_value;
|
||||||
while (end_address - destination_bytes >= 64) {
|
while (end_address - destination_bytes >= SIMD_512_WIDTH) {
|
||||||
destination_bytes -= 64;
|
destination_bytes -= SIMD_512_WIDTH;
|
||||||
source_bytes -= 64;
|
source_bytes -= SIMD_512_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm512_load_si512((__m512i*)source_bytes);
|
simd_value = _mm512_load_si512((__m512i*)source_bytes);
|
||||||
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m512i simd_value;
|
__m512i simd_value;
|
||||||
while (end_address - destination_bytes >= 64) {
|
while (end_address - destination_bytes >= SIMD_512_WIDTH) {
|
||||||
destination_bytes -= 64;
|
destination_bytes -= SIMD_512_WIDTH;
|
||||||
source_bytes -= 64;
|
source_bytes -= SIMD_512_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
|
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
|
||||||
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
||||||
|
|
@ -351,8 +351,8 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
|
|
||||||
case HALLOCY_SIMD_AVX2:
|
case HALLOCY_SIMD_AVX2:
|
||||||
case HALLOCY_SIMD_AVX: {
|
case HALLOCY_SIMD_AVX: {
|
||||||
if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_256_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_256_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
destination_bytes -= 1;
|
destination_bytes -= 1;
|
||||||
source_bytes -= 1;
|
source_bytes -= 1;
|
||||||
|
|
||||||
|
|
@ -360,18 +360,18 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
}
|
}
|
||||||
|
|
||||||
__m256i simd_value;
|
__m256i simd_value;
|
||||||
while (end_address - destination_bytes >= 32) {
|
while (end_address - destination_bytes >= SIMD_256_WIDTH) {
|
||||||
destination_bytes -= 32;
|
destination_bytes -= SIMD_256_WIDTH;
|
||||||
source_bytes -= 32;
|
source_bytes -= SIMD_256_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm256_load_si256((__m256i*)source_bytes);
|
simd_value = _mm256_load_si256((__m256i*)source_bytes);
|
||||||
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m256i simd_value;
|
__m256i simd_value;
|
||||||
while (end_address - destination_bytes >= 32) {
|
while (end_address - destination_bytes >= SIMD_256_WIDTH) {
|
||||||
destination_bytes -= 32;
|
destination_bytes -= SIMD_256_WIDTH;
|
||||||
source_bytes -= 32;
|
source_bytes -= SIMD_256_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
|
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
|
||||||
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
||||||
|
|
@ -382,26 +382,26 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
||||||
|
|
||||||
case HALLOCY_SIMD_SSE2:
|
case HALLOCY_SIMD_SSE2:
|
||||||
case HALLOCY_SIMD_SSE: {
|
case HALLOCY_SIMD_SSE: {
|
||||||
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) {
|
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||||
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||||
destination_bytes -= 1;
|
destination_bytes -= 1;
|
||||||
source_bytes -= 1;
|
source_bytes -= 1;
|
||||||
*destination_bytes = *source_bytes;
|
*destination_bytes = *source_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i simd_value;
|
__m128i simd_value;
|
||||||
while (end_address - destination_bytes >= 16) {
|
while (end_address - destination_bytes >= SIMD_128_WIDTH) {
|
||||||
destination_bytes -= 16;
|
destination_bytes -= SIMD_128_WIDTH;
|
||||||
source_bytes -= 16;
|
source_bytes -= SIMD_128_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm_load_si128((__m128i*)source_bytes);
|
simd_value = _mm_load_si128((__m128i*)source_bytes);
|
||||||
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__m128i simd_value;
|
__m128i simd_value;
|
||||||
while (end_address - destination_bytes >= 16) {
|
while (end_address - destination_bytes >= SIMD_128_WIDTH) {
|
||||||
destination_bytes -= 16;
|
destination_bytes -= SIMD_128_WIDTH;
|
||||||
source_bytes -= 16;
|
source_bytes -= SIMD_128_WIDTH;
|
||||||
|
|
||||||
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
|
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
|
||||||
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
||||||
|
|
@ -457,8 +457,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
switch (hallocy_is_simd_supported()) {
|
switch (hallocy_is_simd_supported()) {
|
||||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||||
case HALLOCY_SIMD_NEON: {
|
case HALLOCY_SIMD_NEON: {
|
||||||
if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
|
if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
|
||||||
while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
|
while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||||
if (*left_side_bytes != *right_side_bytes) {
|
if (*left_side_bytes != *right_side_bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -468,7 +468,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (end_address - destination_bytes >= 16) {
|
while (end_address - destination_bytes >= SIMD_128_WIDTH) {
|
||||||
uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes);
|
uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes);
|
||||||
uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes);
|
uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -477,15 +477,15 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 16;
|
left_side_bytes += SIMD_128_WIDTH;
|
||||||
right_side_bytes += 16
|
right_side_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
case HALLOCY_SIMD_AVX512: {
|
case HALLOCY_SIMD_AVX512: {
|
||||||
if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) {
|
if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) {
|
||||||
while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) {
|
while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||||
if (*left_side_bytes != *right_side_bytes) {
|
if (*left_side_bytes != *right_side_bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -494,7 +494,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
right_side_bytes += 1;
|
right_side_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (left_side_bytes - end_address >= 64) {
|
while (left_side_bytes - end_address >= SIMD_512_WIDTH) {
|
||||||
__m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes);
|
__m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes);
|
||||||
__m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes);
|
__m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -503,11 +503,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 64;
|
left_side_bytes += SIMD_512_WIDTH;
|
||||||
right_side_bytes += 64;
|
right_side_bytes += SIMD_512_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
while (left_side_bytes - end_address >= 64) {
|
while (left_side_bytes - end_address >= SIMD_512_WIDTH) {
|
||||||
__m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes);
|
__m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes);
|
||||||
__m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes);
|
__m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -516,8 +516,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 64;
|
left_side_bytes += SIMD_512_WIDTH;
|
||||||
right_side_bytes += 64;
|
right_side_bytes += SIMD_512_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -525,8 +525,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
|
|
||||||
case HALLOCY_SIMD_AVX2:
|
case HALLOCY_SIMD_AVX2:
|
||||||
case HALLOCY_SIMD_AVX: {
|
case HALLOCY_SIMD_AVX: {
|
||||||
if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) {
|
if ((size_t)left_side_bytes % SIMD_256_WIDTH == (size_t)right_side_bytes % SIMD_256_WIDTH) {
|
||||||
while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) {
|
while (((size_t)left_side_bytes % SIMD_256_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||||
if (*left_side_bytes != *right_side_bytes) {
|
if (*left_side_bytes != *right_side_bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -535,7 +535,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
right_side_bytes += 1;
|
right_side_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (left_side_bytes - end_address >= 32) {
|
while (left_side_bytes - end_address >= SIMD_256_WIDTH) {
|
||||||
__m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes);
|
__m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes);
|
||||||
__m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes);
|
__m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -544,11 +544,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 32;
|
left_side_bytes += SIMD_256_WIDTH;
|
||||||
right_side_bytes += 32;
|
right_side_bytes += SIMD_256_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
while (left_side_bytes - end_address >= 32) {
|
while (left_side_bytes - end_address >= SIMD_256_WIDTH) {
|
||||||
__m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes);
|
__m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes);
|
||||||
__m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes);
|
__m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -557,8 +557,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 32;
|
left_side_bytes += SIMD_256_WIDTH;
|
||||||
right_side_bytes += 32;
|
right_side_bytes += SIMD_256_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -566,8 +566,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
|
|
||||||
case HALLOCY_SIMD_SSE2:
|
case HALLOCY_SIMD_SSE2:
|
||||||
case HALLOCY_SIMD_SSE: {
|
case HALLOCY_SIMD_SSE: {
|
||||||
if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
|
if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
|
||||||
while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
|
while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||||
if (*left_side_bytes != *right_side_bytes) {
|
if (*left_side_bytes != *right_side_bytes) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -576,7 +576,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
right_side_bytes += 1;
|
right_side_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (left_side_bytes - end_address >= 16) {
|
while (left_side_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
__m128i simd_left_side_value = _mm_load_si128((__m128i*)left_side_bytes);
|
__m128i simd_left_side_value = _mm_load_si128((__m128i*)left_side_bytes);
|
||||||
__m128i simd_right_side_value = _mm_load_si128((__m128i*)right_side_bytes);
|
__m128i simd_right_side_value = _mm_load_si128((__m128i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -585,11 +585,11 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 16;
|
left_side_bytes += SIMD_128_WIDTH;
|
||||||
right_side_bytes += 16;
|
right_side_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
while (left_side_bytes - end_address >= 16) {
|
while (left_side_bytes - end_address >= SIMD_128_WIDTH) {
|
||||||
__m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes);
|
__m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes);
|
||||||
__m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes);
|
__m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes);
|
||||||
|
|
||||||
|
|
@ -598,8 +598,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
left_side_bytes += 16;
|
left_side_bytes += SIMD_128_WIDTH;
|
||||||
right_side_bytes += 16;
|
right_side_bytes += SIMD_128_WIDTH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -643,4 +643,4 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,9 +21,8 @@
|
||||||
*/
|
*/
|
||||||
#include "../../Include/Hallocy/Utils/Simd.h"
|
#include "../../Include/Hallocy/Utils/Simd.h"
|
||||||
|
|
||||||
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
|
HallocySimdType hallocy_is_simd_supported(void) {
|
||||||
|
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
|
||||||
HallocySimdType hallocy_is_simd_supported() {
|
|
||||||
if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) {
|
if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) {
|
||||||
return hallocy_supported_simd;
|
return hallocy_supported_simd;
|
||||||
}
|
}
|
||||||
|
|
@ -130,4 +129,4 @@ HallocySimdType hallocy_is_simd_supported() {
|
||||||
|
|
||||||
hallocy_supported_simd = HALLOCY_SIMD_NONE;
|
hallocy_supported_simd = HALLOCY_SIMD_NONE;
|
||||||
return hallocy_supported_simd;
|
return hallocy_supported_simd;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue