From 061cb514fe65761e3ed978632c0365bd1520f77d Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 10:33:50 -0500 Subject: [PATCH 1/8] fix(android build): fixed unused includes --- Src/Core/Memory.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index 7c633d3..d49ab14 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -23,9 +23,6 @@ #include "../../Include/Hallocy/Core/Memory.h" #include "../../Include/Hallocy/Utils/Simd.h" -#include -#include - HallocyError hallocy_set_memory(void *destination, int value, const size_t size) { if (destination == NULL) { return HALLOCY_ERROR_INVALID_POINTER; From 58ba6ee2897311a3b8613e50eac2dcdc6615aa4f Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:08:54 -0500 Subject: [PATCH 2/8] fix(android build): changed way that simd is detected at build and run time --- Include/Hallocy/Utils/Simd.h | 24 +++--- Src/Core/Memory.c | 16 ++-- Src/Utils/Simd.c | 157 ++++++++++++++++------------------- 3 files changed, 90 insertions(+), 107 deletions(-) diff --git a/Include/Hallocy/Utils/Simd.h b/Include/Hallocy/Utils/Simd.h index 92d1ab6..ea6cae0 100644 --- a/Include/Hallocy/Utils/Simd.h +++ b/Include/Hallocy/Utils/Simd.h @@ -23,20 +23,20 @@ #ifndef HALLOCY_SIMD #define HALLOCY_SIMD -#if defined(_MSC_VER) - #if defined(_M_ARM64) - #include - #else - #include - #endif -#else - #if defined(__aarch64__) - #include - #elif defined(__arm__) +#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM)) + #define WIN_NEON #include - #else +#elif defined(_MSC_VER) + #define WIN_SIMD + #include +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + #define LIN_SIMD #include - #endif +#elif defined(__GNUC__) && defined(__ARM_NEON__) + #define LIN_NEON + #include +#else + #warning "SIMD is unsupported by this architecture or compiler (only x86/x64/ARM/ARM64 supported)." 
#endif #define SIMD_64_WIDTH 8 diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index d49ab14..c9e0b99 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -34,7 +34,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size) unsigned char value_bytes = (unsigned char)value; switch (hallocy_is_simd_supported()) { - #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + #if defined(LIN_NEON) || defined(WIN_NEON) case HALLOCY_SIMD_NEON: { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; @@ -48,7 +48,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size) } break; } - #else + #elif defined(LIN_SIMD) || defined(WIN_SIMD) case HALLOCY_SIMD_AVX512: { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { *destination_bytes = value_bytes; @@ -138,7 +138,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s unsigned char *end_address = destination_bytes + size; switch (hallocy_is_simd_supported()) { - #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + #if defined(LIN_NEON) || defined(WIN_NEON) case HALLOCY_SIMD_NEON: { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { @@ -157,7 +157,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s } break; } - #else + #elif defined(LIN_SIMD) || defined(WIN_SIMD) case HALLOCY_SIMD_AVX512: { if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { @@ -296,7 +296,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s unsigned char *source_bytes = (unsigned char*)source + 
size; switch (hallocy_is_simd_supported()) { - #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + #if defined(LIN_NEON) || defined(WIN_NEON) case HALLOCY_SIMD_NEON: { if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) { while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) { @@ -316,7 +316,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s } break; } - #else + #elif defined(LIN_SIMD) || defined(WIN_SIMD) case HALLOCY_SIMD_AVX512: { if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) { while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) { @@ -452,7 +452,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size unsigned char *end_address = left_side_bytes + size; switch (hallocy_is_simd_supported()) { - #if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__) + #if defined(LIN_NEON) || defined(WIN_NEON) case HALLOCY_SIMD_NEON: { if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) { while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) { @@ -479,7 +479,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size } break; } - #else + #elif defined(LIN_SIMD) || defined(WIN_SIMD) case HALLOCY_SIMD_AVX512: { if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) { while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) { diff --git a/Src/Utils/Simd.c b/Src/Utils/Simd.c index e4be20a..70e15c6 100644 --- a/Src/Utils/Simd.c +++ b/Src/Utils/Simd.c @@ -21,110 +21,93 @@ */ #include "../../Include/Hallocy/Utils/Simd.h" +#ifdef LIN_NEON +#include +#include +#endif + HallocySimdType hallocy_is_simd_supported(void) { static HallocySimdType hallocy_supported_simd = 
HALLOCY_SIMD_UNDEFINED; if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) { return hallocy_supported_simd; } - #if defined(_MSC_VER) - #if defined(_M_ARM64) - if (isProcessorFeaturePresent(PF_ARM64_SVE)) { - hallocy_supported_simd = HALLOCY_SIMD_NEON; - return hallocy_supported_simd; - } - #else - int cpu_info[4] = { 0 }; - __cpuid(cpu_info, 7); - if ((cpu_info[1] & (1 << 16)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX512; - return hallocy_supported_simd; - } + #if defined(WIN_NEON) + if (isProcessorFeaturePresent(PF_ARM64_SVE)) { + hallocy_supported_simd = HALLOCY_SIMD_NEON; + return hallocy_supported_simd; + } + #elif defined(WIN_SIMD) + int cpu_info[4] = { 0 }; + __cpuid(cpu_info, 7); + if ((cpu_info[1] & (1 << 16)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX512; + return hallocy_supported_simd; + } - if ((cpu_info[1] & (1 << 5)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX2; - return hallocy_supported_simd; - } + if ((cpu_info[1] & (1 << 5)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX2; + return hallocy_supported_simd; + } - __cpuid(cpu_info, 1); + __cpuid(cpu_info, 1); - if ((cpu_info[2] & (1 << 28)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX; - return hallocy_supported_simd; - } + if ((cpu_info[2] & (1 << 28)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX; + return hallocy_supported_simd; + } - if ((cpu_info[3] & (1 << 26)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_SSE2; - return hallocy_supported_simd; - } + if ((cpu_info[3] & (1 << 26)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_SSE2; + return hallocy_supported_simd; + } - if ((cpu_info[3] & (1 << 25)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_SSE2; - return hallocy_supported_simd; - } - #endif - #else - #if defined(__aarch64__) || defined(__arm__) - int file_descriptor = open("/proc/cpuinfo", O_READONLY); - if (file_descriptor == -1) { - return hallocy_supported_simd; - } + if ((cpu_info[3] & (1 << 25)) != 0) { + hallocy_supported_simd = 
HALLOCY_SIMD_SSE2; + return hallocy_supported_simd; + } + #elif defined(LIN_NEON) + if (getauxval(AT_HWCAP) & HWCAP_NEON) { + hallocy_is_simd_supported = HALLOCY_SIMD_NEON; + } + #elif defined(LIN_SIMD) + unsigned int a, b, c, d; + __asm__ __volatile__ ( + "cpuid" + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) + : "a" (7) + ); - char buffer[256]; - int bytes_read = read(file_descriptor, buffer, sizeof(buffer)); - while (bytes_read > 0) { - for (size_t i = 0; i < bytes_read - 4; i++) { - if (buffer[i] == 'n' && buffer[i + 1] == 'e' && buffer[i + 2] == 'o' && buffer[i + 3] == 'n') { - close(file_descriptor); + if ((b & (1 << 16)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX512; + return hallocy_supported_simd; + } - hallocy_supported_simd = HALLOCY_SIMD_NEON; - return hallocy_supported_simd; - } - } + if ((b & (1 << 5)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX2; + return hallocy_supported_simd; + } - bytes_read = read(file_descriptor, buffer, sizeof(buffer)); - } + __asm__ __volatile__ ( + "cpuid" + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) + : "a" (1) + ); - close(file_descriptor); - #else - unsigned int a, b, c, d; - __asm__ __volatile__ ( - "cpuid" - : "=a" (a), "=b" (b), "=c" (c), "=d" (d) - : "a" (7) - ); + if ((c & (1 << 28)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_AVX; + return hallocy_supported_simd; + } - if ((b & (1 << 16)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX512; - return hallocy_supported_simd; - } + if ((c & (1 << 26)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_SSE2; + return hallocy_supported_simd; + } - if ((b & (1 << 5)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX2; - return hallocy_supported_simd; - } - - __asm__ __volatile__ ( - "cpuid" - : "=a" (a), "=b" (b), "=c" (c), "=d" (d) - : "a" (1) - ); - - if ((c & (1 << 28)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_AVX; - return hallocy_supported_simd; - } - - if ((c & (1 << 26)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_SSE2; - return 
hallocy_supported_simd; - } - - if ((c & (1 << 25)) != 0) { - hallocy_supported_simd = HALLOCY_SIMD_SSE; - return hallocy_supported_simd; - } - #endif + if ((c & (1 << 25)) != 0) { + hallocy_supported_simd = HALLOCY_SIMD_SSE; + return hallocy_supported_simd; + } #endif hallocy_supported_simd = HALLOCY_SIMD_NONE; From 5ab70241de27278273b2bbc8e9a755b177332ee3 Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:17:38 -0500 Subject: [PATCH 3/8] fix(android build): made cmake file work with non-SIMD processors --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 499bee9..2dcbf97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,10 @@ target_link_libraries(HallocyTest Hallocy) if (MSVC) target_compile_options(Hallocy PRIVATE /W4 /Zl) else() - target_compile_options(Hallocy PRIVATE -mavx512f -mavx512vl) - target_compile_options(HallocyTest PRIVATE -mavx512f -mavx512vl) + if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + target_compile_options(Hallocy PRIVATE -mavx512f -mavx512vl) + target_compile_options(HallocyTest PRIVATE -mavx512f -mavx512vl) + endif() target_compile_options(Hallocy PRIVATE -march=native) target_compile_options(HallocyTest PRIVATE -march=native) From 19544228f2f710a96ed3568fc5a48adaf9f21b8e Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:43:29 -0500 Subject: [PATCH 4/8] fix(android build): made cmake file work with neon processors --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dcbf97..92eeda6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,12 +17,12 @@ if (MSVC) target_compile_options(Hallocy PRIVATE /W4 /Zl) else() if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") - target_compile_options(Hallocy PRIVATE -mavx512f -mavx512vl) - target_compile_options(HallocyTest PRIVATE -mavx512f -mavx512vl) + target_compile_options(Hallocy PRIVATE -mavx512f 
-mavx512vl -march=native) + target_compile_options(HallocyTest PRIVATE -mavx512f -mavx512vl -march=native) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + target_compile_options(Hallocy PRIVATE -mfpu=neon) + target_compile_options(HallocyTest PRIVATE -mfpu=neon) endif() - - target_compile_options(Hallocy PRIVATE -march=native) - target_compile_options(HallocyTest PRIVATE -march=native) target_compile_options(Hallocy PRIVATE -Wall -Wextra -pedantic) endif() \ No newline at end of file From a53db34b9bbdf5ff055fe41ae2a8689f01c96fa2 Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:48:44 -0500 Subject: [PATCH 5/8] fix(android build): added missing flag check for neon --- Include/Hallocy/Utils/Simd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/Hallocy/Utils/Simd.h b/Include/Hallocy/Utils/Simd.h index ea6cae0..9ce07bb 100644 --- a/Include/Hallocy/Utils/Simd.h +++ b/Include/Hallocy/Utils/Simd.h @@ -32,7 +32,7 @@ #elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #define LIN_SIMD #include -#elif defined(__GNUC__) && defined(__ARM_NEON__) +#elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__aarch64__)) #define LIN_NEON #include #else From c0f40ecddf86747927a6f6a76aacaa044c3d861d Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:53:49 -0500 Subject: [PATCH 6/8] fix(android build): fixed typing of neon memory implementation --- Src/Core/Memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index c9e0b99..27b8ee5 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -150,7 +150,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s uint8x16_t simd_value; while (destination_bytes - end_address >= SIMD_128_WIDTH) { - simd_value = vdupq_n_u8(source_bytes); + simd_value = vdupq_n_u8(*source_bytes); vst1q_u8(destination_bytes, simd_value); destination_bytes += SIMD_128_WIDTH; source_bytes 
+= SIMD_128_WIDTH; @@ -311,7 +311,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s destination_bytes -= SIMD_128_WIDTH; source_bytes -= SIMD_128_WIDTH; - simd_value = vdupq_n_u8(source_bytes); + simd_value = vdupq_n_u8(*source_bytes); vst1q_u8(destination_bytes, simd_value); } break; @@ -466,8 +466,8 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size } while (end_address - destination_bytes >= SIMD_128_WIDTH) { - uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes); - uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes); + uint8x16_t simd_left_side_value = vdupq_n_u8(*left_side_bytes); + uint8x16_t simd_right_side_value = vdupq_n_u8(*right_side_bytes); uint8x16_t result = vceqq_u8(simd_left_side_value, simd_right_side_value); if (vmaxvq_u8(result) != 0xFF) { From 4b87134390530814da58c34ab5fe0a797f1420b8 Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 11:55:41 -0500 Subject: [PATCH 7/8] fix(android build): fixed mistake in while comparison for compare memory function --- Src/Core/Memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Core/Memory.c b/Src/Core/Memory.c index 27b8ee5..caf20bf 100644 --- a/Src/Core/Memory.c +++ b/Src/Core/Memory.c @@ -465,7 +465,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size } } - while (end_address - destination_bytes >= SIMD_128_WIDTH) { + while (left_side_bytes - end_address >= SIMD_128_WIDTH) { uint8x16_t simd_left_side_value = vdupq_n_u8(*left_side_bytes); uint8x16_t simd_right_side_value = vdupq_n_u8(*right_side_bytes); From a5f43bce5a203ec33cff2d9a57c6eb77ab6ff79c Mon Sep 17 00:00:00 2001 From: Mineplay Date: Thu, 16 Oct 2025 12:07:15 -0500 Subject: [PATCH 8/8] fix(android build): fixed checking for simd version --- Src/Utils/Simd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Src/Utils/Simd.c b/Src/Utils/Simd.c index 70e15c6..46d7359 
100644 --- a/Src/Utils/Simd.c +++ b/Src/Utils/Simd.c @@ -23,7 +23,6 @@ #ifdef LIN_NEON #include -#include #endif HallocySimdType hallocy_is_simd_supported(void) { @@ -67,8 +66,9 @@ HallocySimdType hallocy_is_simd_supported(void) { return hallocy_supported_simd; } #elif defined(LIN_NEON) - if (getauxval(AT_HWCAP) & HWCAP_NEON) { - hallocy_is_simd_supported = HALLOCY_SIMD_NEON; + if (getauxval(16) & (1 << 12)) { + hallocy_supported_simd = HALLOCY_SIMD_NEON; + return hallocy_supported_simd; } #elif defined(LIN_SIMD) unsigned int a, b, c, d;