Merge pull request 'h7-fix-android-build' (#14) from h7-fix-android-build into main

Reviewed-on: #14
This commit is contained in:
Mineplay 2025-10-16 17:25:34 -05:00
commit 9cef673a7e
4 changed files with 102 additions and 120 deletions

View file

@ -16,11 +16,13 @@ target_link_libraries(HallocyTest Hallocy)
# Warning levels and SIMD instruction-set flags for Hallocy and HallocyTest.
# ISA flags are added only for the architecture that accepts them, so the same
# file configures for x86-64 hosts and for ARM targets (e.g. Android ABIs).
if (MSVC)
    target_compile_options(Hallocy PRIVATE /W4 /Zl)
else()
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
        # x86-64 is reported as "x86_64" on Linux/macOS/Android and as "AMD64"
        # by Windows toolchains that are not MSVC (MinGW, clang).
        target_compile_options(Hallocy PRIVATE -mavx512f -mavx512vl)
        target_compile_options(HallocyTest PRIVATE -mavx512f -mavx512vl)
        if (NOT CMAKE_CROSSCOMPILING)
            # -march=native describes the build machine, so it is only
            # meaningful when the binaries will run on that same machine.
            target_compile_options(Hallocy PRIVATE -march=native)
            target_compile_options(HallocyTest PRIVATE -march=native)
        endif()
    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)")
        # 64-bit ARM: Advanced SIMD (NEON) is part of the baseline and
        # -mfpu= is a 32-bit ARM option, so no extra flag is passed here.
    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
        # 32-bit ARM (e.g. "armv7-a"): NEON has to be requested explicitly.
        target_compile_options(Hallocy PRIVATE -mfpu=neon)
        target_compile_options(HallocyTest PRIVATE -mfpu=neon)
    endif()
    target_compile_options(Hallocy PRIVATE -Wall -Wextra -pedantic)
endif()

View file

@ -23,20 +23,20 @@
#ifndef HALLOCY_SIMD
#define HALLOCY_SIMD
/*
 * Pick the intrinsics header for the current compiler/architecture pair and
 * publish exactly one selector macro that the rest of the library tests:
 *   WIN_NEON - MSVC targeting ARM / ARM64
 *   WIN_SIMD - MSVC targeting any other architecture
 *   LIN_SIMD - GCC-compatible compiler targeting x86 / x86-64
 *   LIN_NEON - GCC-compatible compiler targeting ARM with NEON, or AArch64
 * When no branch applies, no selector is defined and a warning is emitted.
 */
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM))
#define WIN_NEON
#include <arm_neon.h>
#elif defined(_MSC_VER)
#define WIN_SIMD
#include <intrin.h>
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#define LIN_SIMD
#include <immintrin.h>
/* __ARM_NEON is the ACLE spelling; __ARM_NEON__ is the older GCC one. */
#elif defined(__GNUC__) && (defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__))
#define LIN_NEON
#include <arm_neon.h>
#else
#warning "SIMD is unsupported by this architecture or compiler (only x86/x64/ARM/ARM64 supported)."
#endif
#define SIMD_64_WIDTH 8

View file

@ -23,9 +23,6 @@
#include "../../Include/Hallocy/Core/Memory.h"
#include "../../Include/Hallocy/Utils/Simd.h"
#include <immintrin.h>
#include <stddef.h>
HallocyError hallocy_set_memory(void *destination, int value, const size_t size) {
if (destination == NULL) {
return HALLOCY_ERROR_INVALID_POINTER;
@ -37,7 +34,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
unsigned char value_bytes = (unsigned char)value;
switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
#if defined(LIN_NEON) || defined(WIN_NEON)
case HALLOCY_SIMD_NEON: {
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes;
@ -51,7 +48,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
}
break;
}
#else
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
case HALLOCY_SIMD_AVX512: {
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
*destination_bytes = value_bytes;
@ -141,7 +138,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
unsigned char *end_address = destination_bytes + size;
switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
#if defined(LIN_NEON) || defined(WIN_NEON)
case HALLOCY_SIMD_NEON: {
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
@ -153,14 +150,14 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
uint8x16_t simd_value;
while (destination_bytes - end_address >= SIMD_128_WIDTH) {
simd_value = vdupq_n_u8(source_bytes);
simd_value = vdupq_n_u8(*source_bytes);
vst1q_u8(destination_bytes, simd_value);
destination_bytes += SIMD_128_WIDTH;
source_bytes += SIMD_128_WIDTH;
}
break;
}
#else
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
case HALLOCY_SIMD_AVX512: {
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
@ -299,7 +296,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
unsigned char *source_bytes = (unsigned char*)source + size;
switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
#if defined(LIN_NEON) || defined(WIN_NEON)
case HALLOCY_SIMD_NEON: {
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
@ -314,12 +311,12 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
destination_bytes -= SIMD_128_WIDTH;
source_bytes -= SIMD_128_WIDTH;
simd_value = vdupq_n_u8(source_bytes);
simd_value = vdupq_n_u8(*source_bytes);
vst1q_u8(destination_bytes, simd_value);
}
break;
}
#else
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
case HALLOCY_SIMD_AVX512: {
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
@ -455,7 +452,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
unsigned char *end_address = left_side_bytes + size;
switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
#if defined(LIN_NEON) || defined(WIN_NEON)
case HALLOCY_SIMD_NEON: {
if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
@ -468,9 +465,9 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
}
}
while (end_address - destination_bytes >= SIMD_128_WIDTH) {
uint8x16_t simd_left_side_value = vdupq_n_u8(left_side_bytes);
uint8x16_t simd_right_side_value = vdupq_n_u8(right_side_bytes);
while (left_side_bytes - end_address >= SIMD_128_WIDTH) {
uint8x16_t simd_left_side_value = vdupq_n_u8(*left_side_bytes);
uint8x16_t simd_right_side_value = vdupq_n_u8(*right_side_bytes);
uint8x16_t result = vceqq_u8(simd_left_side_value, simd_right_side_value);
if (vmaxvq_u8(result) != 0xFF) {
@ -482,7 +479,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
}
break;
}
#else
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
case HALLOCY_SIMD_AVX512: {
if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) {
while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) {

View file

@ -21,110 +21,93 @@
*/
#include "../../Include/Hallocy/Utils/Simd.h"
#ifdef LIN_NEON
#include <sys/auxv.h>
#endif
HallocySimdType hallocy_is_simd_supported(void) {
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) {
return hallocy_supported_simd;
}
#if defined(_MSC_VER)
#if defined(_M_ARM64)
if (isProcessorFeaturePresent(PF_ARM64_SVE)) {
hallocy_supported_simd = HALLOCY_SIMD_NEON;
return hallocy_supported_simd;
}
#else
int cpu_info[4] = { 0 };
__cpuid(cpu_info, 7);
if ((cpu_info[1] & (1 << 16)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
return hallocy_supported_simd;
}
#if defined(WIN_NEON)
if (isProcessorFeaturePresent(PF_ARM64_SVE)) {
hallocy_supported_simd = HALLOCY_SIMD_NEON;
return hallocy_supported_simd;
}
#elif defined(WIN_SIMD)
int cpu_info[4] = { 0 };
__cpuid(cpu_info, 7);
if ((cpu_info[1] & (1 << 16)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
return hallocy_supported_simd;
}
if ((cpu_info[1] & (1 << 5)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
return hallocy_supported_simd;
}
if ((cpu_info[1] & (1 << 5)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
return hallocy_supported_simd;
}
__cpuid(cpu_info, 1);
__cpuid(cpu_info, 1);
if ((cpu_info[2] & (1 << 28)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX;
return hallocy_supported_simd;
}
if ((cpu_info[2] & (1 << 28)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX;
return hallocy_supported_simd;
}
if ((cpu_info[3] & (1 << 26)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
if ((cpu_info[3] & (1 << 26)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
if ((cpu_info[3] & (1 << 25)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
#endif
#else
#if defined(__aarch64__) || defined(__arm__)
int file_descriptor = open("/proc/cpuinfo", O_READONLY);
if (file_descriptor == -1) {
return hallocy_supported_simd;
}
if ((cpu_info[3] & (1 << 25)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
#elif defined(LIN_NEON)
if (getauxval(16) & (1 << 12)) {
hallocy_supported_simd = HALLOCY_SIMD_NEON;
return hallocy_supported_simd;
}
#elif defined(LIN_SIMD)
unsigned int a, b, c, d;
__asm__ __volatile__ (
"cpuid"
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
: "a" (7)
);
char buffer[256];
int bytes_read = read(file_descriptor, buffer, sizeof(buffer));
while (bytes_read > 0) {
for (size_t i = 0; i < bytes_read - 4; i++) {
if (buffer[i] == 'n' && buffer[i + 1] == 'e' && buffer[i + 2] == 'o' && buffer[i + 3] == 'n') {
close(file_descriptor);
if ((b & (1 << 16)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
return hallocy_supported_simd;
}
hallocy_supported_simd = HALLOCY_SIMD_NEON;
return hallocy_supported_simd;
}
}
if ((b & (1 << 5)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
return hallocy_supported_simd;
}
bytes_read = read(file_descriptor, buffer, sizeof(buffer));
}
__asm__ __volatile__ (
"cpuid"
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
: "a" (1)
);
close(file_descriptor);
#else
unsigned int a, b, c, d;
__asm__ __volatile__ (
"cpuid"
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
: "a" (7)
);
if ((c & (1 << 28)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX;
return hallocy_supported_simd;
}
if ((b & (1 << 16)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
return hallocy_supported_simd;
}
if ((c & (1 << 26)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
if ((b & (1 << 5)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
return hallocy_supported_simd;
}
__asm__ __volatile__ (
"cpuid"
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
: "a" (1)
);
if ((c & (1 << 28)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_AVX;
return hallocy_supported_simd;
}
if ((c & (1 << 26)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
return hallocy_supported_simd;
}
if ((c & (1 << 25)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE;
return hallocy_supported_simd;
}
#endif
if ((c & (1 << 25)) != 0) {
hallocy_supported_simd = HALLOCY_SIMD_SSE;
return hallocy_supported_simd;
}
#endif
hallocy_supported_simd = HALLOCY_SIMD_NONE;