h7-fix-android-build #14
3 changed files with 90 additions and 107 deletions
|
|
@ -23,20 +23,20 @@
|
|||
#ifndef HALLOCY_SIMD
|
||||
#define HALLOCY_SIMD
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_M_ARM64)
|
||||
#include <arm64intr.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#else
|
||||
#if defined(__aarch64__)
|
||||
#include <arm64intr.h>
|
||||
#elif defined(__arm__)
|
||||
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM))
|
||||
#define WIN_NEON
|
||||
#include <arm_neon.h>
|
||||
#else
|
||||
#elif defined(_MSC_VER)
|
||||
#define WIN_SIMD
|
||||
#include <intrin.h>
|
||||
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#define LIN_SIMD
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#elif defined(__GNUC__) && defined(__ARM_NEON__)
|
||||
#define LIN_NEON
|
||||
#include <arm_neon.h>
|
||||
#else
|
||||
#warning "SIMD is unsupported by this architecture or compiler (only x86/x64/ARM/ARM64 supported)."
|
||||
#endif
|
||||
|
||||
#define SIMD_64_WIDTH 8
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
|
|||
unsigned char value_bytes = (unsigned char)value;
|
||||
|
||||
switch (hallocy_is_simd_supported()) {
|
||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||
#if defined(LIN_NEON) || defined(WIN_NEON)
|
||||
case HALLOCY_SIMD_NEON: {
|
||||
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
*destination_bytes = value_bytes;
|
||||
|
|
@ -48,7 +48,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t size)
|
|||
}
|
||||
break;
|
||||
}
|
||||
#else
|
||||
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
|
||||
case HALLOCY_SIMD_AVX512: {
|
||||
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
*destination_bytes = value_bytes;
|
||||
|
|
@ -138,7 +138,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
|||
unsigned char *end_address = destination_bytes + size;
|
||||
|
||||
switch (hallocy_is_simd_supported()) {
|
||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||
#if defined(LIN_NEON) || defined(WIN_NEON)
|
||||
case HALLOCY_SIMD_NEON: {
|
||||
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
|
||||
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
|
|
@ -157,7 +157,7 @@ HallocyError hallocy_copy_memory(void *destination, void *source, const size_t s
|
|||
}
|
||||
break;
|
||||
}
|
||||
#else
|
||||
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
|
||||
case HALLOCY_SIMD_AVX512: {
|
||||
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
|
|
@ -296,7 +296,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
|||
unsigned char *source_bytes = (unsigned char*)source + size;
|
||||
|
||||
switch (hallocy_is_simd_supported()) {
|
||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||
#if defined(LIN_NEON) || defined(WIN_NEON)
|
||||
case HALLOCY_SIMD_NEON: {
|
||||
if ((size_t)destination_bytes % SIMD_128_WIDTH == (size_t)source_bytes % SIMD_128_WIDTH) {
|
||||
while (((size_t)destination_bytes % SIMD_128_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
|
|
@ -316,7 +316,7 @@ HallocyError hallocy_move_memory(void *destination, void *source, const size_t s
|
|||
}
|
||||
break;
|
||||
}
|
||||
#else
|
||||
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
|
||||
case HALLOCY_SIMD_AVX512: {
|
||||
if ((size_t)destination_bytes % SIMD_512_WIDTH == (size_t)source_bytes % SIMD_512_WIDTH) {
|
||||
while (((size_t)destination_bytes % SIMD_512_WIDTH) != 0 && destination_bytes != end_address) {
|
||||
|
|
@ -452,7 +452,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
|||
unsigned char *end_address = left_side_bytes + size;
|
||||
|
||||
switch (hallocy_is_simd_supported()) {
|
||||
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||
#if defined(LIN_NEON) || defined(WIN_NEON)
|
||||
case HALLOCY_SIMD_NEON: {
|
||||
if ((size_t)left_side_bytes % SIMD_128_WIDTH == (size_t)right_side_bytes % SIMD_128_WIDTH) {
|
||||
while (((size_t)left_side_bytes % SIMD_128_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||
|
|
@ -479,7 +479,7 @@ bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size
|
|||
}
|
||||
break;
|
||||
}
|
||||
#else
|
||||
#elif defined(LIN_SIMD) || defined(WIN_SIMD)
|
||||
case HALLOCY_SIMD_AVX512: {
|
||||
if ((size_t)left_side_bytes % SIMD_512_WIDTH == (size_t)right_side_bytes % SIMD_512_WIDTH) {
|
||||
while (((size_t)left_side_bytes % SIMD_512_WIDTH) != 0 && left_side_bytes != end_address) {
|
||||
|
|
|
|||
157
Src/Utils/Simd.c
157
Src/Utils/Simd.c
|
|
@ -21,110 +21,93 @@
|
|||
*/
|
||||
#include "../../Include/Hallocy/Utils/Simd.h"
|
||||
|
||||
#ifdef LIN_NEON
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/hwcap.h>
|
||||
#endif
|
||||
|
||||
HallocySimdType hallocy_is_simd_supported(void) {
|
||||
static HallocySimdType hallocy_supported_simd = HALLOCY_SIMD_UNDEFINED;
|
||||
if (hallocy_supported_simd != HALLOCY_SIMD_UNDEFINED) {
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_M_ARM64)
|
||||
if (isProcessorFeaturePresent(PF_ARM64_SVE)) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_NEON;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#else
|
||||
int cpu_info[4] = { 0 };
|
||||
__cpuid(cpu_info, 7);
|
||||
if ((cpu_info[1] & (1 << 16)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#if defined(WIN_NEON)
|
||||
if (isProcessorFeaturePresent(PF_ARM64_SVE)) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_NEON;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#elif defined(WIN_SIMD)
|
||||
int cpu_info[4] = { 0 };
|
||||
__cpuid(cpu_info, 7);
|
||||
if ((cpu_info[1] & (1 << 16)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((cpu_info[1] & (1 << 5)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
if ((cpu_info[1] & (1 << 5)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
__cpuid(cpu_info, 1);
|
||||
__cpuid(cpu_info, 1);
|
||||
|
||||
if ((cpu_info[2] & (1 << 28)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
if ((cpu_info[2] & (1 << 28)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((cpu_info[3] & (1 << 26)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
if ((cpu_info[3] & (1 << 26)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((cpu_info[3] & (1 << 25)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
int file_descriptor = open("/proc/cpuinfo", O_READONLY);
|
||||
if (file_descriptor == -1) {
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
if ((cpu_info[3] & (1 << 25)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#elif defined(LIN_NEON)
|
||||
if (getauxval(AT_HWCAP) & HWCAP_NEON) {
|
||||
hallocy_is_simd_supported = HALLOCY_SIMD_NEON;
|
||||
}
|
||||
#elif defined(LIN_SIMD)
|
||||
unsigned int a, b, c, d;
|
||||
__asm__ __volatile__ (
|
||||
"cpuid"
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
|
||||
: "a" (7)
|
||||
);
|
||||
|
||||
char buffer[256];
|
||||
int bytes_read = read(file_descriptor, buffer, sizeof(buffer));
|
||||
while (bytes_read > 0) {
|
||||
for (size_t i = 0; i < bytes_read - 4; i++) {
|
||||
if (buffer[i] == 'n' && buffer[i + 1] == 'e' && buffer[i + 2] == 'o' && buffer[i + 3] == 'n') {
|
||||
close(file_descriptor);
|
||||
if ((b & (1 << 16)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
hallocy_supported_simd = HALLOCY_SIMD_NEON;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
}
|
||||
if ((b & (1 << 5)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
bytes_read = read(file_descriptor, buffer, sizeof(buffer));
|
||||
}
|
||||
__asm__ __volatile__ (
|
||||
"cpuid"
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
|
||||
: "a" (1)
|
||||
);
|
||||
|
||||
close(file_descriptor);
|
||||
#else
|
||||
unsigned int a, b, c, d;
|
||||
__asm__ __volatile__ (
|
||||
"cpuid"
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
|
||||
: "a" (7)
|
||||
);
|
||||
if ((c & (1 << 28)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((b & (1 << 16)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX512;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
if ((c & (1 << 26)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((b & (1 << 5)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"cpuid"
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
|
||||
: "a" (1)
|
||||
);
|
||||
|
||||
if ((c & (1 << 28)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_AVX;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((c & (1 << 26)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE2;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
|
||||
if ((c & (1 << 25)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#endif
|
||||
if ((c & (1 << 25)) != 0) {
|
||||
hallocy_supported_simd = HALLOCY_SIMD_SSE;
|
||||
return hallocy_supported_simd;
|
||||
}
|
||||
#endif
|
||||
|
||||
hallocy_supported_simd = HALLOCY_SIMD_NONE;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue