133 lines
No EOL
4.5 KiB
C
133 lines
No EOL
4.5 KiB
C
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * -----------------------------------------------------------------------------
 * File: Memory.c
 * Description:
 *  This file implements the functions for managing memory. It includes functions
 *  to copy, move, compare and set memory.
 *
 * Author: Mineplay
 * -----------------------------------------------------------------------------
 */
|
|
#include "../../Include/Hallocy/Core/Memory.h"
|
|
#include "../../Include/Hallocy/Utils/Simd.h"
|
|
|
|
#include <immintrin.h>
|
|
#include <stddef.h>
|
|
|
|
HallocyError hallocy_set_memory(void *destination, int value, const size_t count) {
|
|
if (destination == NULL) {
|
|
return HALLOCY_ERROR_INVALID_POINTER;
|
|
}
|
|
|
|
unsigned char *destination_bytes = (unsigned char*)destination;
|
|
unsigned char *end_address = destination_bytes + count;
|
|
|
|
unsigned char value_bytes = (unsigned char)value;
|
|
|
|
switch (hallocy_is_simd_supported()) {
|
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
|
case HALLOCY_SIMD_NEON: {
|
|
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
uint8x16_t simd_value = vdupq_n_u8(value_bytes);
|
|
while (destination_bytes - end_address >= 16) {
|
|
vst1q_u8(destination_bytes, simd_value);
|
|
destination_bytes += 16;
|
|
}
|
|
break;
|
|
}
|
|
#else
|
|
case HALLOCY_SIMD_AVX512: {
|
|
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
__m512i simd_value = _mm512_set1_epi8(value_bytes);
|
|
while (destination_bytes - end_address >= 64) {
|
|
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
|
destination_bytes += 64;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case HALLOCY_SIMD_AVX2: {
|
|
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
__m256i simd_value = _mm256_set1_epi8(value_bytes);
|
|
while (destination_bytes - end_address >= 32) {
|
|
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
|
destination_bytes += 32;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case HALLOCY_SIMD_AVX:
|
|
case HALLOCY_SIMD_SSE2:
|
|
case HALLOCY_SIMD_SSE: {
|
|
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
__m128i simd_value = _mm_set1_epi8(value_bytes);
|
|
while (destination_bytes - end_address >= 16) {
|
|
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
|
destination_bytes += 16;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
|
|
#endif
|
|
default: {
|
|
size_t word_size = sizeof(size_t);
|
|
while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
size_t value_word = 0;
|
|
for (size_t i = 0; i < word_size; i++) {
|
|
value_word |= (size_t)value_bytes << (i * 8);
|
|
}
|
|
|
|
size_t *destination_word = (size_t*)destination_bytes;
|
|
while ((unsigned char*)(destination_word + 1) < end_address) {
|
|
*destination_word = value_word;
|
|
destination_word += 1;
|
|
}
|
|
|
|
destination_bytes = (unsigned char*)destination_word;
|
|
break;
|
|
}
|
|
}
|
|
|
|
while (destination_bytes != end_address) {
|
|
*destination_bytes = value_bytes;
|
|
destination_bytes += 1;
|
|
}
|
|
|
|
return HALLOCY_ERROR_NONE;
|
|
} |