feat(memory management): implemented memory copy function
This commit is contained in:
parent
64a3f60228
commit
3ae91ad59c
3 changed files with 166 additions and 9 deletions
|
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
#include "../Utils/Error.h"
|
#include "../Utils/Error.h"
|
||||||
|
|
||||||
HallocyError hallocy_set_memory(void *destination, int value, const size_t count);
|
HallocyError hallocy_set_memory(void *destination, int value, const size_t size);
|
||||||
|
/*
 * Copies size bytes from source to destination.
 *
 * Bytes are copied front to back; no special handling is done for
 * overlapping regions.
 *
 * destination: start of the region to write.
 * source:      start of the region to read.
 * size:        number of bytes to copy.
 *
 * Returns HALLOCY_ERROR_INVALID_POINTER when destination or source is NULL,
 * otherwise HALLOCY_ERROR_NONE.
 */
HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -26,13 +26,13 @@
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
HallocyError hallocy_set_memory(void *destination, int value, const size_t count) {
|
HallocyError hallocy_set_memory(void *destination, int value, const size_t size) {
|
||||||
if (destination == NULL) {
|
if (destination == NULL) {
|
||||||
return HALLOCY_ERROR_INVALID_POINTER;
|
return HALLOCY_ERROR_INVALID_POINTER;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char *destination_bytes = (unsigned char*)destination;
|
unsigned char *destination_bytes = (unsigned char*)destination;
|
||||||
unsigned char *end_address = destination_bytes + count;
|
unsigned char *end_address = destination_bytes + size;
|
||||||
|
|
||||||
unsigned char value_bytes = (unsigned char)value;
|
unsigned char value_bytes = (unsigned char)value;
|
||||||
|
|
||||||
|
|
@ -60,7 +60,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
|
||||||
|
|
||||||
__m512i simd_value = _mm512_set1_epi8(value_bytes);
|
__m512i simd_value = _mm512_set1_epi8(value_bytes);
|
||||||
while (destination_bytes - end_address >= 64) {
|
while (destination_bytes - end_address >= 64) {
|
||||||
_mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
|
_mm512_store_si512((__m512i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 64;
|
destination_bytes += 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,7 +75,7 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
|
||||||
|
|
||||||
__m256i simd_value = _mm256_set1_epi8(value_bytes);
|
__m256i simd_value = _mm256_set1_epi8(value_bytes);
|
||||||
while (destination_bytes - end_address >= 32) {
|
while (destination_bytes - end_address >= 32) {
|
||||||
_mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
|
_mm256_store_si256((__m256i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 32;
|
destination_bytes += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -92,14 +92,12 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
|
||||||
|
|
||||||
__m128i simd_value = _mm_set1_epi8(value_bytes);
|
__m128i simd_value = _mm_set1_epi8(value_bytes);
|
||||||
while (destination_bytes - end_address >= 16) {
|
while (destination_bytes - end_address >= 16) {
|
||||||
_mm_storeu_si128((__m128i*)destination_bytes, simd_value);
|
_mm_store_si128((__m128i*)destination_bytes, simd_value);
|
||||||
destination_bytes += 16;
|
destination_bytes += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
default: {
|
default: {
|
||||||
size_t word_size = sizeof(size_t);
|
size_t word_size = sizeof(size_t);
|
||||||
|
|
@ -129,5 +127,159 @@ HallocyError hallocy_set_memory(void *destination, int value, const size_t count
|
||||||
destination_bytes += 1;
|
destination_bytes += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return HALLOCY_ERROR_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size) {
|
||||||
|
if (destination == NULL || source == NULL) {
|
||||||
|
return HALLOCY_ERROR_INVALID_POINTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char *destination_bytes = (unsigned char*)destination;
|
||||||
|
unsigned char *source_bytes = (unsigned char*)source;
|
||||||
|
|
||||||
|
unsigned char *end_address = destination_bytes + size;
|
||||||
|
|
||||||
|
switch (hallocy_is_simd_supported()) {
|
||||||
|
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
|
||||||
|
case HALLOCY_SIMD_NEON: {
|
||||||
|
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
|
||||||
|
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8x16_t simd_value;
|
||||||
|
while (destination_bytes - end_address >= 16) {
|
||||||
|
simd_value = vdupq_n_u8(source_bytes);
|
||||||
|
vst1q_u8(destination_bytes, simd_value);
|
||||||
|
destination_bytes += 16;
|
||||||
|
source_bytes += 16;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
case HALLOCY_SIMD_AVX512: {
|
||||||
|
if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
|
||||||
|
while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__m512i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 64) {
|
||||||
|
simd_value = _mm512_load_si512((__m512i*)source_bytes);
|
||||||
|
_mm512_store_si512((__m512i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 64;
|
||||||
|
source_bytes += 64;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
__m512i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 64) {
|
||||||
|
simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
|
||||||
|
_mm512_storeu_si512((__m512i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 64;
|
||||||
|
source_bytes += 64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case HALLOCY_SIMD_AVX2:
|
||||||
|
case HALLOCY_SIMD_AVX: {
|
||||||
|
if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 64) {
|
||||||
|
while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__m256i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 32) {
|
||||||
|
simd_value = _mm256_load_si256((__m256i*)source_bytes);
|
||||||
|
_mm256_store_si256((__m256i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 32;
|
||||||
|
source_bytes += 32;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
__m256i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 32) {
|
||||||
|
simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
|
||||||
|
_mm256_storeu_si256((__m256i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 32;
|
||||||
|
source_bytes += 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case HALLOCY_SIMD_SSE2:
|
||||||
|
case HALLOCY_SIMD_SSE: {
|
||||||
|
if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 64) {
|
||||||
|
while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__m128i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 16) {
|
||||||
|
simd_value = _mm_load_si128((__m128i*)source_bytes);
|
||||||
|
_mm_store_si128((__m128i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 16;
|
||||||
|
source_bytes += 16;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
__m128i simd_value;
|
||||||
|
while (destination_bytes - end_address >= 16) {
|
||||||
|
simd_value = _mm_loadu_si128((__m128i*)source_bytes);
|
||||||
|
_mm_storeu_si128((__m128i*)destination, simd_value);
|
||||||
|
|
||||||
|
destination_bytes += 16;
|
||||||
|
source_bytes += 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
default: {
|
||||||
|
size_t word_size = sizeof(size_t);
|
||||||
|
if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) {
|
||||||
|
while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t *destination_word = (size_t*)destination_bytes;
|
||||||
|
size_t *source_word = (size_t*)source_bytes;
|
||||||
|
while ((unsigned char*)(destination_word + 1) < end_address) {
|
||||||
|
*destination_word = *source_word;
|
||||||
|
destination_word += 1;
|
||||||
|
source_word += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
source_bytes = (unsigned char*)source_word;
|
||||||
|
destination_bytes = (unsigned char*)destination_word;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (destination_bytes != end_address) {
|
||||||
|
*destination_bytes = *source_bytes;
|
||||||
|
destination_bytes += 1;
|
||||||
|
source_bytes += 1;
|
||||||
|
}
|
||||||
|
|
||||||
return HALLOCY_ERROR_NONE;
|
return HALLOCY_ERROR_NONE;
|
||||||
}
|
}
|
||||||
|
|
@ -45,9 +45,13 @@
|
||||||
char *text = (char*)hallocy_malloc(36);
|
char *text = (char*)hallocy_malloc(36);
|
||||||
hallocy_set_memory(text, 'T', 35);
|
hallocy_set_memory(text, 'T', 35);
|
||||||
|
|
||||||
text[100] = '\0';
|
text[35] = '\0';
|
||||||
printf("%s\n", text);
|
printf("%s\n", text);
|
||||||
|
|
||||||
|
char *text_copy = (char*)hallocy_malloc(36);
|
||||||
|
hallocy_copy_memory(text_copy, text, 36);
|
||||||
|
printf("%s\n", text_copy);
|
||||||
|
|
||||||
hallocy_free(text);
|
hallocy_free(text);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue