/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * -----------------------------------------------------------------------------
 * File: Memory.c
 * Description:
 *  This file implements the functions for managing memory. It includes functions
 *  to copy, move, compare and set memory.
 *
 * Author: Mineplay
 * -----------------------------------------------------------------------------
 */
#include "../../Include/Hallocy/Core/Memory.h"
#include "../../Include/Hallocy/Utils/Simd.h"

/*
 * NOTE(review): the names of the two system headers were missing from the
 * reviewed copy of this file. The headers below cover every standard name and
 * intrinsic used in this file; confirm against the original include list.
 */
#include <stdbool.h>
#include <stddef.h>

#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
#include <arm_neon.h>
#else
#include <immintrin.h>
#endif

/*
 * NOTE(review): the x86 paths below group HALLOCY_SIMD_SSE with
 * HALLOCY_SIMD_SSE2 although every 128-bit integer intrinsic used requires
 * SSE2. Confirm that HALLOCY_SIMD_SSE is never reported without SSE2.
 *
 * The scalar fallback paths access memory through size_t pointers, exactly as
 * the previous implementation did; only the loop bounds were corrected.
 */

/*
 * Fills `size` bytes starting at `destination` with the low 8 bits of `value`.
 *
 * The value returned by hallocy_is_simd_supported() selects the store width.
 * Each path first writes single bytes until the destination is aligned to the
 * store width, then writes full-width blocks, and the shared loop at the end
 * writes whatever is left byte by byte.
 *
 * Returns HALLOCY_ERROR_INVALID_POINTER when `destination` is NULL and
 * HALLOCY_ERROR_NONE otherwise.
 */
HallocyError hallocy_set_memory(void *destination, int value, const size_t size) {
    if (destination == NULL) {
        return HALLOCY_ERROR_INVALID_POINTER;
    }

    unsigned char *destination_bytes = (unsigned char*)destination;
    unsigned char *end_address = destination_bytes + size;
    unsigned char value_bytes = (unsigned char)value;

    switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
        case HALLOCY_SIMD_NEON: {
            while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
                *destination_bytes = value_bytes;
                destination_bytes += 1;
            }

            uint8x16_t simd_value = vdupq_n_u8(value_bytes);
            /* Remaining byte count is end - cursor; the reverse is never positive. */
            while (end_address - destination_bytes >= 16) {
                vst1q_u8(destination_bytes, simd_value);
                destination_bytes += 16;
            }
            break;
        }
#else
        case HALLOCY_SIMD_AVX512: {
            while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
                *destination_bytes = value_bytes;
                destination_bytes += 1;
            }

            __m512i simd_value = _mm512_set1_epi8((char)value_bytes);
            while (end_address - destination_bytes >= 64) {
                _mm512_store_si512((__m512i*)destination_bytes, simd_value);
                destination_bytes += 64;
            }
            break;
        }
        case HALLOCY_SIMD_AVX2: {
            while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
                *destination_bytes = value_bytes;
                destination_bytes += 1;
            }

            __m256i simd_value = _mm256_set1_epi8((char)value_bytes);
            while (end_address - destination_bytes >= 32) {
                _mm256_store_si256((__m256i*)destination_bytes, simd_value);
                destination_bytes += 32;
            }
            break;
        }
        case HALLOCY_SIMD_AVX:
        case HALLOCY_SIMD_SSE2:
        case HALLOCY_SIMD_SSE: {
            while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
                *destination_bytes = value_bytes;
                destination_bytes += 1;
            }

            __m128i simd_value = _mm_set1_epi8((char)value_bytes);
            while (end_address - destination_bytes >= 16) {
                _mm_store_si128((__m128i*)destination_bytes, simd_value);
                destination_bytes += 16;
            }
            break;
        }
#endif
        default: {
            const size_t word_size = sizeof(size_t);
            while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) {
                *destination_bytes = value_bytes;
                destination_bytes += 1;
            }

            /* Replicate the fill byte into every byte of a machine word. */
            size_t value_word = 0;
            for (size_t i = 0; i < word_size; i++) {
                value_word |= (size_t)value_bytes << (i * 8);
            }

            size_t *destination_word = (size_t*)destination_bytes;
            /* Compare the remaining byte count so no pointer past the buffer is formed. */
            while ((size_t)(end_address - (unsigned char*)destination_word) >= word_size) {
                *destination_word = value_word;
                destination_word += 1;
            }

            destination_bytes = (unsigned char*)destination_word;
            break;
        }
    }

    while (destination_bytes != end_address) {
        *destination_bytes = value_bytes;
        destination_bytes += 1;
    }

    return HALLOCY_ERROR_NONE;
}

/*
 * Copies `size` bytes from `source` to `destination`, walking from the lowest
 * address to the highest.
 *
 * When both pointers have the same offset within the vector width, the head is
 * copied byte-wise until both are aligned and aligned loads/stores are used;
 * otherwise the x86 paths use unaligned loads/stores. The shared loop at the
 * end copies the remaining bytes one at a time.
 *
 * Returns HALLOCY_ERROR_INVALID_POINTER when either pointer is NULL and
 * HALLOCY_ERROR_NONE otherwise.
 */
HallocyError hallocy_copy_memory(void *destination, void *source, const size_t size) {
    if (destination == NULL || source == NULL) {
        return HALLOCY_ERROR_INVALID_POINTER;
    }

    unsigned char *destination_bytes = (unsigned char*)destination;
    unsigned char *source_bytes = (unsigned char*)source;
    unsigned char *end_address = destination_bytes + size;

    switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
        case HALLOCY_SIMD_NEON: {
            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
                    *destination_bytes = *source_bytes;
                    destination_bytes += 1;
                    source_bytes += 1;
                }
            }

            while (end_address - destination_bytes >= 16) {
                /* vld1q_u8 loads 16 bytes; vdupq_n_u8 would only broadcast one value. */
                uint8x16_t simd_value = vld1q_u8(source_bytes);
                vst1q_u8(destination_bytes, simd_value);
                destination_bytes += 16;
                source_bytes += 16;
            }
            break;
        }
#else
        case HALLOCY_SIMD_AVX512: {
            if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
                while (((size_t)destination_bytes % 64) != 0 && destination_bytes != end_address) {
                    *destination_bytes = *source_bytes;
                    destination_bytes += 1;
                    source_bytes += 1;
                }

                while (end_address - destination_bytes >= 64) {
                    __m512i simd_value = _mm512_load_si512((__m512i*)source_bytes);
                    _mm512_store_si512((__m512i*)destination_bytes, simd_value);
                    destination_bytes += 64;
                    source_bytes += 64;
                }
            } else {
                while (end_address - destination_bytes >= 64) {
                    __m512i simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
                    _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
                    destination_bytes += 64;
                    source_bytes += 64;
                }
            }
            break;
        }
        case HALLOCY_SIMD_AVX2:
        case HALLOCY_SIMD_AVX: {
            /* Both offsets must be taken modulo the same 32-byte vector width. */
            if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 32) {
                while (((size_t)destination_bytes % 32) != 0 && destination_bytes != end_address) {
                    *destination_bytes = *source_bytes;
                    destination_bytes += 1;
                    source_bytes += 1;
                }

                while (end_address - destination_bytes >= 32) {
                    __m256i simd_value = _mm256_load_si256((__m256i*)source_bytes);
                    _mm256_store_si256((__m256i*)destination_bytes, simd_value);
                    destination_bytes += 32;
                    source_bytes += 32;
                }
            } else {
                while (end_address - destination_bytes >= 32) {
                    __m256i simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
                    _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
                    destination_bytes += 32;
                    source_bytes += 32;
                }
            }
            break;
        }
        case HALLOCY_SIMD_SSE2:
        case HALLOCY_SIMD_SSE: {
            /* Both offsets must be taken modulo the same 16-byte vector width. */
            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != end_address) {
                    *destination_bytes = *source_bytes;
                    destination_bytes += 1;
                    source_bytes += 1;
                }

                while (end_address - destination_bytes >= 16) {
                    __m128i simd_value = _mm_load_si128((__m128i*)source_bytes);
                    _mm_store_si128((__m128i*)destination_bytes, simd_value);
                    destination_bytes += 16;
                    source_bytes += 16;
                }
            } else {
                while (end_address - destination_bytes >= 16) {
                    __m128i simd_value = _mm_loadu_si128((__m128i*)source_bytes);
                    _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
                    destination_bytes += 16;
                    source_bytes += 16;
                }
            }
            break;
        }
#endif
        default: {
            const size_t word_size = sizeof(size_t);
            /* Word copies are only used when both pointers can be aligned together. */
            if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) {
                while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != end_address) {
                    *destination_bytes = *source_bytes;
                    destination_bytes += 1;
                    source_bytes += 1;
                }

                size_t *destination_word = (size_t*)destination_bytes;
                size_t *source_word = (size_t*)source_bytes;
                while ((size_t)(end_address - (unsigned char*)destination_word) >= word_size) {
                    *destination_word = *source_word;
                    destination_word += 1;
                    source_word += 1;
                }

                source_bytes = (unsigned char*)source_word;
                destination_bytes = (unsigned char*)destination_word;
            }
            break;
        }
    }

    while (destination_bytes != end_address) {
        *destination_bytes = *source_bytes;
        destination_bytes += 1;
        source_bytes += 1;
    }

    return HALLOCY_ERROR_NONE;
}

/*
 * Copies `size` bytes from `source` to `destination` where the two ranges may
 * overlap.
 *
 * When the destination starts inside the source range (source < destination <
 * source + size) the copy runs from the highest address down to the lowest so
 * that source bytes are read before they are overwritten. Every other layout
 * is delegated to hallocy_copy_memory(), which copies front to back.
 *
 * Returns HALLOCY_ERROR_INVALID_POINTER when either pointer is NULL and
 * HALLOCY_ERROR_NONE otherwise.
 */
HallocyError hallocy_move_memory(void *destination, void *source, const size_t size) {
    if (destination == NULL || source == NULL) {
        return HALLOCY_ERROR_INVALID_POINTER;
    }

    /* Compare addresses as integers; the subtraction form cannot wrap around. */
    const size_t destination_address = (size_t)destination;
    const size_t source_address = (size_t)source;
    const bool needs_backward_copy = source_address < destination_address
        && destination_address - source_address < size;
    if (!needs_backward_copy) {
        return hallocy_copy_memory(destination, source, size);
    }

    /* The cursors start one past the last byte and walk down to start_address. */
    unsigned char *start_address = (unsigned char*)destination;
    unsigned char *destination_bytes = (unsigned char*)destination + size;
    unsigned char *source_bytes = (unsigned char*)source + size;

    switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
        case HALLOCY_SIMD_NEON: {
            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != start_address) {
                    destination_bytes -= 1;
                    source_bytes -= 1;
                    *destination_bytes = *source_bytes;
                }
            }

            /* Remaining byte count is cursor - start when walking backwards. */
            while (destination_bytes - start_address >= 16) {
                destination_bytes -= 16;
                source_bytes -= 16;
                uint8x16_t simd_value = vld1q_u8(source_bytes);
                vst1q_u8(destination_bytes, simd_value);
            }
            break;
        }
#else
        case HALLOCY_SIMD_AVX512: {
            if ((size_t)destination_bytes % 64 == (size_t)source_bytes % 64) {
                while (((size_t)destination_bytes % 64) != 0 && destination_bytes != start_address) {
                    destination_bytes -= 1;
                    source_bytes -= 1;
                    *destination_bytes = *source_bytes;
                }

                while (destination_bytes - start_address >= 64) {
                    destination_bytes -= 64;
                    source_bytes -= 64;
                    __m512i simd_value = _mm512_load_si512((__m512i*)source_bytes);
                    _mm512_store_si512((__m512i*)destination_bytes, simd_value);
                }
            } else {
                while (destination_bytes - start_address >= 64) {
                    destination_bytes -= 64;
                    source_bytes -= 64;
                    __m512i simd_value = _mm512_loadu_si512((__m512i*)source_bytes);
                    _mm512_storeu_si512((__m512i*)destination_bytes, simd_value);
                }
            }
            break;
        }
        case HALLOCY_SIMD_AVX2:
        case HALLOCY_SIMD_AVX: {
            if ((size_t)destination_bytes % 32 == (size_t)source_bytes % 32) {
                while (((size_t)destination_bytes % 32) != 0 && destination_bytes != start_address) {
                    destination_bytes -= 1;
                    source_bytes -= 1;
                    *destination_bytes = *source_bytes;
                }

                while (destination_bytes - start_address >= 32) {
                    destination_bytes -= 32;
                    source_bytes -= 32;
                    __m256i simd_value = _mm256_load_si256((__m256i*)source_bytes);
                    _mm256_store_si256((__m256i*)destination_bytes, simd_value);
                }
            } else {
                while (destination_bytes - start_address >= 32) {
                    destination_bytes -= 32;
                    source_bytes -= 32;
                    __m256i simd_value = _mm256_loadu_si256((__m256i*)source_bytes);
                    _mm256_storeu_si256((__m256i*)destination_bytes, simd_value);
                }
            }
            break;
        }
        case HALLOCY_SIMD_SSE2:
        case HALLOCY_SIMD_SSE: {
            if ((size_t)destination_bytes % 16 == (size_t)source_bytes % 16) {
                while (((size_t)destination_bytes % 16) != 0 && destination_bytes != start_address) {
                    destination_bytes -= 1;
                    source_bytes -= 1;
                    *destination_bytes = *source_bytes;
                }

                while (destination_bytes - start_address >= 16) {
                    destination_bytes -= 16;
                    source_bytes -= 16;
                    __m128i simd_value = _mm_load_si128((__m128i*)source_bytes);
                    _mm_store_si128((__m128i*)destination_bytes, simd_value);
                }
            } else {
                while (destination_bytes - start_address >= 16) {
                    destination_bytes -= 16;
                    source_bytes -= 16;
                    __m128i simd_value = _mm_loadu_si128((__m128i*)source_bytes);
                    _mm_storeu_si128((__m128i*)destination_bytes, simd_value);
                }
            }
            break;
        }
#endif
        default: {
            const size_t word_size = sizeof(size_t);
            if ((size_t)destination_bytes % word_size == (size_t)source_bytes % word_size) {
                while (((size_t)destination_bytes % word_size) != 0 && destination_bytes != start_address) {
                    destination_bytes -= 1;
                    source_bytes -= 1;
                    *destination_bytes = *source_bytes;
                }

                size_t *destination_word = (size_t*)destination_bytes;
                size_t *source_word = (size_t*)source_bytes;
                /* Check the remaining byte count so no pointer below the buffer is formed. */
                while ((size_t)((unsigned char*)destination_word - start_address) >= word_size) {
                    destination_word -= 1;
                    source_word -= 1;
                    *destination_word = *source_word;
                }

                source_bytes = (unsigned char*)source_word;
                destination_bytes = (unsigned char*)destination_word;
            }
            break;
        }
    }

    while (destination_bytes != start_address) {
        destination_bytes -= 1;
        source_bytes -= 1;
        *destination_bytes = *source_bytes;
    }

    return HALLOCY_ERROR_NONE;
}

/*
 * Reports whether the first `size` bytes at `left_side` and `right_side` are
 * identical. Neither buffer is modified.
 *
 * Returns false when either pointer is NULL or when any byte differs, and true
 * otherwise (including when `size` is 0 and both pointers are non-NULL).
 */
bool hallocy_compare_memory(void *left_side, void *right_side, const size_t size) {
    if (left_side == NULL || right_side == NULL) {
        return false;
    }

    unsigned char *left_side_bytes = (unsigned char*)left_side;
    unsigned char *right_side_bytes = (unsigned char*)right_side;
    unsigned char *end_address = left_side_bytes + size;

    switch (hallocy_is_simd_supported()) {
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm__)
        case HALLOCY_SIMD_NEON: {
            if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
                while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }
            }

            while (end_address - left_side_bytes >= 16) {
                uint8x16_t simd_left_side_value = vld1q_u8(left_side_bytes);
                uint8x16_t simd_right_side_value = vld1q_u8(right_side_bytes);

                /* XOR is zero only where bytes match; test both 64-bit halves at once. */
                uint64x2_t difference = vreinterpretq_u64_u8(veorq_u8(simd_left_side_value, simd_right_side_value));
                if ((vgetq_lane_u64(difference, 0) | vgetq_lane_u64(difference, 1)) != 0) {
                    return false;
                }

                left_side_bytes += 16;
                right_side_bytes += 16;
            }
            break;
        }
#else
        case HALLOCY_SIMD_AVX512: {
            if ((size_t)left_side_bytes % 64 == (size_t)right_side_bytes % 64) {
                while (((size_t)left_side_bytes % 64) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 64) {
                    __m512i simd_left_side_value = _mm512_load_si512((__m512i*)left_side_bytes);
                    __m512i simd_right_side_value = _mm512_load_si512((__m512i*)right_side_bytes);

                    __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value);
                    if (_mm512_test_epi64_mask(result, result) != 0) {
                        return false;
                    }

                    left_side_bytes += 64;
                    right_side_bytes += 64;
                }
            } else {
                while (end_address - left_side_bytes >= 64) {
                    __m512i simd_left_side_value = _mm512_loadu_si512((__m512i*)left_side_bytes);
                    __m512i simd_right_side_value = _mm512_loadu_si512((__m512i*)right_side_bytes);

                    __m512i result = _mm512_xor_si512(simd_left_side_value, simd_right_side_value);
                    if (_mm512_test_epi64_mask(result, result) != 0) {
                        return false;
                    }

                    left_side_bytes += 64;
                    right_side_bytes += 64;
                }
            }
            break;
        }
        case HALLOCY_SIMD_AVX2:
        case HALLOCY_SIMD_AVX: {
            /*
             * This case also serves HALLOCY_SIMD_AVX, so the XOR is done with
             * the AVX bitwise float instruction instead of the AVX2-only
             * _mm256_xor_si256.
             */
            if ((size_t)left_side_bytes % 32 == (size_t)right_side_bytes % 32) {
                while (((size_t)left_side_bytes % 32) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 32) {
                    __m256i simd_left_side_value = _mm256_load_si256((__m256i*)left_side_bytes);
                    __m256i simd_right_side_value = _mm256_load_si256((__m256i*)right_side_bytes);

                    __m256i result = _mm256_castps_si256(_mm256_xor_ps(
                        _mm256_castsi256_ps(simd_left_side_value),
                        _mm256_castsi256_ps(simd_right_side_value)));
                    if (_mm256_testz_si256(result, result) == 0) {
                        return false;
                    }

                    left_side_bytes += 32;
                    right_side_bytes += 32;
                }
            } else {
                while (end_address - left_side_bytes >= 32) {
                    __m256i simd_left_side_value = _mm256_loadu_si256((__m256i*)left_side_bytes);
                    __m256i simd_right_side_value = _mm256_loadu_si256((__m256i*)right_side_bytes);

                    __m256i result = _mm256_castps_si256(_mm256_xor_ps(
                        _mm256_castsi256_ps(simd_left_side_value),
                        _mm256_castsi256_ps(simd_right_side_value)));
                    if (_mm256_testz_si256(result, result) == 0) {
                        return false;
                    }

                    left_side_bytes += 32;
                    right_side_bytes += 32;
                }
            }
            break;
        }
        case HALLOCY_SIMD_SSE2:
        case HALLOCY_SIMD_SSE: {
            /*
             * Byte-wise compare plus movemask only needs SSE2; the previous
             * _mm_testz_si128 requires SSE4.1.
             */
            if ((size_t)left_side_bytes % 16 == (size_t)right_side_bytes % 16) {
                while (((size_t)left_side_bytes % 16) != 0 && left_side_bytes != end_address) {
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                while (end_address - left_side_bytes >= 16) {
                    __m128i simd_left_side_value = _mm_load_si128((__m128i*)left_side_bytes);
                    __m128i simd_right_side_value = _mm_load_si128((__m128i*)right_side_bytes);

                    __m128i result = _mm_cmpeq_epi8(simd_left_side_value, simd_right_side_value);
                    if (_mm_movemask_epi8(result) != 0xFFFF) {
                        return false;
                    }

                    left_side_bytes += 16;
                    right_side_bytes += 16;
                }
            } else {
                while (end_address - left_side_bytes >= 16) {
                    __m128i simd_left_side_value = _mm_loadu_si128((__m128i*)left_side_bytes);
                    __m128i simd_right_side_value = _mm_loadu_si128((__m128i*)right_side_bytes);

                    __m128i result = _mm_cmpeq_epi8(simd_left_side_value, simd_right_side_value);
                    if (_mm_movemask_epi8(result) != 0xFFFF) {
                        return false;
                    }

                    left_side_bytes += 16;
                    right_side_bytes += 16;
                }
            }
            break;
        }
#endif
        default: {
            const size_t word_size = sizeof(size_t);
            if ((size_t)left_side_bytes % word_size == (size_t)right_side_bytes % word_size) {
                while (((size_t)left_side_bytes % word_size) != 0 && left_side_bytes != end_address) {
                    /* Compare only: a comparison must never write to its inputs. */
                    if (*left_side_bytes != *right_side_bytes) {
                        return false;
                    }

                    left_side_bytes += 1;
                    right_side_bytes += 1;
                }

                size_t *left_side_word = (size_t*)left_side_bytes;
                size_t *right_side_word = (size_t*)right_side_bytes;
                while ((size_t)(end_address - (unsigned char*)left_side_word) >= word_size) {
                    if (*left_side_word != *right_side_word) {
                        return false;
                    }

                    left_side_word += 1;
                    right_side_word += 1;
                }

                right_side_bytes = (unsigned char*)right_side_word;
                left_side_bytes = (unsigned char*)left_side_word;
            }
            break;
        }
    }

    while (left_side_bytes != end_address) {
        if (*left_side_bytes != *right_side_bytes) {
            return false;
        }

        left_side_bytes += 1;
        right_side_bytes += 1;
    }

    return true;
}