// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
//    __aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
// Standard:
//    memcpy
// Support:
//    __ndsabi_memcpy2, __ndsabi_memcpy1

#include "macros.inc"

    .syntax unified

    .arm

BEGIN_ASM_FUNC __aeabi_memcpy

    @ > 6 bytes is roughly the threshold at which a byte-by-byte copy
    @ becomes slower than the aligned paths below
    cmp     r2, #6
    ble     __ndsabi_memcpy1

    @ Dispatch on the relative alignment of r0 and r1 (r3 is scratch):
    @ if they differ in bit 0, only a byte copy is possible; if they differ
    @ in bit 1, only a halfword copy is possible; otherwise fall through
    align_switch r0, r1, r3, __ndsabi_memcpy1, .Lcopy_halves

    @ Check if r0 (or r1) needs word aligning: r3 = 4 - r0 is the number of
    @ head bytes (mod 4) to reach word alignment. joaobapt_test sets
    @ N from bit 0 of r3 and C from bit 1, so "mi" selects a 1-byte head
    @ copy and "cs" a 2-byte head copy
    rsbs    r3, r0, #4
    joaobapt_test r3

    @ Copy byte head to align
    ldrbmi  r3, [r1], #1
    strbmi  r3, [r0], #1
    submi   r2, r2, #1
    @ r0, r1 are now half aligned

    @ Copy half head to align
    ldrhcs  r3, [r1], #2
    strhcs  r3, [r0], #2
    subcs   r2, r2, #2
    @ r0, r1 are now word aligned

BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy4

    cmp     r2, #32
    blt     .Lcopy_words

    @ Word aligned, 32-byte copy
    push    {r4-r10}
.Lloop_32:
    subs    r2, r2, #32
    ldmiage r1!, {r3-r10}
    stmiage r0!, {r3-r10}
    bgt     .Lloop_32
    pop     {r4-r10}
    bxeq    lr

    @ < 32 bytes remain to be copied
    add     r2, r2, #32

.Lcopy_words:
    cmp     r2, #4
    blt     .Lcopy_halves
.Lloop_4:
    subs    r2, r2, #4
    ldrge   r3, [r1], #4
    strge   r3, [r0], #4
    bgt     .Lloop_4
    bxeq    lr

    @ Copy byte & half tail
    @ This test still works when r2 is negative
    joaobapt_test r2
    @ Copy half
    ldrhcs  r3, [r1], #2
    strhcs  r3, [r0], #2
    @ Copy byte
    ldrbmi  r3, [r1]
    strbmi  r3, [r0]
    bx      lr

.Lcopy_halves:
    @ Copy byte head to align (align_switch guaranteed that r0 and r1 agree
    @ in bit 0, so testing r0 alone suffices)
    tst     r0, #1
    ldrbne  r3, [r1], #1
    strbne  r3, [r0], #1
    subne   r2, r2, #1
    @ r0, r1 are now half aligned

BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy2

    subs    r2, r2, #2
    ldrhge  r3, [r1], #2
    strhge  r3, [r0], #2
    bgt     __ndsabi_memcpy2
    bxeq    lr

    @ Copy byte tail
    adds    r2, r2, #2
    ldrbne  r3, [r1]
    strbne  r3, [r0]
    bx      lr

BEGIN_ASM_FUNC __ndsabi_memcpy1

    subs    r2, r2, #1
    ldrbge  r3, [r1], #1
    strbge  r3, [r0], #1
    bgt     __ndsabi_memcpy1
    bx      lr

BEGIN_ASM_FUNC memcpy

    @ memcpy must return the destination pointer, so preserve r0 across the
    @ call to __aeabi_memcpy (which is free to clobber it)
    push    {r0, lr}
    bl      __aeabi_memcpy
    pop     {r0, lr}
    bx      lr
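
@ ----------------------------------------------------------------------------
@ Reference sketch, not assembled: the helper macros used above come from
@ macros.inc. The definitions below are an assumption based on upstream
@ agbabi and are repeated here, commented out, only to document the flag
@ convention this file relies on (N = bit 0, C = bit 1 after the test).
@
@ .macro joaobapt_test reg
@     lsls    \reg, \reg, #31     @ bit 0 -> N (mi), bit 1 -> C (cs)
@ .endm
@
@ .macro joaobapt_switch reg, b_mi, b_cs
@     lsls    \reg, \reg, #31
@     bmi     \b_mi               @ bit 0 set: byte granularity only
@     bcs     \b_cs               @ bit 1 set: halfword granularity only
@ .endm
@
@ .macro align_switch a, b, scratch, byte_copy, halfword_copy
@     eor     \scratch, \a, \b    @ differing low bits limit the copy width
@     joaobapt_switch \scratch, \byte_copy, \halfword_copy
@ .endm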
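
@ Usage sketch (illustrative only; the buffer labels are hypothetical):
@
@     ldr     r0, =dst_buf        @ destination address
@     ldr     r1, =src_buf        @ source address
@     mov     r2, #19             @ byte count: any size or alignment
@     bl      __aeabi_memcpy
@
@ Per the AEABI, __aeabi_memcpy4 and __aeabi_memcpy8 may only be called with
@ pointers that are already 4- and 8-byte aligned respectively, and the
@ __aeabi_* entry points return no value; only memcpy returns the
@ destination pointer in r0.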