/* * Copyright (c) 2007 Tim Kelly/Dialectronics * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to permit * persons to whom the Software is furnished to do so, subject to the * following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/*
 * void ppc_memcpy_4(void* src, void* dst, int size);
 *
 * Copies size bytes from src to dst, choosing the copy direction so that
 * overlapping regions are handled.  Odd bytes are moved first (one byte,
 * then one halfword), after which the remainder is moved a word at a time.
 *
 * This approach does not consider alignments: halfword and word accesses
 * are issued at whatever address the arguments produce.
 *
 * In:
 *   r3 = source address
 *   r4 = destination address
 *   r5 = number of bytes (treated as signed; <= 0 copies nothing)
 *
 * Out:
 *   nothing (no return value is set up)
 *
 * Modified:
 *   r0 (held at zero), r5, r6, r7, cr0, cr1.
 *   r3 and r4 are additionally advanced on the forward-copy path.
 */
	.text
	.globl	ppc_memcpy_4

ppc_memcpy_4:
	/* r0 is kept at zero for the register-register compares below */
	li	r0, 0

	/* nothing to do if size <= 0 or if src == dst */
	cmp	cr0, 0x0, r5, r0	/* size vs 0, signed on purpose   */
	cmp	cr1, 0x0, r3, r4	/* src vs dst, only EQ is tested  */
	ble-	cr0, done
	beq-	cr1, done

debug_33:
	/*
	 * Direction selection.
	 *
	 *   dst > src                     -> copy from the end
	 *   dst < src and dst+nbytes > src -> regions overlap, copy from start
	 *   otherwise (no overlap)        -> copy from the end (the default)
	 *
	 * Addresses are unsigned quantities, so the logical compare (cmpl)
	 * is used; a signed compare picks the wrong direction when the
	 * operands lie on opposite sides of 0x80000000.
	 */
	add	r7, r5, r4		/* r7 = dst + nbytes              */
	cmpl	cr0, 0, r4, r3		/* dst vs src                     */
	cmpl	cr1, 0, r7, r3		/* dst + nbytes vs src            */
	bgt+	cr0, Copy4FromEnd	/* dst above src                  */
	bgt-	cr1, Copy4FromStart	/* dst below src, and overlapping */

	/* the default is to copy backwards */
Copy4FromEnd:
	/*
	 * r5 is used as a byte offset from both bases and is walked down
	 * to zero.  Prolog: peel off up to three trailing bytes so that the
	 * remaining count is a multiple of four.
	 */
get1_end:
	andi.	r7, r5, 0x01		/* odd count? (andi. sets cr0)    */
	beq	cr0, get2_end
	sub	r5, r5, r7		/* r5 -= 1                        */
	lbzx	r6, r5, r3		/* copy the last byte             */
	stbx	r6, r5, r4

get2_end:
	andi.	r7, r5, 0x02		/* halfword left over?            */
	beq	cr0, get4_end
	sub	r5, r5, r7		/* r5 -= 2                        */
	lhzx	r6, r5, r3		/* copy the last halfword         */
	sthx	r6, r5, r4

get4_end:
	/* make sure we haven't copied all of the bytes */
	cmp	cr0, 0x0, r5, r0
	beq-	cr0, done

	/* here r5 is a positive multiple of four */
loop4_end:
	subi	r5, r5, 0x04
	cmp	cr0, 0x0, r5, r0	/* more words below this one?     */
	lwzx	r6, r5, r3		/* loads/stores leave cr0 intact  */
	stwx	r6, r5, r4
	bgt+	cr0, loop4_end
	b	done

	/*
	 * Forward copy: src and dst themselves are incremented, r5 counts
	 * the bytes still to be moved.
	 */
Copy4FromStart:
	/* prolog: copy up to three leading bytes */
get1_st:
	andi.	r7, r5, 0x01		/* odd count? (andi. sets cr0)    */
	beq	cr0, get2_st
	sub	r5, r5, r7		/* r5 -= 1                        */
	lbz	r6, 0(r3)		/* copy the first byte            */
	stb	r6, 0(r4)
	addi	r3, r3, 0x01
	addi	r4, r4, 0x01

get2_st:
	andi.	r7, r5, 0x02		/* halfword left over?            */
	beq	cr0, get4_st
	sub	r5, r5, r7		/* r5 -= 2                        */
	lhz	r6, 0(r3)		/* copy the next halfword         */
	sth	r6, 0(r4)
	addi	r3, r3, 0x02
	addi	r4, r4, 0x02

	/* now loop on four byte read/writes until r5 reaches zero */
get4_st:
	cmp	cr0, 0x0, r5, r0
	beq	cr0, done
	subi	r5, r5, 0x04
	lwz	r6, 0(r3)
	stw	r6, 0(r4)
	addi	r3, r3, 0x04
	addi	r4, r4, 0x04
	b	get4_st

done:
	/* eieio */
	blr

	/*
	 * NOTE(review): nothing in the visible code branches to compx4, and
	 * the instructions after its "b done" carry no label, so everything
	 * from here on appears to be unreachable.  It is kept unchanged in
	 * case another part of the file refers to it -- confirm and remove.
	 */
compx4:
	subi	r5, r5, 0x04
	cmp	cr0, 0x0, r5, r0
	lwzx	r6, r5, r3
	stwx	r6, r5, r4
	bgt	cr0, compx4
	b	done

	/*
	 * Leftover fragment: computes |r3 - r4| and branches to the external
	 * symbol ppc_memcpy when that distance is three bytes or less.
	 * Presumably this was meant as a fallback to byte-wise copying --
	 * verify against ppc_memcpy before reviving it.
	 */
	sub.	r6, r3, r4		/* r6 = r3 - r4, cr0 set by sub.  */
	bgt+	cr0, +0x08		/* positive: skip the negate      */
	neg	r6, r6
	cmpi	cr0, 0, r6, 0x03	/* distance vs 3                  */
	ble-	cr0, ppc_memcpy