/*
 * memset - fill memory with a constant
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised for a Cortex-A9 and should work on
   all ARMv7 processors.

 */

| .syntax unified |
| .arch armv7-a |
| |
| @ 2011-08-30 david.gilbert@linaro.org |
| @ Extracted from local git 2f11b436 |
| |
| @ this lets us check a flag in a 00/ff byte easily in either endianness |
| #ifdef __ARMEB__ |
| #define CHARTSTMASK(c) 1<<(31-(c*8)) |
| #else |
| #define CHARTSTMASK(c) 1<<(c*8) |
| #endif |
| .text |
| .thumb |
| |
@ ---------------------------------------------------------------------------
@ void *__memset_arm(void *s, int c, size_t n)
@ ABI:    AAPCS (ARM EABI), Thumb-2 encoding
@ In:     r0 = destination, r1 = fill character, r2 = byte count
@ Out:    r0 = original destination pointer (never modified)
@ Clobb:  r1, r2, r3, flags (r4-r7 are saved/restored around the bulk loop)
@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	@ r0 = address
	@ r1 = character
	@ r2 = count
	@ returns original address in r0

	mov	r3, r0			@ work through r3; r0 is the return value
	cbz	r2, 10f			@ exit if 0 length

	tst	r0, #7
	beq	2f			@ already 8-byte aligned

	@ Misaligned: store single bytes until r3 is 8-byte aligned.
1:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7
	cbz	r2, 10f			@ exit if we hit the end (cbz leaves flags intact)
	bne	1b			@ flags still set by the tst: loop while misaligned

2:
	@ OK, so we're aligned
	push	{r4,r5,r6,r7}
	bics	r4, r2, #15		@ r4 = count rounded down to a multiple of 16;
	beq	5f			@ if less than 16 bytes, finish byte-wise at 5:

3:
	@ POSIX says that ch is cast to an unsigned char.  A uxtb is one
	@ byte and takes two cycles, where an AND is four bytes but one
	@ cycle.
	and	r1, #0xFF
	orr	r1, r1, r1, lsl#8	@ same character into all four bytes
	orr	r1, r1, r1, lsl#16
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1

4:
	subs	r4, r4, #16
	stmia	r3!, {r1,r5,r6,r7}	@ 16 bytes per iteration
	bne	4b
	and	r2, r2, #15		@ bytes still outstanding (0..15)

	@ Still aligned, with up to 15 bytes left to write; knock out an
	@ 8-byte chunk here before falling back to byte-at-a-time.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1,r5}

5:
	pop	{r4,r5,r6,r7}
	cbz	r2, 10f

	@ Trailing bytes (up to 15 on the short path from 2:, up to 7
	@ otherwise).  strb stores only the low byte of r1, so this is
	@ correct whether or not the replication at 3: was executed.
6:
	subs	r2, r2, #1
	strb	r1, [r3], #1
	bne	6b

10:
	bx	lr			@ goodbye
	.size	__memset_arm, . - __memset_arm