/* ANSI C standard library function memset.

   Copyright (c) 2001-2008 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <picolibc.h>

#include "xtensa-asm.h"

/* void *memset (void *dst, int c, size_t length)

   Outline:

     1. Replicate the low byte of c into every byte of a 32-bit word.
     2. Word-aligned dst: store 16 bytes per loop iteration, then mop up
	the remainder with 8B, 4B, 2B and 1B stores, each selected by the
	matching bit of the length.
     3. Unaligned dst: conditionally store 1B and/or 2B to reach word
	alignment, then continue with step 2.  Lengths below 8 on this
	path are handled by a plain byte loop instead.

   The aligned destination is treated as the common case and is kept on
   the fall-through path; only the alignment fix-ups branch away.

   Register roles:
     a2 = dst, left untouched so it is also the return value
     a3 = fill pattern (byte replicated into all four byte lanes)
     a4 = number of bytes still to set
     a5 = current store address
     a6 = end address (only in the !XCHAL_HAVE_LOOPS variants)
     a7 = scratch / count of 16-byte iterations  */


/* Byte-at-a-time fill: a4 bytes starting at a5.  */

	.text
	.begin schedule
	.align	XCHAL_INST_FETCH_WIDTH
	.literal_position
__memset_aux:

	/* Padding so that the three-byte loop below gets the alignment
	   it needs.  */
	.skip	XCHAL_INST_FETCH_WIDTH - 3

.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytedone		// loop body is skipped when a4 == 0
#else
	beqz	a4, .Lbytedone		// nothing to do for a zero length
	add	a6, a5, a4		// a6 = one past the last byte
#endif
.Lbyteloop:
	s8i	a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bltu	a5, a6, .Lbyteloop
#endif
.Lbytedone:
	leaf_return


/* Alignment fix-ups for a destination that is not word-aligned.  */

	.align	4

.Ldst1mod2:	// bit 0 of dst is set: byte alignment only

	/* Fewer than 8 bytes: just use the byte loop.  */
	bltui	a4, 8, .Lbyteset

	/* Store one byte to reach 16-bit alignment.  */
	s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

	/* If bit 1 of the new address is clear we are word-aligned.  */
	_bbci.l	a5, 1, .Ldstaligned

.Ldst2mod4:	// bit 1 of dst is set: 16-bit alignment

	/* Fewer than 8 bytes: just use the byte loop.  */
	bltui	a4, 8, .Lbyteset

	/* Store a halfword to reach word alignment.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

	/* Word-aligned now; rejoin the main path.  */
	j	.Ldstaligned


	.align	4
	.global	memset
	.type	memset, @function
memset:
	leaf_entry sp, 16
	/* On entry: a2 = dst, a3 = c, a4 = length.  */

	/* Build the fill word: keep the low 8 bits of c, then double the
	   pattern width twice (8 -> 16 -> 32 bits).  */
	extui	a3, a3, 0, 8
	slli	a7, a3, 8
	or	a3, a3, a7
	slli	a7, a3, 16
	or	a3, a3, a7

	mov	a5, a2			// work on a copy; a2 is returned as is

	/* Branch to the fix-up code if either low address bit is set.  */
	_bbsi.l	a2, 0, .Ldst1mod2
	_bbsi.l	a2, 1, .Ldst2mod4
.Ldstaligned:

	/* a7 = length / 16 = number of full 16-byte iterations.  */
	srli	a7, a4, 4

#if XTENSA_ESP32_PSRAM_CACHE_FIX
	// Skip the extra store when there is no full iteration to run.
	beqz	a7, .Ltail8
	// Extra store up front; this seems to work to prefetch the
	// cache line.
	s32i	a3, a5, 0
	nop
#endif

	/* Main loop; a5 is word-aligned here.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Ltail8
#else
	beqz	a7, .Ltail8
	slli	a6, a7, 4
	add	a6, a6, a5		// a6 = address just past the last 16B chunk
#endif
	/* Four word stores = 16 bytes per pass.  */
.Lloop16:
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	s32i	a3, a5, 8
	s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bltu	a5, a6, .Lloop16
#endif

	/* Remainder (< 16 bytes): bits 3..0 of the length select an 8B,
	   4B, 2B and 1B store respectively.  */
.Ltail8:
	bbci.l	a4, 3, .Ltail4

	/* 8 bytes.  */
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	addi	a5, a5, 8

.Ltail4:
	bbci.l	a4, 2, .Ltail2

	/* 4 bytes.  */
	s32i	a3, a5, 0
	addi	a5, a5, 4

.Ltail2:
	bbci.l	a4, 1, .Ltail1

	/* 2 bytes.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

.Ltail1:
	bbci.l	a4, 0, .Lret

	/* 1 byte.  */
	s8i	a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
.Lret:
	leaf_return

	.end schedule

	.size	memset, . - memset