/* ANSI C standard library function memset.

   Copyright (c) 2001-2008 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include "xtensa-asm.h"

/* void *memset (void *dst, int c, size_t length)

   Strategy:

     1. Replicate the low byte of c into every byte of a 32-bit word.

     2. When dst is word-aligned, store 16 bytes per loop iteration and
	then mop up the remainder with 8-, 4-, 2- and 1-byte stores,
	each one selected by the matching low bit of the length.

     3. When dst is not word-aligned, first store one byte and/or one
	halfword to reach word alignment, then continue with step 2.
	If fewer than 8 bytes remain while the address is still
	unaligned, everything is written one byte at a time instead.

   Register usage:
     a2  dst; never modified, so it doubles as the return value
     a3  c on entry, afterwards the replicated fill word
     a4  length; reduced by the bytes consumed while aligning
     a5  current store address
     a6  end address (only when zero-overhead loops are unavailable)
     a7  scratch, then the number of 16-byte iterations

   The word-aligned case is kept on the fall-through path; only the
   alignment fix-ups branch away from it.

   When XTENSA_ESP32_PSRAM_CACHE_FIX is enabled, every 8-bit and 16-bit
   store is followed by a memw, and the 16-byte loop is preceded by one
   extra word store.  */


/* Slow path: write a4 bytes one at a time starting at a5.  */

	.text
	.begin schedule
	.align	XCHAL_INST_FETCH_WIDTH
	.literal_position
__memset_aux:

	/* Padding so that the three-byte loop that follows gets
	   proper alignment.  */
	.skip	XCHAL_INST_FETCH_WIDTH - 3

.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbyteset_done
#else
	beqz	a4, .Lbyteset_done
	add	a6, a5, a4		// a6 = one past the last byte to write
#endif
.Lbyteset_loop:
	s8i	a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bltu	a5, a6, .Lbyteset_loop
#endif
.Lbyteset_done:
	leaf_return


/* Alignment fix-ups for a destination that is not word-aligned.  */

	.align	4

.Ldst1mod2:	// bit 0 of the address is set: odd address

	/* Fewer than 8 bytes: hand the whole job to the byte loop.  */
	bltui	a4, 8, .Lbyteset

	/* Store a single byte to reach an even address.  */
	s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

	/* If bit 1 of the new address is clear we are word-aligned.  */
	_bbci.l	a5, 1, .Ldstaligned

.Ldst2mod4:	// address is even but not a multiple of four

	/* Fewer than 8 bytes: hand the rest to the byte loop.  */
	bltui	a4, 8, .Lbyteset

	/* Store one halfword to reach a word boundary.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

	/* Word-aligned now; rejoin the main path.  */
	j	.Ldstaligned


	.align	4
	.global	memset
	.type	memset, @function
memset:
	leaf_entry sp, 16
	/* On entry: a2 = dst, a3 = c, a4 = length.  */

	/* Build the fill word: the low byte of c in all four byte lanes.  */
	extui	a3, a3, 0, 8
	slli	a7, a3, 8
	or	a3, a3, a7
	slli	a7, a3, 16
	or	a3, a3, a7

	mov	a5, a2			// work on a copy; a2 stays the return value

	/* Branch away if either of the two low address bits is set.  */
	_bbsi.l	a2, 0, .Ldst1mod2
	_bbsi.l	a2, 1, .Ldst2mod4
.Ldstaligned:

	/* a7 = length / 16 = number of full 16-byte iterations.  */
	srli	a7, a4, 4

#if XTENSA_ESP32_PSRAM_CACHE_FIX
	// Skip the extra store when there is not even one full iteration.
	beqz	a7, .Ltail8
	// Extra leading word store; this seems to work to prefetch the cache line.
	s32i	a3, a5, 0
	nop
#endif

	/* From here on a5 is word-aligned.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Ltail8
#else
	beqz	a7, .Ltail8
	slli	a6, a7, 4
	add	a6, a6, a5		// a6 = address just past the final 16-byte chunk
#endif
	/* Main loop: four word stores, 16 bytes per pass.  */
.Lset16_loop:
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	s32i	a3, a5, 8
	s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bltu	a5, a6, .Lset16_loop
#endif

	/* Remainder below 16 bytes: bits 3..0 of the length pick the
	   8-, 4-, 2- and 1-byte stores in turn.  */
.Ltail8:
	bbci.l	a4, 3, .Ltail4

	/* Bit 3 set: store 8 bytes.  */
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	addi	a5, a5, 8

.Ltail4:
	bbci.l	a4, 2, .Ltail2

	/* Bit 2 set: store 4 bytes.  */
	s32i	a3, a5, 0
	addi	a5, a5, 4

.Ltail2:
	bbci.l	a4, 1, .Ltail1

	/* Bit 1 set: store 2 bytes.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif

.Ltail1:
	bbci.l	a4, 0, .Ldone

	/* Bit 0 set: store the final byte.  */
	s8i	a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
.Ldone:
	leaf_return

	.end schedule

	.size	memset, . - memset