/* ANSI C standard library function memset.

   Copyright (c) 2001-2008 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include "xtensa-asm.h"

/* void *memset (void *dst, int c, size_t length)

   The algorithm is as follows:

   Create a word with c in all byte positions.

   If the destination is aligned, set 16B chunks with a loop, and then
   finish up with 8B, 4B, 2B, and 1B stores conditional on the length.

   If the destination is unaligned, align it by conditionally
   setting 1B and/or 2B and then go to aligned case.

   This code tries to use fall-through branches for the common
   case of an aligned destination (except for the branches to
   the alignment labels).

   Register usage:
     a2  dst on entry; only read, never written, so it is still dst
         when the function returns (the return value)
     a3  c on entry; after the entry sequence, a word holding the low
         byte of c in all four byte positions
     a4  length on entry; bytes still to be set (decremented only by
         the alignment stores)
     a5  address of the next store
     a6  end address for the store loops (only when !XCHAL_HAVE_LOOPS)
     a7  scratch while building the fill word, then the number of
         16-byte loop iterations

   When XTENSA_ESP32_PSRAM_CACHE_FIX is enabled, a memw is emitted after
   each 8-bit and 16-bit store sequence, and one extra 32-bit store is
   issued ahead of the 16-byte loop.  */


/* Byte-by-byte set.  */

        .text
        .begin schedule
        .align  XCHAL_INST_FETCH_WIDTH
        .literal_position
__memset_aux:

        /* Skip bytes to get proper alignment for three-byte loop */
        .skip   XCHAL_INST_FETCH_WIDTH - 3

/* Store a4 bytes from a3 starting at a5, one byte at a time, then
   return.  A zero count falls straight through to the return.  */
.Lbyteset:
#if XCHAL_HAVE_LOOPS
        loopnez a4, 2f
#else
        beqz    a4, 2f
        add     a6, a5, a4      // a6 = ending address
#endif
1:      s8i     a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
        memw
#endif
        addi    a5, a5, 1
#if !XCHAL_HAVE_LOOPS
        bltu    a5, a6, 1b
#endif
2:      leaf_return


/* Destination is unaligned.  */

        .align  4

.Ldst1mod2: // dst is only byte aligned

        /* Do short sizes byte-by-byte.  Requiring at least 8 bytes here
           also keeps a4 from underflowing in the alignment stores.  */
        bltui   a4, 8, .Lbyteset

        /* Set 1 byte.  */
        s8i     a3, a5, 0
        addi    a5, a5, 1
        addi    a4, a4, -1
#if XTENSA_ESP32_PSRAM_CACHE_FIX
        memw
#endif

        /* Now retest if dst is aligned.  a5 is now 2-byte aligned; if
           bit 1 is also clear it is word aligned, otherwise fall through
           to the 16-bit alignment step.  */
        _bbci.l a5, 1, .Ldstaligned

.Ldst2mod4: // dst has 16-bit alignment

        /* Do short sizes byte-by-byte.  */
        bltui   a4, 8, .Lbyteset

        /* Set 2 bytes.  */
        s16i    a3, a5, 0
        addi    a5, a5, 2
        addi    a4, a4, -2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
        memw
#endif

        /* dst is now aligned; return to main algorithm */
        j       .Ldstaligned


        .align  4
        .global memset
        .type   memset, @function
memset:
        leaf_entry sp, 16
        /* a2 = dst, a3 = c, a4 = length */

        /* Duplicate character into all bytes of word.  */
        extui   a3, a3, 0, 8    // keep only the low 8 bits of c
        slli    a7, a3, 8
        or      a3, a3, a7      // low 16 bits now hold the byte twice
        slli    a7, a3, 16
        or      a3, a3, a7      // all four bytes now hold the byte

        mov     a5, a2          // copy dst so that a2 is return value

        /* Check if dst is unaligned.  */
        _bbsi.l a2, 0, .Ldst1mod2
        _bbsi.l a2, 1, .Ldst2mod4
.Ldstaligned:

        /* Get number of loop iterations with 16B per iteration.  */
        srli    a7, a4, 4

#if XTENSA_ESP32_PSRAM_CACHE_FIX
        // Do not do this if we have less than one iteration to do: the
        // store below writes a full word at a5.
        beqz    a7, 2f
        // This seems to work to prefetch the cache line.
        s32i    a3, a5, 0
        nop
#endif

        /* Destination is word-aligned.  */
#if XCHAL_HAVE_LOOPS
        loopnez a7, 2f
#else
        beqz    a7, 2f
        slli    a6, a7, 4
        add     a6, a6, a5      // a6 = end of last 16B chunk
#endif
        /* Set 16 bytes per iteration.  */
1:      s32i    a3, a5, 0
        s32i    a3, a5, 4
        s32i    a3, a5, 8
        s32i    a3, a5, 12
        addi    a5, a5, 16
#if !XCHAL_HAVE_LOOPS
        bltu    a5, a6, 1b
#endif

        /* Set any leftover pieces smaller than 16B.  Bits 3..0 of the
           remaining length select the 8B, 4B, 2B and 1B stores.  */
2:      bbci.l  a4, 3, 3f

        /* Set 8 bytes.  */
        s32i    a3, a5, 0
        s32i    a3, a5, 4
        addi    a5, a5, 8

3:      bbci.l  a4, 2, 4f

        /* Set 4 bytes.  */
        s32i    a3, a5, 0
        addi    a5, a5, 4

4:      bbci.l  a4, 1, 5f

        /* Set 2 bytes.  */
        s16i    a3, a5, 0
        addi    a5, a5, 2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
        memw
#endif

5:      bbci.l  a4, 0, 6f

        /* Set 1 byte.  */
        s8i     a3, a5, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
        memw
#endif
6:      leaf_return

        .end schedule

        .size   memset, . - memset