/*
 * ====================================================
 * Copyright (C) 2007 by Ellips BV. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#include <picolibc.h>

#ifndef __x86_64
#include "../i386/memset.S"
#else
  #include "x86_64mach.h"

/*
 * void *memset (void *dst, int c, size_t n)
 *
 * Operand order is AT&T (source, destination).  Register names are written
 * without a '%' prefix; presumably x86_64mach.h maps them to the real
 * register operands -- confirm against that header.
 *
 * In:    rdi = dst, rsi = c (only the low byte, sil, is used), rdx = n
 *        (the System V AMD64 argument registers)
 * Out:   rax = the original dst
 * Uses:  rcx, rdx, rdi, r8, r9 and the flags; no stack, no calls
 *
 * Strategy:
 *   n < 16            fill byte by byte
 *   n >= 16           fill byte by byte up to the next 8-byte boundary, then
 *     left < 256      fill 8 bytes at a time, byte-fill the last 0..7 bytes
 *     left >= 256     fill 128-byte blocks with non-temporal stores, fence,
 *                     then byte-fill the last 0..127 bytes
 *
 * The string stores are expected to walk upwards through memory, so the
 * direction flag must be clear on entry (the System V ABI guarantees this
 * at function entry).
 *
 * Branch targets use the .L prefix so that they stay assembler-local and do
 * not appear as separate symbols inside memset.
 */

  .global SYM (memset)
  SOTYPE_FUNCTION(memset)

SYM (memset):
  movq    rdi, r9                     /* r9 = dst, kept for the return value */
  movq    rsi, rax                    /* al = fill byte for rep stosb */
  movq    rdx, rcx                    /* rcx = byte count for rep stosb */
  cmpq    $16, rdx
  jb      .Lbyte_set                  /* too short to be worth aligning */

  movq    rdi, r8                     /* Align on quad word boundary */
  andq    $7, r8                      /* r8 = dst & 7 */
  jz      .Lquadword_aligned
  movq    $8, rcx
  subq    r8, rcx                     /* rcx = 8 - (dst & 7) = bytes to boundary */
  subq    rcx, rdx                    /* rdx = bytes left after the head */
  rep     stosb
  movq    rdx, rcx                    /* rcx = rdx = bytes left */

.Lquadword_aligned:
  /* Here rdi is 8-byte aligned and rcx == rdx == bytes left (at least 9). */
  movabs  $0x0101010101010101, r8
  movzbl  sil, eax                    /* rax = fill byte, zero-extended */
  imul    r8, rax                     /* replicate the byte into all 8 lanes */
  cmpq    $256, rdx
  jb      .Lquadword_set

  shrq    $7, rcx                     /* rcx = number of 128-byte blocks (>= 2); stored with minimum cache pollution */

  .p2align 4
.Lnt_loop:
  movntiq rax,     (rdi)
  movntiq rax,   8 (rdi)
  movntiq rax,  16 (rdi)
  movntiq rax,  24 (rdi)
  movntiq rax,  32 (rdi)
  movntiq rax,  40 (rdi)
  movntiq rax,  48 (rdi)
  movntiq rax,  56 (rdi)
  movntiq rax,  64 (rdi)
  movntiq rax,  72 (rdi)
  movntiq rax,  80 (rdi)
  movntiq rax,  88 (rdi)
  movntiq rax,  96 (rdi)
  movntiq rax, 104 (rdi)
  movntiq rax, 112 (rdi)
  movntiq rax, 120 (rdi)

  leaq    128 (rdi), rdi              /* advance dst; lea leaves the flags alone */

  dec     rcx
  jnz     .Lnt_loop

  sfence                              /* order the non-temporal stores before returning */
  movq    rdx, rcx
  andq    $127, rcx                   /* rcx = bytes left over after the 128-byte blocks */
  rep     stosb
  movq    r9, rax                     /* return the original dst */
  ret


.Lbyte_set:
  /* rcx = n (< 16), al = fill byte */
  rep     stosb
  movq    r9, rax                     /* return the original dst */
  ret


.Lquadword_set:
  /* rdi is 8-byte aligned, rax = replicated fill byte, rcx == rdx < 256 */
  shrq    $3, rcx                     /* rcx = number of whole quad words */
  rep     stosq
  movq    rdx, rcx
  andq    $7, rcx
  rep     stosb                       /* Store the remaining bytes */
  movq    r9, rax                     /* return the original dst */
  ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#endif