/*
 * ====================================================
 * Copyright (C) 2007 by Ellips BV. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#ifndef __x86_64
#include "../i386/memset.S"
#else
  #include "x86_64mach.h"

/*
 * void *memset (void *dst, int c, size_t n)
 *
 * Register names (rdi, rax, ...) are macros provided by x86_64mach.h;
 * operands are written in AT&T order (source, destination).
 *
 * In:     rdi = dst, sil = fill byte (low 8 bits of c), rdx = n
 *         (this assignment matches the System V AMD64 convention)
 * Out:    rax = dst
 * Clobb:  rcx, rdx, rdi, r8, r9, flags
 * Note:   no cld is executed, so the rep stos instructions rely on the
 *         direction flag being clear on entry.
 *
 * Strategy:
 *   n < 16               plain byte fill.
 *   otherwise            byte-fill up to the next 8-byte boundary, then
 *     remaining < 256    rep stosq plus a byte tail (0..7 bytes);
 *     remaining >= 256   128-byte blocks of non-temporal stores plus a
 *                        byte tail (0..127 bytes).
 *
 * Branch targets use the .L prefix so they stay assembler-local and are
 * not emitted as symbols alongside memset.
 */
  .global SYM (memset)
  SOTYPE_FUNCTION(memset)

SYM (memset):
  movq    rdi, r9                     /* r9 = dst, kept for the return value */
  movq    rsi, rax                    /* al = fill byte for rep stosb */
  movq    rdx, rcx                    /* rcx = n (count for rep stos) */
  cmpq    $16, rdx
  jb      .Lmemset_bytes              /* n < 16: not worth aligning */

  movq    rdi, r8                     /* Align on quad word boundary */
  andq    $7, r8                      /* r8 = dst & 7 */
  jz      .Lmemset_aligned
  movq    $8, rcx
  subq    r8, rcx                     /* rcx = 8 - (dst & 7) leading bytes */
  subq    rcx, rdx                    /* rdx = bytes left after the head */
  rep     stosb
  movq    rdx, rcx                    /* rcx = remaining byte count */

.Lmemset_aligned:
  /* Here rdi is 8-byte aligned and rcx == rdx == bytes remaining. */
  movabs  $0x0101010101010101, r8
  movzbl  sil, eax                    /* rax = fill byte, zero-extended */
  imul    r8, rax                     /* replicate the byte into all 8 lanes */
  cmpq    $256, rdx
  jb      .Lmemset_quads

  shrq    $7, rcx                     /* rcx = number of 128-byte blocks (>= 2);
                                         store 128 bytes at a time with minimum
                                         cache pollution */

  .p2align 4
.Lmemset_nt_loop:
  movntiq rax,     (rdi)
  movntiq rax,   8 (rdi)
  movntiq rax,  16 (rdi)
  movntiq rax,  24 (rdi)
  movntiq rax,  32 (rdi)
  movntiq rax,  40 (rdi)
  movntiq rax,  48 (rdi)
  movntiq rax,  56 (rdi)
  movntiq rax,  64 (rdi)
  movntiq rax,  72 (rdi)
  movntiq rax,  80 (rdi)
  movntiq rax,  88 (rdi)
  movntiq rax,  96 (rdi)
  movntiq rax, 104 (rdi)
  movntiq rax, 112 (rdi)
  movntiq rax, 120 (rdi)

  leaq    128 (rdi), rdi              /* advance dst by one block */

  dec     rcx
  jnz     .Lmemset_nt_loop

  sfence                              /* order the non-temporal stores before
                                         the stores that follow */
  movq    rdx, rcx
  andq    $127, rcx                   /* tail = remaining % 128 */
  rep     stosb
  movq    r9, rax                     /* return dst */
  ret


.Lmemset_bytes:
  /* Short fill: rcx = n, al = fill byte. */
  rep     stosb
  movq    r9, rax                     /* return dst */
  ret


.Lmemset_quads:
  /* Medium fill: rax = replicated byte, rcx == rdx == bytes remaining. */
  shrq    $3, rcx                     /* rcx = number of whole quad words */
  .p2align 4
  rep     stosq
  movq    rdx, rcx
  andq    $7, rcx                     /* tail = remaining % 8 */
  rep     stosb                       /* Store the remaining bytes */
  movq    r9, rax                     /* return dst */
  ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#endif