/*
 * ====================================================
 * Copyright (C) 2007 by Ellips BV. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#include "x86_64mach.h"

/*
 * memcpy
 *
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * Register names, SYM and SOTYPE_FUNCTION are macros from x86_64mach.h.
 *
 * In:   rdi = destination pointer
 *       rsi = source pointer
 *       rdx = number of bytes to copy
 * Out:  rax = destination pointer as it was passed in
 *
 * Strategy:
 *   - count < 16: one "rep movsb" and return.
 *   - otherwise 1..7 leading bytes are copied first when the destination
 *     is not on an 8-byte boundary.
 *   - remaining count < 256: "rep movsq" for the whole quadwords followed
 *     by "rep movsb" for the last 0..7 bytes.
 *   - remaining count >= 256: 128 bytes per loop iteration, loaded with
 *     ordinary moves and written with movnti, with prefetchnta issued 768
 *     and 832 bytes ahead of the source pointer; then an sfence and a
 *     "rep movsb" for the final (count & 127) bytes.
 *
 * Modifies: rcx, rdx, rsi, rdi, r8 and the flags on the paths that reach
 * them; the 128-byte loop additionally uses r9-r11.  r12-r14 (and rax,
 * which the loop uses as scratch) are pushed before the loop and popped
 * after it.
 *
 * No cld is issued, so the direction flag is expected to be clear on entry.
 */

/*
 * Copy the 64 bytes at offset \off from rsi to the same offset from rdi:
 * eight quadword loads followed by eight movnti stores.
 * Overwrites rax and r8-r14.
 */
        .macro  COPY_CHUNK_NT off
        movq    \off+0(rsi), rax
        movq    \off+8(rsi), r8
        movq    \off+16(rsi), r9
        movq    \off+24(rsi), r10
        movq    \off+32(rsi), r11
        movq    \off+40(rsi), r12
        movq    \off+48(rsi), r13
        movq    \off+56(rsi), r14

        movntiq rax, \off+0(rdi)
        movntiq r8, \off+8(rdi)
        movntiq r9, \off+16(rdi)
        movntiq r10, \off+24(rdi)
        movntiq r11, \off+32(rdi)
        movntiq r12, \off+40(rdi)
        movntiq r13, \off+48(rdi)
        movntiq r14, \off+56(rdi)
        .endm

        .global SYM (memcpy)
        SOTYPE_FUNCTION(memcpy)

SYM (memcpy):
        movq    rdi, rax                /* Return value: the destination pointer */
        cmpq    $16, rdx
        jb      .Lcopy_bytes            /* Fewer than 16 bytes: byte copy only */

        /* Bring the destination up to the next 8-byte boundary. */
        movq    rdi, r8
        andq    $7, r8                  /* r8 = destination address modulo 8 */
        jz      .Ldest_aligned
        movq    $8, rcx
        subq    r8, rcx                 /* rcx = 8 - r8 = bytes up to the boundary */
        subq    rcx, rdx                /* Take them off the remaining count */
        rep     movsb

.Ldest_aligned:
        cmpq    $256, rdx
        jb      .Lcopy_quadwords        /* Below 256 bytes: string-move path */

        /* rcx = number of 128-byte blocks (at least 2 here, since rdx >= 256). */
        movq    rdx, rcx
        shrq    $7, rcx

        pushq   rax                     /* The loop reuses rax; keep the return value */
        pushq   r12
        pushq   r13
        pushq   r14

        .p2align 4
.Lblock_loop:
        prefetchnta 768(rsi)            /* Prefetch source data well ahead of the loads */
        prefetchnta 832(rsi)

        COPY_CHUNK_NT 0                 /* Bytes   0..63  of this block */
        COPY_CHUNK_NT 64                /* Bytes  64..127 of this block */

        leaq    128(rsi), rsi           /* Advance both pointers by one block */
        leaq    128(rdi), rdi

        decq    rcx
        jnz     .Lblock_loop

        sfence                          /* Fence the movnti stores issued by the loop */
        movq    rdx, rcx
        andq    $127, rcx               /* rcx = bytes left after the last full block */
        rep     movsb
        popq    r14
        popq    r13
        popq    r12
        popq    rax                     /* Return value back in rax */
        ret


.Lcopy_bytes:
        movq    rdx, rcx                /* rcx = full byte count */
        rep     movsb
        ret


.Lcopy_quadwords:
        movq    rdx, rcx
        shrq    $3, rcx                 /* rcx = number of whole quadwords */
        .p2align 4
        rep     movsq
        movq    rdx, rcx
        andq    $7, rcx                 /* rcx = trailing bytes, 0..7 */
        rep     movsb
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif