| OLD | NEW |
| 1 /* | 1 /* |
| 2 * ==================================================== | 2 * ==================================================== |
| 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. | 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. |
| 4 * | 4 * |
| 5 * Permission to use, copy, modify, and distribute this | 5 * Permission to use, copy, modify, and distribute this |
| 6 * software is freely granted, provided that this notice | 6 * software is freely granted, provided that this notice |
| 7 * is preserved. | 7 * is preserved. |
| 8 * ==================================================== | 8 * ==================================================== |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "x86_64mach.h" | 11 #include "x86_64mach.h" |
| 12 | 12 |
| 13 .global SYM (memcpy) | 13 .global SYM (memcpy) |
| 14 SOTYPE_FUNCTION(memcpy) | 14 SOTYPE_FUNCTION(memcpy) |
| 15 | 15 |
| 16 SYM (memcpy): | 16 SYM (memcpy): |
| 17 movl edi, eax /* Store destination in return value */ | 17 movl edi, eax /* Store destination in return value */ |
| 18 cmpl $16, edx | 18 cmpl $16, edx |
| 19 jb byte_copy | 19 jb .Lbyte_copy |
| 20 | 20 |
| 21 movl edi, r8d /* Align destination on quad word boundary */ | 21 movl edi, r8d /* Align destination on quad word boundary */ |
| 22 andl $7, r8d | 22 andl $7, r8d |
| 23 jz quadword_aligned | 23 jz .Lquadword_aligned |
| 24 movl $8, ecx | 24 movl $8, ecx |
| 25 subl r8d, ecx | 25 subl r8d, ecx |
| 26 subl ecx, edx | 26 subl ecx, edx |
| 27 | 27 |
| 28 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 28 .Lheader_loop: |
| | 29 movb %nacl:(r15, rsi), r8b |
| | 30 inc esi |
| | 31 movb r8b, %nacl:(r15, rdi) |
| | 32 inc edi |
| | 33 dec ecx |
| | 34 jnz .Lheader_loop |
| 29 | 35 |
| 30 quadword_aligned: | 36 .Lquadword_aligned: |
| 31 movl esi, esi /* We must clear top half for prefetch */ | |
| 32 cmpl $256, edx | 37 cmpl $256, edx |
| 33 jb quadword_copy | 38 jb .Lquadword_copy |
| 34 | 39 |
| 35 pushq rax | 40 pushq rax |
| 36 pushq r12 | 41 pushq r12 |
| 37 pushq r13 | 42 pushq r13 |
| 38 pushq r14 | 43 pushq r14 |
| 39 | 44 |
| 40 movl edx, ecx /* Copy 128 bytes at a time with minimum cache polution */ | 45 movl edx, ecx /* Copy 128 bytes at a time */ |
| 41 shrl $7, ecx | 46 shrl $7, ecx |
| 42 | 47 |
| | 48 /* |
| | 49 * Avoid revealing the sandbox base address. |
| | 50 * In particular this means that we don't do the following: |
| | 51 * movq 32(r15,rsi), r11 |
| | 52 * ... |
| | 53 * movq r11, %nacl:32(r15,rdi) |
| | 54 * because the latter instruction might be reached via a direct or |
| | 55 * indirect jump when r11 contains the sandbox base address in its |
| | 56 * top 32 bits, and this would write the sandbox base address into |
| | 57 * memory. We treat r11 as a write-only register to avoid |
| | 58 * revealing the sandbox base address to user code. |
| | 59 * Instead, we spill rdx and use that. Additionally, we avoid string |
| | 60 * instructions (movs) because they leave the full 64 bits in rsi/rdi. |
| | 61 */ |
| | 62 pushq $0 |
| | 63 movl ebp, (rsp) |
| | 64 pushq rdx /* Save byte count */ |
| 43 .p2align 4 | 65 .p2align 4 |
| 44 loop: | |
| 45 prefetchnta 768 (r15,rsi) | |
| 46 prefetchnta 832 (r15,rsi) | |
| 47 | 66 |
| 48 movq %nacl: (r15,rsi), rax | 67 .Lloop: |
| 49 movq %nacl: 8 (r15,rsi), r8 | 68 naclrestbp esi, r15 |
| 50 movq %nacl: 16 (r15,rsi), r9 | 69 movq (rbp), rax |
| 51 movq %nacl: 24 (r15,rsi), r10 | 70 movq 8 (rbp), r8 |
| 52 movq %nacl: 32 (r15,rsi), r11 | 71 movq 16 (rbp), r9 |
| 53 movq %nacl: 40 (r15,rsi), r12 | 72 movq 24 (rbp), r10 |
| 54 movq %nacl: 48 (r15,rsi), r13 | 73 movq 32 (rbp), rdx |
| 55 movq %nacl: 56 (r15,rsi), r14 | 74 movq 40 (rbp), r12 |
| | 75 movq 48 (rbp), r13 |
| | 76 movq 56 (rbp), r14 |
| 56 | 77 |
| 57 movntiq rax, %nacl: (r15,rdi) | 78 naclrestbp edi, r15 |
| 58 movntiq r8 , %nacl: 8 (r15,rdi) | 79 movq rax, (rbp) |
| 59 movntiq r9 , %nacl: 16 (r15,rdi) | 80 movq r8 , 8 (rbp) |
| 60 movntiq r10, %nacl: 24 (r15,rdi) | 81 movq r9 , 16 (rbp) |
| 61 movntiq r11, %nacl: 32 (r15,rdi) | 82 movq r10, 24 (rbp) |
| 62 movntiq r12, %nacl: 40 (r15,rdi) | 83 movq rdx, 32 (rbp) |
| 63 movntiq r13, %nacl: 48 (r15,rdi) | 84 movq r12, 40 (rbp) |
| 64 movntiq r14, %nacl: 56 (r15,rdi) | 85 movq r13, 48 (rbp) |
| | 86 movq r14, 56 (rbp) |
| 65 | 87 |
| 66 movq %nacl: 64 (r15,rsi), rax | 88 naclrestbp esi, r15 |
| 67 movq %nacl: 72 (r15,rsi), r8 | 89 movq 64 (rbp), rax |
| 68 movq %nacl: 80 (r15,rsi), r9 | 90 movq 72 (rbp), r8 |
| 69 movq %nacl: 88 (r15,rsi), r10 | 91 movq 80 (rbp), r9 |
| 70 movq %nacl: 96 (r15,rsi), r11 | 92 movq 88 (rbp), r10 |
| 71 movq %nacl: 104 (r15,rsi), r12 | 93 movq 96 (rbp), rdx |
| 72 movq %nacl: 112 (r15,rsi), r13 | 94 movq 104 (rbp), r12 |
| 73 movq %nacl: 120 (r15,rsi), r14 | 95 movq 112 (rbp), r13 |
| | 96 movq 120 (rbp), r14 |
| 74 | 97 |
| 75 movntiq rax, %nacl: 64 (r15,rdi) | 98 naclrestbp edi, r15 |
| 76 movntiq r8 , %nacl: 72 (r15,rdi) | 99 movq rax, 64 (rbp) |
| 77 movntiq r9 , %nacl: 80 (r15,rdi) | 100 movq r8 , 72 (rbp) |
| 78 movntiq r10, %nacl: 88 (r15,rdi) | 101 movq r9 , 80 (rbp) |
| 79 movntiq r11, %nacl: 96 (r15,rdi) | 102 movq r10, 88 (rbp) |
| 80 movntiq r12, %nacl: 104 (r15,rdi) | 103 movq rdx, 96 (rbp) |
| 81 movntiq r13, %nacl: 112 (r15,rdi) | 104 movq r12, 104 (rbp) |
| 82 movntiq r14, %nacl: 120 (r15,rdi) | 105 movq r13, 112 (rbp) |
| | 106 movq r14, 120 (rbp) |
| 83 | 107 |
| 84 leal 128 (rsi), esi | 108 leal 128 (rsi), esi |
| 85 leal 128 (rdi), edi | 109 leal 128 (rdi), edi |
| 86 | 110 |
| 87 dec ecx | 111 dec ecx |
| 88 jnz loop | 112 jnz .Lloop |
| 89 | 113 |
| 90 sfence | 114 popq rcx /* Restore byte count */ |
| 91 movl edx, ecx | 115 popq rax |
| | 116 naclrestbp eax, r15 |
| | 117 /* Copy the remaining bytes */ |
| 92 andl $127, ecx | 118 andl $127, ecx |
| 93 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 119 jz .Lrep1_end |
| | 120 .Lrep1: |
| | 121 movb %nacl:(r15, rsi), r8b |
| | 122 inc esi |
| | 123 movb r8b, %nacl:(r15, rdi) |
| | 124 inc edi |
| | 125 dec ecx |
| | 126 jnz .Lrep1 |
| | 127 .Lrep1_end: |
| 94 popq r14 | 128 popq r14 |
| 95 popq r13 | 129 popq r13 |
| 96 popq r12 | 130 popq r12 |
| 97 popq rax | 131 popq rax |
| 98 pop r11 | 132 pop r11 |
| 99 nacljmp r11d, r15 | 133 nacljmp r11d, r15 |
| 100 | 134 |
| 101 | 135 |
| 102 byte_copy: | 136 .Lbyte_copy: |
| 103 movl edx, ecx | 137 testl edx, edx |
| 104 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 138 jz .Lbyte_copy_end |
| | 139 .Lbyte_copy_loop: |
| | 140 movb %nacl:(r15, rsi), r8b |
| | 141 inc esi |
| | 142 movb r8b, %nacl:(r15, rdi) |
| | 143 inc edi |
| | 144 dec edx |
| | 145 jnz .Lbyte_copy_loop |
| | 146 .Lbyte_copy_end: |
| 105 pop r11 | 147 pop r11 |
| 106 nacljmp r11d, r15 | 148 nacljmp r11d, r15 |
| 107 | 149 |
| 108 | 150 |
| 109 quadword_copy: | 151 .Lquadword_copy: |
| 110 movl edx, ecx | 152 movl edx, ecx |
| 111 shrl $3, ecx | 153 shrl $3, ecx |
| | 154 jz .Lrep2_end |
| 112 .p2align 4 | 155 .p2align 4 |
| 113 rep movsq %nacl:(rsi), %nacl:(rdi), r15 | 156 .Lrep2: |
| 114 movl edx, ecx | 157 movq %nacl:(r15, rsi), r8 |
| 115 andl $7, ecx | 158 add $8, esi |
| 116 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ | 159 movq r8, %nacl:(r15, rdi) |
| | 160 add $8, edi |
| | 161 dec ecx |
| | 162 jnz .Lrep2 |
| | 163 .Lrep2_end: |
| | 164 andl $7, edx |
| | 165 jz .Lrep3_end |
| | 166 .Lrep3: |
| | 167 /* Copy the remaining bytes */ |
| | 168 movb %nacl:(r15, rsi), r8b |
| | 169 inc esi |
| | 170 movb r8b, %nacl:(r15, rdi) |
| | 171 inc edi |
| | 172 dec edx |
| | 173 jnz .Lrep3 |
| | 174 .Lrep3_end: |
| 117 pop r11 | 175 pop r11 |
| 118 nacljmp r11d, r15 | 176 nacljmp r11d, r15 |
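
For readers following the diff, here is a hedged, plain-C sketch of the control flow that the rewritten routine implements: a byte loop to 8-byte-align the destination, a 128-byte block loop for large copies, a quadword loop for medium copies, and a byte tail. The helper name `memcpy_sketch` is hypothetical and the code is only a conceptual model; it omits every sandboxing detail of the real assembly (r15-relative addressing, `naclrestbp`, `nacljmp`, the rbp/rdx spills, and the deliberate avoidance of r11 and string instructions described in the comment above).

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>   /* memcpy() stands in for a single quadword load/store */

/* Conceptual sketch only (hypothetical helper, not part of the patch). */
static void *memcpy_sketch(void *dst, const void *src, size_t n)
{
    unsigned char *d = dst;
    const unsigned char *s = src;

    if (n >= 16) {
        /* Header loop (.Lheader_loop): align the destination to 8 bytes. */
        size_t head = (uintptr_t)d & 7;
        if (head) {
            head = 8 - head;
            n -= head;
            while (head--)
                *d++ = *s++;
        }

        if (n >= 256) {
            /* Main loop (.Lloop): 16 quadwords, i.e. 128 bytes, per pass. */
            size_t blocks = n >> 7;
            while (blocks--) {
                for (int i = 0; i < 16; i++) {
                    uint64_t q;
                    memcpy(&q, s + 8 * i, 8);  /* one quadword load  */
                    memcpy(d + 8 * i, &q, 8);  /* one quadword store */
                }
                s += 128;
                d += 128;
            }
            n &= 127;   /* remainder handled by the byte tail (.Lrep1) */
        } else {
            /* Quadword loop (.Lrep2) for medium-sized copies. */
            size_t words = n >> 3;
            while (words--) {
                uint64_t q;
                memcpy(&q, s, 8);
                memcpy(d, &q, 8);
                s += 8;
                d += 8;
            }
            n &= 7;     /* remainder handled by the byte tail (.Lrep3) */
        }
    }

    /* Byte tail; also the entire copy when n < 16 (.Lbyte_copy). */
    while (n--)
        *d++ = *s++;

    return dst;
}
```

Note that the new assembly also drops the old code's `prefetchnta`/`movntiq` non-temporal path and instead does plain loads and stores through rbp, saving and restoring the byte count and rbp around the main loop; the sketch above does not model any of that.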