Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * ==================================================== | 2 * ==================================================== |
| 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. | 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. |
| 4 * | 4 * |
| 5 * Permission to use, copy, modify, and distribute this | 5 * Permission to use, copy, modify, and distribute this |
| 6 * software is freely granted, provided that this notice | 6 * software is freely granted, provided that this notice |
| 7 * is preserved. | 7 * is preserved. |
| 8 * ==================================================== | 8 * ==================================================== |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "x86_64mach.h" | 11 #include "x86_64mach.h" |
| 12 | 12 |
| 13 .global SYM (memcpy) | 13 .global SYM (memcpy) |
| 14 SOTYPE_FUNCTION(memcpy) | 14 SOTYPE_FUNCTION(memcpy) |
| 15 | 15 |
| 16 SYM (memcpy): | 16 SYM (memcpy): |
| 17 movl edi, eax /* Store destination in return value */ | 17 movl edi, eax /* Store destination in return value */ |
| 18 cmpl $16, edx | 18 cmpl $16, edx |
| 19 jb byte_copy | 19 jb .Lbyte_copy |
|
Mark Seaborn
2015/02/18 20:03:38
FWIW, they already *are* local as long as there's
Derek Schuff
2015/02/18 20:47:40
OK, I guess technically the effect that I wanted i
| |
| 20 | 20 |
| 21 movl edi, r8d /* Align destination on quad word boundary */ | 21 movl edi, r8d /* Align destination on quad word boundary */ |
| 22 andl $7, r8d | 22 andl $7, r8d |
| 23 jz quadword_aligned | 23 jz .Lquadword_aligned |
| 24 movl $8, ecx | 24 movl $8, ecx |
| 25 subl r8d, ecx | 25 subl r8d, ecx |
| 26 subl ecx, edx | 26 subl ecx, edx |
| 27 | 27 |
| 28 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 28 .Lheader_loop: |
| 29 movb %nacl:(r15, rsi), r8b | |
| 30 inc esi | |
| 31 movb r8b, %nacl:(r15, rdi) | |
| 32 inc edi | |
| 33 dec ecx | |
| 34 jnz .Lheader_loop | |
| 29 | 35 |
| 30 quadword_aligned: | 36 .Lquadword_aligned: |
| 31 movl esi, esi /* We must clear top half for prefetch */ | |
| 32 cmpl $256, edx | 37 cmpl $256, edx |
| 33 jb quadword_copy | 38 jb .Lquadword_copy |
| 34 | 39 |
| 35 pushq rax | 40 pushq rax |
| 36 pushq r12 | 41 pushq r12 |
| 37 pushq r13 | 42 pushq r13 |
| 38 pushq r14 | 43 pushq r14 |
| 39 | 44 |
| 40 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ | 45 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ |
| 41 shrl $7, ecx | 46 shrl $7, ecx |
| 42 | 47 |
| 48 /* Avoid revealing the sandbox base address. | |
|
Mark Seaborn
2015/02/18 20:03:38
Nit: Should this use the NaCl style for multiline
Derek Schuff
2015/02/18 20:47:40
Done.
| |
| 49 * In particular this means that we don't do the following: | |
| 50 * movq 32(r15,rsi), r11 | |
| 51 * ... | |
| 52 * movq r11, %nacl:32(r15,rdi) | |
| 53 * because the latter instruction might be reached via a direct or | |
| 54 * indirect jump when r11 contains the sandbox base address in its | |
| 55 * top 32 bits, and this would write the sandbox base address into | |
| 56 * memory. We treat r11 as a write-only register to avoid | |
| 57 * revealing the sandbox base address to user code. | |
| 58 * Instead, we spill rdx and use that. Additionally, we avoid string | |
| 59 * instructions (movs) because they leave the full 64 bits in rsi/rdi. | |
| 60 */ | |
| 61 pushq $0 | |
| 62 movl ebp, (rsp) | |
| 63 pushq rdx | |
|
Mark Seaborn
2015/02/18 20:03:38
Nit: fix operand's indentation alignment
Derek Schuff
2015/02/18 20:47:40
Done.
| |
| 43 .p2align 4 | 64 .p2align 4 |
| 44 loop: | |
| 45 prefetchnta 768 (r15,rsi) | |
| 46 prefetchnta 832 (r15,rsi) | |
| 47 | 65 |
| 48 movq %nacl: (r15,rsi), rax | 66 .Lloop: |
| 49 movq %nacl: 8 (r15,rsi), r8 | 67 naclrestbp esi, r15 |
| 50 movq %nacl: 16 (r15,rsi), r9 | 68 movq (rbp), rax |
| 51 movq %nacl: 24 (r15,rsi), r10 | 69 movq 8 (rbp), r8 |
| 52 movq %nacl: 32 (r15,rsi), r11 | 70 movq 16 (rbp), r9 |
| 53 movq %nacl: 40 (r15,rsi), r12 | 71 movq 24 (rbp), r10 |
| 54 movq %nacl: 48 (r15,rsi), r13 | 72 movq 32 (rbp), rdx |
| 55 movq %nacl: 56 (r15,rsi), r14 | 73 movq 40 (rbp), r12 |
| 74 movq 48 (rbp), r13 | |
| 75 movq 56 (rbp), r14 | |
| 56 | 76 |
| 57 movntiq rax, %nacl: (r15,rdi) | 77 naclrestbp edi, r15 |
| 58 movntiq r8 , %nacl: 8 (r15,rdi) | 78 movq rax, (rbp) |
| 59 movntiq r9 , %nacl: 16 (r15,rdi) | 79 movq r8 , 8 (rbp) |
| 60 movntiq r10, %nacl: 24 (r15,rdi) | 80 movq r9 , 16 (rbp) |
| 61 movntiq r11, %nacl: 32 (r15,rdi) | 81 movq r10, 24 (rbp) |
| 62 movntiq r12, %nacl: 40 (r15,rdi) | 82 movq rdx, 32 (rbp) |
| 63 movntiq r13, %nacl: 48 (r15,rdi) | 83 movq r12, 40 (rbp) |
| 64 movntiq r14, %nacl: 56 (r15,rdi) | 84 movq r13, 48 (rbp) |
| 85 movq r14, 56 (rbp) | |
| 65 | 86 |
| 66 movq %nacl: 64 (r15,rsi), rax | 87 naclrestbp esi, r15 |
| 67 movq %nacl: 72 (r15,rsi), r8 | 88 movq 64 (rbp), rax |
| 68 movq %nacl: 80 (r15,rsi), r9 | 89 movq 72 (rbp), r8 |
| 69 movq %nacl: 88 (r15,rsi), r10 | 90 movq 80 (rbp), r9 |
| 70 movq %nacl: 96 (r15,rsi), r11 | 91 movq 88 (rbp), r10 |
| 71 movq %nacl: 104 (r15,rsi), r12 | 92 movq 96 (rbp), rdx |
| 72 movq %nacl: 112 (r15,rsi), r13 | 93 movq 104 (rbp), r12 |
| 73 movq %nacl: 120 (r15,rsi), r14 | 94 movq 112 (rbp), r13 |
| 95 movq 120 (rbp), r14 | |
| 74 | 96 |
| 75 movntiq rax, %nacl: 64 (r15,rdi) | 97 naclrestbp edi, r15 |
| 76 movntiq r8 , %nacl: 72 (r15,rdi) | 98 movq rax, 64 (rbp) |
| 77 movntiq r9 , %nacl: 80 (r15,rdi) | 99 movq r8 , 72 (rbp) |
| 78 movntiq r10, %nacl: 88 (r15,rdi) | 100 movq r9 , 80 (rbp) |
| 79 movntiq r11, %nacl: 96 (r15,rdi) | 101 movq r10, 88 (rbp) |
| 80 movntiq r12, %nacl: 104 (r15,rdi) | 102 movq rdx, 96 (rbp) |
| 81 movntiq r13, %nacl: 112 (r15,rdi) | 103 movq r12, 104 (rbp) |
| 82 movntiq r14, %nacl: 120 (r15,rdi) | 104 movq r13, 112 (rbp) |
| 105 movq r14, 120 (rbp) | |
| 83 | 106 |
| 84 leal 128 (rsi), esi | 107 leal 128 (rsi), esi |
| 85 leal 128 (rdi), edi | 108 leal 128 (rdi), edi |
| 86 | 109 |
| 87 dec ecx | 110 dec ecx |
| 88 jnz loop | 111 jnz .Lloop |
| 89 | 112 |
| 90 sfence | 113 popq rcx |
|
Mark Seaborn
2015/02/18 20:03:38
Please note the removal of this sfence in the comm
Derek Schuff
2015/02/18 20:47:40
correct, and done.
|
Mark Seaborn
2015/02/18 20:03:38
Earlier you push rdx but here you pop rcx. Not su
Derek Schuff
2015/02/18 20:47:40
This replaces line 91 of the original which just c
Derek Schuff
2015/02/18 20:49:24
(I forgot to add "and rdx is not used anymore in t
|
| 91 movl edx, ecx | 114 popq rax |
| 115 naclrestbp eax, r15 | |
| 92 andl $127, ecx | 116 andl $127, ecx |
| 93 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 117 jz .Lrep1_end |
| 118 .Lrep1: | |
| 119 movb %nacl:(r15, rsi), r8b | |
| 120 inc esi | |
| 121 movb r8b, %nacl:(r15, rdi) | |
| 122 inc edi | |
| 123 dec ecx | |
| 124 jnz .Lrep1 | |
| 125 .Lrep1_end: | |
| 94 popq r14 | 126 popq r14 |
| 95 popq r13 | 127 popq r13 |
| 96 popq r12 | 128 popq r12 |
| 97 popq rax | 129 popq rax |
| 98 pop r11 | 130 pop r11 |
| 99 nacljmp r11d, r15 | 131 nacljmp r11d, r15 |
| 100 | 132 |
| 101 | 133 |
| 102 byte_copy: | 134 .Lbyte_copy: |
| 103 movl edx, ecx | 135 testl edx, edx |
| 104 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 136 jz .Lbyte_copy_end |
| 137 .Lbyte_copy_loop: | |
| 138 movb %nacl:(r15, rsi), r8b | |
| 139 inc esi | |
| 140 movb r8b, %nacl:(r15, rdi) | |
| 141 inc edi | |
| 142 dec edx | |
| 143 jnz .Lbyte_copy_loop | |
| 144 .Lbyte_copy_end: | |
| 105 pop r11 | 145 pop r11 |
| 106 nacljmp r11d, r15 | 146 nacljmp r11d, r15 |
| 107 | 147 |
| 108 | 148 |
| 109 quadword_copy: | 149 .Lquadword_copy: |
| 110 movl edx, ecx | 150 movl edx, ecx |
| 111 shrl $3, ecx | 151 shrl $3, ecx |
| 152 jz .Lrep2_end | |
| 112 .p2align 4 | 153 .p2align 4 |
| 113 rep movsq %nacl:(rsi), %nacl:(rdi), r15 | 154 .Lrep2: |
| 155 movq %nacl:(r15, rsi), r8 | |
| 156 add $8, esi | |
| 157 movq r8, %nacl:(r15, rdi) | |
| 158 add $8, edi | |
| 159 dec ecx | |
| 160 jnz .Lrep2 | |
| 161 .Lrep2_end: | |
| 114 movl edx, ecx | 162 movl edx, ecx |
| 115 andl $7, ecx | 163 andl $7, ecx |
| 116 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ | 164 jz .Lrep3_end |
| 165 .Lrep3: | |
| 166 /* Copy the remaining bytes */ | |
| 167 movb %nacl:(r15, rsi), r8b | |
| 168 inc esi | |
| 169 movb r8b, %nacl:(r15, rdi) | |
| 170 inc edi | |
| 171 dec ecx | |
| 172 jnz .Lrep3 | |
| 173 .Lrep3_end: | |
| 117 pop r11 | 174 pop r11 |
| 118 nacljmp r11d, r15 | 175 nacljmp r11d, r15 |
| OLD | NEW |