Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: openssl/crypto/modes/asm/ghash-armv4.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/modes/asm/ghash-alpha.pl ('k') | openssl/crypto/modes/asm/ghash-armv4.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #include "arm_arch.h"
2
3 .text
4 .code 32
5
@ rem_4bit: 16 halfword constants (32 bytes in total). The 4-bit routines in
@ this file index it with the 4 bits that drop off the low end of the
@ accumulator on each shift right by 4 (index doubled to form a byte offset)
@ and XOR the loaded halfword, shifted left by 16, into the top accumulator
@ word.
@ The table is located with fixed pc-relative offsets by rem_4bit_get (-32)
@ and gcm_ghash_4bit (-48), so neither its size nor its position directly
@ ahead of rem_4bit_get may change.
6 .type rem_4bit,%object
7 .align 5 @ 2^5 = 32-byte boundary (ARM gas takes the operand as a power of two)
8 rem_4bit:
9 .short 0x0000,0x1C20,0x3840,0x2460
10 .short 0x7080,0x6CA0,0x48C0,0x54E0
11 .short 0xE100,0xFD20,0xD940,0xC560
12 .short 0x9180,0x8DA0,0xA9C0,0xB5E0
13 .size rem_4bit,.-rem_4bit
14
@ rem_4bit_get: position-independent lookup of the rem_4bit address.
@ Reached with a plain branch (not a call) from gcm_gmult_4bit and leaves
@ through a branch to .Lrem_4bit_got inside that routine.
@ Out:  r2 = &rem_4bit
@ Only r2 is written; lr and the flags are left alone.
@ The stub is four instructions = 16 bytes; gcm_ghash_4bit depends on that
@ size when it subtracts 48 (32-byte table + 16-byte stub) from its own address.
15 .type rem_4bit_get,%function
16 rem_4bit_get:
17 sub r2,pc,#8 @ ARM-state pc reads as this instruction + 8, so r2 = rem_4bit_get
18 sub r2,r2,#32 @ &rem_4bit: step back over the 32-byte table that precedes us
19 b .Lrem_4bit_got
20 nop @ never executed; pads the stub to 16 bytes
21 .size rem_4bit_get,.-rem_4bit_get
22
@ gcm_ghash_4bit: 4-bit table-driven GHASH over a run of 16-byte blocks.
@ For every block the routine XORs the block into Xi byte by byte, multiplies
@ the result by H using the 16-entry table at r1, and stores the 16-byte
@ result back to Xi.
@ NOTE(review): the C prototype is not visible here; presumably
@   void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len)
@ -- confirm against the caller.
@ In:   r0 = Xi (16 bytes, read and rewritten for every block)
@       r1 = table of 16-byte entries, indexed by nibble * 16
@       r2 = input pointer
@       r3 = input length in bytes
@ The block loop is do-while and ends only when r2 equals the end pointer
@ exactly, so the length is assumed to be a non-zero multiple of 16.
@ Regs: r4-r7 = 128-bit accumulator (r7 is the word stored at Xi+0, r4 the
@       word stored at Xi+12); r8-r11 = table entry being folded in;
@       r12/r14 = low nibble / high nibble (already times 16) of current byte;
@       r3 = byte index inside the block (the end pointer lives on the stack).
@ Stack after the prologue: [sp,#0..31] copy of rem_4bit, [sp,#32] end
@ pointer, then the saved r4-r11 and lr.
23 .global gcm_ghash_4bit
24 .type gcm_ghash_4bit,%function
25 gcm_ghash_4bit:
26 sub r12,pc,#8 @ ARM-state pc reads as this instruction + 8, so r12 = gcm_ghash_4bit
27 add r3,r2,r3 @ r3 to point at the end
28 stmdb sp!,{r3-r11,lr} @ save r3/end too
29 sub r12,r12,#48 @ &rem_4bit: 16-byte rem_4bit_get + 32-byte table lie in between
30
31 ldmia r12,{r4-r11} @ copy rem_4bit ...
32 stmdb sp!,{r4-r11} @ ... to stack
33
34 ldrb r12,[r2,#15] @ last byte of the first input block
35 ldrb r14,[r0,#15] @ last byte of Xi
36 .Louter:
37 eor r12,r12,r14 @ byte 15 of (inp ^ Xi); only the low 8 bits of r14 matter
38 and r14,r12,#0xf0 @ high nibble, already scaled by 16
39 and r12,r12,#0x0f @ low nibble
40 mov r3,#14 @ r3 now counts byte positions inside the block
41
42 add r7,r1,r12,lsl#4
43 ldmia r7,{r4-r7} @ load Htbl[nlo]
44 add r11,r1,r14
45 ldrb r12,[r2,#14]
46
47 and r14,r4,#0xf @ rem
48 ldmia r11,{r8-r11} @ load Htbl[nhi]
49 add r14,r14,r14 @ halfword index -> byte offset
50 eor r4,r8,r4,lsr#4 @ accumulator >>= 4 across r7:r6:r5:r4, XORed with Htbl[nhi]
51 ldrh r8,[sp,r14] @ rem_4bit[rem]
52 eor r4,r4,r5,lsl#28
53 ldrb r14,[r0,#14]
54 eor r5,r9,r5,lsr#4
55 eor r5,r5,r6,lsl#28
56 eor r6,r10,r6,lsr#4
57 eor r6,r6,r7,lsl#28
58 eor r7,r11,r7,lsr#4
59 eor r12,r12,r14 @ byte 14 of (inp ^ Xi)
60 and r14,r12,#0xf0
61 and r12,r12,#0x0f
62 eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem], into the top word
63
64 .Linner:
65 add r11,r1,r12,lsl#4
66 and r12,r4,#0xf @ rem
67 subs r3,r3,#1 @ sets the flags used by every pl-conditional op and the bpl below
68 add r12,r12,r12
69 ldmia r11,{r8-r11} @ load Htbl[nlo]
70 eor r4,r8,r4,lsr#4
71 eor r4,r4,r5,lsl#28
72 eor r5,r9,r5,lsr#4
73 eor r5,r5,r6,lsl#28
74 ldrh r8,[sp,r12] @ rem_4bit[rem]
75 eor r6,r10,r6,lsr#4
76 ldrplb r12,[r2,r3] @ next input byte, skipped once r3 has gone negative
77 eor r6,r6,r7,lsl#28
78 eor r7,r11,r7,lsr#4
79
80 add r11,r1,r14
81 and r14,r4,#0xf @ rem
82 eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
83 add r14,r14,r14
84 ldmia r11,{r8-r11} @ load Htbl[nhi]
85 eor r4,r8,r4,lsr#4
86 ldrplb r8,[r0,r3] @ matching Xi byte, skipped once r3 has gone negative
87 eor r4,r4,r5,lsl#28
88 eor r5,r9,r5,lsr#4
89 ldrh r9,[sp,r14] @ rem_4bit[rem]
90 eor r5,r5,r6,lsl#28
91 eor r6,r10,r6,lsr#4
92 eor r6,r6,r7,lsl#28
93 eorpl r12,r12,r8
94 eor r7,r11,r7,lsr#4
95 andpl r14,r12,#0xf0
96 andpl r12,r12,#0x0f
97 eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
98 bpl .Linner
99
100 ldr r3,[sp,#32] @ re-load r3/end
101 add r2,r2,#16 @ advance to the next input block
102 mov r14,r4 @ low byte of r4 is the new Xi[15]; .Louter consumes it from r14
103 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
104 rev r4,r4 @ little-endian v7+: byte-swap, then one word store
105 str r4,[r0,#12]
106 #elif defined(__ARMEB__)
107 str r4,[r0,#12] @ big-endian: the register already has the stored byte order
108 #else
109 mov r9,r4,lsr#8 @ otherwise: store the word most-significant byte first, bytewise
110 strb r4,[r0,#12+3]
111 mov r10,r4,lsr#16
112 strb r9,[r0,#12+2]
113 mov r11,r4,lsr#24
114 strb r10,[r0,#12+1]
115 strb r11,[r0,#12]
116 #endif
117 cmp r2,r3 @ end of input? nothing below alters the flags before the bne
118 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
119 rev r5,r5
120 str r5,[r0,#8]
121 #elif defined(__ARMEB__)
122 str r5,[r0,#8]
123 #else
124 mov r9,r5,lsr#8
125 strb r5,[r0,#8+3]
126 mov r10,r5,lsr#16
127 strb r9,[r0,#8+2]
128 mov r11,r5,lsr#24
129 strb r10,[r0,#8+1]
130 strb r11,[r0,#8]
131 #endif
132 ldrneb r12,[r2,#15] @ more input: fetch byte 15 of the next block early
133 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
134 rev r6,r6
135 str r6,[r0,#4]
136 #elif defined(__ARMEB__)
137 str r6,[r0,#4]
138 #else
139 mov r9,r6,lsr#8
140 strb r6,[r0,#4+3]
141 mov r10,r6,lsr#16
142 strb r9,[r0,#4+2]
143 mov r11,r6,lsr#24
144 strb r10,[r0,#4+1]
145 strb r11,[r0,#4]
146 #endif
147
148 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
149 rev r7,r7
150 str r7,[r0,#0]
151 #elif defined(__ARMEB__)
152 str r7,[r0,#0]
153 #else
154 mov r9,r7,lsr#8
155 strb r7,[r0,#0+3]
156 mov r10,r7,lsr#16
157 strb r9,[r0,#0+2]
158 mov r11,r7,lsr#24
159 strb r10,[r0,#0+1]
160 strb r11,[r0,#0]
161 #endif
162
163 bne .Louter
164
165 add sp,sp,#36 @ drop the 32-byte table copy plus the saved end pointer
166 #if __ARM_ARCH__>=5
167 ldmia sp!,{r4-r11,pc}
168 #else
169 ldmia sp!,{r4-r11,lr}
170 tst lr,#1
171 moveq pc,lr @ be binary compatible with V4, yet
172 .word 0xe12fff1e @ interoperable with Thumb ISA:- )
173 #endif
174 .size gcm_ghash_4bit,.-gcm_ghash_4bit
175
@ gcm_gmult_4bit: multiply the 16-byte value at r0 by H in place, using the
@ 16-entry table at r1 and the rem_4bit reduction constants.
@ NOTE(review): the C prototype is not visible here; presumably
@   void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
@ -- confirm against the caller.
@ In:   r0 = Xi (16 bytes, read then overwritten with the product)
@       r1 = table of 16-byte entries, indexed by nibble * 16
@ Regs: r2 = &rem_4bit (set by the rem_4bit_get stub, read in place, no stack
@       copy); r3 = byte index; r4-r7 = 128-bit accumulator (r7 is the word
@       stored at Xi+0); r8-r11 = table entry being folded in; r12/r14 =
@       low nibble / high nibble (already times 16) of the current Xi byte.
176 .global gcm_gmult_4bit
177 .type gcm_gmult_4bit,%function
178 gcm_gmult_4bit:
179 stmdb sp!,{r4-r11,lr}
180 ldrb r12,[r0,#15] @ start from the last byte of Xi
181 b rem_4bit_get @ plain branch: the stub sets r2 and jumps back to the next label
182 .Lrem_4bit_got:
183 and r14,r12,#0xf0 @ high nibble, already scaled by 16
184 and r12,r12,#0x0f @ low nibble
185 mov r3,#14 @ index of the next Xi byte to consume
186
187 add r7,r1,r12,lsl#4
188 ldmia r7,{r4-r7} @ load Htbl[nlo]
189 ldrb r12,[r0,#14]
190
191 add r11,r1,r14
192 and r14,r4,#0xf @ rem
193 ldmia r11,{r8-r11} @ load Htbl[nhi]
194 add r14,r14,r14 @ halfword index -> byte offset
195 eor r4,r8,r4,lsr#4 @ accumulator >>= 4 across r7:r6:r5:r4, XORed with Htbl[nhi]
196 ldrh r8,[r2,r14] @ rem_4bit[rem]
197 eor r4,r4,r5,lsl#28
198 eor r5,r9,r5,lsr#4
199 eor r5,r5,r6,lsl#28
200 eor r6,r10,r6,lsr#4
201 eor r6,r6,r7,lsl#28
202 eor r7,r11,r7,lsr#4
203 and r14,r12,#0xf0
204 eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem], into the top word
205 and r12,r12,#0x0f
206
207 .Loop:
208 add r11,r1,r12,lsl#4
209 and r12,r4,#0xf @ rem
210 subs r3,r3,#1 @ sets the flags used by every pl-conditional op and the bpl below
211 add r12,r12,r12
212 ldmia r11,{r8-r11} @ load Htbl[nlo]
213 eor r4,r8,r4,lsr#4
214 eor r4,r4,r5,lsl#28
215 eor r5,r9,r5,lsr#4
216 eor r5,r5,r6,lsl#28
217 ldrh r8,[r2,r12] @ rem_4bit[rem]
218 eor r6,r10,r6,lsr#4
219 ldrplb r12,[r0,r3] @ next Xi byte, skipped once r3 has gone negative
220 eor r6,r6,r7,lsl#28
221 eor r7,r11,r7,lsr#4
222
223 add r11,r1,r14
224 and r14,r4,#0xf @ rem
225 eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
226 add r14,r14,r14
227 ldmia r11,{r8-r11} @ load Htbl[nhi]
228 eor r4,r8,r4,lsr#4
229 eor r4,r4,r5,lsl#28
230 eor r5,r9,r5,lsr#4
231 ldrh r8,[r2,r14] @ rem_4bit[rem]
232 eor r5,r5,r6,lsl#28
233 eor r6,r10,r6,lsr#4
234 eor r6,r6,r7,lsl#28
235 eor r7,r11,r7,lsr#4
236 andpl r14,r12,#0xf0
237 andpl r12,r12,#0x0f
238 eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
239 bpl .Loop
240 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
241 rev r4,r4 @ little-endian v7+: byte-swap, then one word store
242 str r4,[r0,#12]
243 #elif defined(__ARMEB__)
244 str r4,[r0,#12] @ big-endian: the register already has the stored byte order
245 #else
246 mov r9,r4,lsr#8 @ otherwise: store the word most-significant byte first, bytewise
247 strb r4,[r0,#12+3]
248 mov r10,r4,lsr#16
249 strb r9,[r0,#12+2]
250 mov r11,r4,lsr#24
251 strb r10,[r0,#12+1]
252 strb r11,[r0,#12]
253 #endif
254
255 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
256 rev r5,r5
257 str r5,[r0,#8]
258 #elif defined(__ARMEB__)
259 str r5,[r0,#8]
260 #else
261 mov r9,r5,lsr#8
262 strb r5,[r0,#8+3]
263 mov r10,r5,lsr#16
264 strb r9,[r0,#8+2]
265 mov r11,r5,lsr#24
266 strb r10,[r0,#8+1]
267 strb r11,[r0,#8]
268 #endif
269
270 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
271 rev r6,r6
272 str r6,[r0,#4]
273 #elif defined(__ARMEB__)
274 str r6,[r0,#4]
275 #else
276 mov r9,r6,lsr#8
277 strb r6,[r0,#4+3]
278 mov r10,r6,lsr#16
279 strb r9,[r0,#4+2]
280 mov r11,r6,lsr#24
281 strb r10,[r0,#4+1]
282 strb r11,[r0,#4]
283 #endif
284
285 #if __ARM_ARCH__>=7 && defined(__ARMEL__)
286 rev r7,r7
287 str r7,[r0,#0]
288 #elif defined(__ARMEB__)
289 str r7,[r0,#0]
290 #else
291 mov r9,r7,lsr#8
292 strb r7,[r0,#0+3]
293 mov r10,r7,lsr#16
294 strb r9,[r0,#0+2]
295 mov r11,r7,lsr#24
296 strb r10,[r0,#0+1]
297 strb r11,[r0,#0]
298 #endif
299
300 #if __ARM_ARCH__>=5
301 ldmia sp!,{r4-r11,pc}
302 #else
303 ldmia sp!,{r4-r11,lr}
304 tst lr,#1
305 moveq pc,lr @ be binary compatible with V4, yet
306 .word 0xe12fff1e @ interoperable with Thumb ISA:- )
307 #endif
308 .size gcm_gmult_4bit,.-gcm_gmult_4bit
309 #if __ARM_ARCH__>=7
310 .fpu neon
311
@ gcm_gmult_neon: NEON single-block multiply of Xi by H, in place.
@ This is only a set-up stub: it prepares the registers that the inner loop of
@ gcm_ghash_neon expects and branches to .Linner_neon; the result is written
@ and the return is executed by the tail of gcm_ghash_neon.
@ In:   r0 = Xi (16 bytes; the :64 qualifier requires 8-byte alignment)
@       r1 = pointer 16 bytes past H (H itself is read from r1-16)
@ Set up for .Linner_neon: q14 = Xi as the multiplier input, q10 = 0
@ (accumulator), q11-q13 = 0, d2 = 0, d5 = 0xe1 in the low 32 bits,
@ d4 = lowest input byte broadcast, r1 = 16 (byte counter), r3 = 16 so that
@ the "subs r3,#16" after the loop reaches zero and falls into the store.
@ r0 is left 16 bytes past Xi; the shared tail subtracts 16 before storing.
312 .global gcm_gmult_neon
313 .type gcm_gmult_neon,%function
314 .align 4
315 gcm_gmult_neon:
316 sub r1,#16 @ point at H in GCM128_CTX
317 vld1.64 d29,[r0,:64]!@ load Xi
318 vmov.i32 d5,#0xe1 @ our irreducible polynomial
319 vld1.64 d28,[r0,:64]!
320 vshr.u64 d5,#32 @ keep 0xe1 only in the low 32 bits of d5
321 vldmia r1,{d0-d1} @ load H
322 veor q12,q12 @ q12 = 0, the zero operand of the vext shifts in the loop
323 #ifdef __ARMEL__
324 vrev64.8 q14,q14
325 #endif
326 veor q13,q13
327 veor q11,q11
328 mov r1,#16 @ inner-loop count: one pass per byte
329 veor q10,q10 @ accumulator starts at zero
330 mov r3,#16 @ pretend exactly one block remains for the shared tail
331 veor d2,d2
332 vdup.8 d4,d28[0] @ broadcast lowest byte
333 b .Linner_neon
334 .size gcm_gmult_neon,.-gcm_gmult_neon
335
@ gcm_ghash_neon: NEON GHASH over a run of 16-byte blocks. Each block is
@ XORed into the running value and multiplied by H one byte at a time with
@ polynomial multiplies (vmull.p8); the final value is stored back to Xi.
@ NOTE(review): the C prototype is not visible here; presumably
@   void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len)
@ -- confirm against the caller.
@ In:   r0 = Xi (16 bytes; the :64 qualifier requires 8-byte alignment);
@            H is read from the 16 bytes that follow Xi in memory
@       r1 = overwritten before it is read (reused as the inner byte counter)
@       r2 = input pointer
@       r3 = input length in bytes
@ The block loop is do-while and ends only when r3 reaches exactly zero in
@ steps of 16, so the length is assumed to be a non-zero multiple of 16.
@ NEON registers written: d0-d5 and q8-q14.
@ .Linner_neon is also entered from gcm_gmult_neon, which shares the tail
@ from there to the return.
336 .global gcm_ghash_neon
337 .type gcm_ghash_neon,%function
338 .align 4
339 gcm_ghash_neon:
340 vld1.64 d21,[r0,:64]! @ load Xi
341 vmov.i32 d5,#0xe1 @ our irreducible polynomial
342 vld1.64 d20,[r0,:64]!
343 vshr.u64 d5,#32 @ keep 0xe1 only in the low 32 bits of d5
344 vldmia r0,{d0-d1} @ load H
345 veor q12,q12 @ q12 = 0, the zero operand of the vext shifts in the loop
346 nop @ NOTE(review): presumably padding so .Louter_neon lands aligned -- confirm
347 #ifdef __ARMEL__
348 vrev64.8 q10,q10
349 #endif
350 .Louter_neon:
351 vld1.64 d29,[r2]! @ load inp
352 veor q13,q13
353 vld1.64 d28,[r2]!
354 veor q11,q11
355 mov r1,#16 @ inner-loop count: one pass per byte
356 #ifdef __ARMEL__
357 vrev64.8 q14,q14
358 #endif
359 veor d2,d2
360 veor q14,q10 @ inp^=Xi
361 veor q10,q10 @ accumulator restarts at zero for this block
362 vdup.8 d4,d28[0] @ broadcast lowest byte
363 .Linner_neon:
364 subs r1,r1,#1 @ flags are consumed by the bne at the bottom of the loop
365 vmull.p8 q9,d1,d4 @ H.lo·Xi[i]
366 vmull.p8 q8,d0,d4 @ H.hi·Xi[i]
367 vext.8 q14,q12,#1 @ IN>>=8
368
369 veor q10,q13 @ modulo-scheduled part
370 vshl.i64 d22,#48
371 vdup.8 d4,d28[0] @ broadcast lowest byte
372 veor d3,d18,d20
373
374 veor d21,d22
375 vuzp.8 q9,q8
376 vsli.8 d2,d3,#1 @ compose the "carry" byte
377 vext.8 q10,q12,#1 @ Z>>=8
378
379 vmull.p8 q11,d2,d5 @ "carry"·0xe1
380 vshr.u8 d2,d3,#7 @ save Z's bottom bit
381 vext.8 q13,q9,q12,#1 @ Qlo>>=8
382 veor q10,q8
383 bne .Linner_neon
384
385 veor q10,q13 @ modulo-scheduled artefact
386 vshl.i64 d22,#48
387 veor d21,d22
388
389 @ finalization, normalize Z:Zo
390 vand d2,d5 @ suffices to mask the bit
391 vshr.u64 d3,d20,#63
392 vshl.i64 q10,#1
393 subs r3,#16 @ one block consumed; zero means the input is exhausted
394 vorr q10,q1 @ Z=Z:Zo<<1
395 bne .Louter_neon
396
397 #ifdef __ARMEL__
398 vrev64.8 q10,q10
399 #endif
400 sub r0,#16 @ undo the two post-increments from loading Xi
401 vst1.64 d21,[r0,:64]! @ write out Xi
402 vst1.64 d20,[r0,:64]
403
404 .word 0xe12fff1e @ encoding of "bx lr": return to the caller
405 .size gcm_ghash_neon,.-gcm_ghash_neon
406 #endif
407 .asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
408 .align 2
OLDNEW
« no previous file with comments | « openssl/crypto/modes/asm/ghash-alpha.pl ('k') | openssl/crypto/modes/asm/ghash-armv4.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698