Index: third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S
diff --git a/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S b/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S
index fa09ac0e6790d0658666041de48c80668a98d81c..ba3779502e144c17e6cda21805d406364ee85e96 100644
--- a/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S
+++ b/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S
@@ -47,12 +47,12 @@
#if __ARM_ARCH__<7
.code 32
#else
-.syntax unified
-# ifdef __thumb2__
+.syntax unified
+# if defined(__thumb2__) && !defined(__APPLE__)
# define adrl adr
.thumb
# else
-.code 32
+.code 32
# endif
#endif
@@ -79,13 +79,14 @@ K256:
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
+.word OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align 5
-.global sha256_block_data_order
+.globl sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
+.Lsha256_block_data_order:
#if __ARM_ARCH__<7
	sub r3,pc,#8 @ sha256_block_data_order
#else
@@ -94,6 +95,9 @@ sha256_block_data_order:
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr r12,.LOPENSSL_armcap
	ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+	ldr r12,[r12]
+#endif
	tst r12,#ARMV8_SHA256
	bne .LARMv8
	tst r12,#ARMV7_NEON
@@ -1857,64 +1861,64 @@ sha256_block_data_order:
	add sp,sp,#19*4 @ destroy frame
#if __ARM_ARCH__>=5
-	ldmia sp!,{r4-r11,pc}
+	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
-	ldmia sp!,{r4-r11,lr}
+	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
-	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
-.global sha256_block_data_order_neon
+.globl sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
.align 4
sha256_block_data_order_neon:
.LNEON:
-	stmdb sp!,{r4-r12,lr}
+	stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub r11,sp,#16*4+16
-	adrl r14,K256
+	adr r14,K256
	bic r11,r11,#15 @ align for 128-bit stores
	mov r12,sp
	mov sp,r11 @ alloca
	add r2,r1,r2,lsl#6 @ len to point at the end of inp
-	vld1.8 {q0},[r1]!
-	vld1.8 {q1},[r1]!
-	vld1.8 {q2},[r1]!
-	vld1.8 {q3},[r1]!
-	vld1.32 {q8},[r14,:128]!
-	vld1.32 {q9},[r14,:128]!
-	vld1.32 {q10},[r14,:128]!
-	vld1.32 {q11},[r14,:128]!
+	vld1.8 {q0},[r1]!
+	vld1.8 {q1},[r1]!
+	vld1.8 {q2},[r1]!
+	vld1.8 {q3},[r1]!
+	vld1.32 {q8},[r14,:128]!
+	vld1.32 {q9},[r14,:128]!
+	vld1.32 {q10},[r14,:128]!
+	vld1.32 {q11},[r14,:128]!
	vrev32.8 q0,q0 @ yes, even on
-	str r0,[sp,#64]
+	str r0,[sp,#64]
	vrev32.8 q1,q1 @ big-endian
-	str r1,[sp,#68]
-	mov r1,sp
+	str r1,[sp,#68]
+	mov r1,sp
	vrev32.8 q2,q2
-	str r2,[sp,#72]
+	str r2,[sp,#72]
	vrev32.8 q3,q3
-	str r12,[sp,#76] @ save original sp
+	str r12,[sp,#76] @ save original sp
	vadd.i32 q8,q8,q0
	vadd.i32 q9,q9,q1
-	vst1.32 {q8},[r1,:128]!
+	vst1.32 {q8},[r1,:128]!
	vadd.i32 q10,q10,q2
-	vst1.32 {q9},[r1,:128]!
+	vst1.32 {q9},[r1,:128]!
	vadd.i32 q11,q11,q3
-	vst1.32 {q10},[r1,:128]!
-	vst1.32 {q11},[r1,:128]!
+	vst1.32 {q10},[r1,:128]!
+	vst1.32 {q11},[r1,:128]!
-	ldmia r0,{r4-r11}
-	sub r1,r1,#64
-	ldr r2,[sp,#0]
-	eor r12,r12,r12
-	eor r3,r5,r6
-	b .L_00_48
+	ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+	sub r1,r1,#64
+	ldr r2,[sp,#0]
+	eor r12,r12,r12
+	eor r3,r5,r6
+	b .L_00_48
.align 4
.L_00_48:
@@ -2315,19 +2319,19 @@ sha256_block_data_order_neon:
	sub r1,r1,#64
	bne .L_00_48
-	ldr r1,[sp,#68]
-	ldr r0,[sp,#72]
-	sub r14,r14,#256 @ rewind r14
-	teq r1,r0
-	it eq
-	subeq r1,r1,#64 @ avoid SEGV
-	vld1.8 {q0},[r1]! @ load next input block
-	vld1.8 {q1},[r1]!
-	vld1.8 {q2},[r1]!
-	vld1.8 {q3},[r1]!
-	it ne
-	strne r1,[sp,#68]
-	mov r1,sp
+	ldr r1,[sp,#68]
+	ldr r0,[sp,#72]
+	sub r14,r14,#256 @ rewind r14
+	teq r1,r0
+	it eq
+	subeq r1,r1,#64 @ avoid SEGV
+	vld1.8 {q0},[r1]! @ load next input block
+	vld1.8 {q1},[r1]!
+	vld1.8 {q2},[r1]!
+	vld1.8 {q3},[r1]!
+	it ne
+	strne r1,[sp,#68]
+	mov r1,sp
	add r11,r11,r2
	eor r2,r9,r10
	eor r0,r8,r8,ror#5
@@ -2637,7 +2641,7 @@ sha256_block_data_order_neon:
	str r6,[r2],#4
	add r11,r11,r1
	str r7,[r2],#4
-	stmia r2,{r8-r11}
+	stmia r2,{r8,r9,r10,r11}
	ittte ne
	movne r1,sp
@@ -2648,12 +2652,12 @@ sha256_block_data_order_neon:
	eorne r3,r5,r6
	bne .L_00_48
-	ldmia sp!,{r4-r12,pc}
+	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-# ifdef __thumb2__
+# if defined(__thumb2__) && !defined(__APPLE__)
# define INST(a,b,c,d) .byte c,d|0xc,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d
@@ -2664,7 +2668,9 @@ sha256_block_data_order_neon:
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32 {q0,q1},[r0]
-# ifdef __thumb2__
+# ifdef __APPLE__
+	sub r3,r3,#256+32
+# elif defined(__thumb2__)
	adr r3,.LARMv8
	sub r3,r3,#.LARMv8-K256
# else
@@ -2673,137 +2679,138 @@ sha256_block_data_order_armv8:
	add r2,r1,r2,lsl#6 @ len to point at the end of inp
.Loop_v8:
-	vld1.8 {q8-q9},[r1]!
-	vld1.8 {q10-q11},[r1]!
-	vld1.32 {q12},[r3]!
+	vld1.8 {q8,q9},[r1]!
+	vld1.8 {q10,q11},[r1]!
+	vld1.32 {q12},[r3]!
	vrev32.8 q8,q8
	vrev32.8 q9,q9
	vrev32.8 q10,q10
	vrev32.8 q11,q11
-	vmov q14,q0 @ offload
-	vmov q15,q1
-	teq r1,r2
-	vld1.32 {q13},[r3]!
+	vmov q14,q0 @ offload
+	vmov q15,q1
+	teq r1,r2
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
-	vld1.32 {q13},[r3]!
+	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
-	vmov q2,q0
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
-	vld1.32 {q12},[r3]!
+	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
-	vld1.32 {q13},[r3]
+	vld1.32 {q13},[r3]
	vadd.i32 q12,q12,q10
-	sub r3,r3,#256-16 @ rewind
-	vmov q2,q0
+	sub r3,r3,#256-16 @ rewind
+	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	vadd.i32 q13,q13,q11
-	vmov q2,q0
+	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	vadd.i32 q0,q0,q14
	vadd.i32 q1,q1,q15
-	it ne
-	bne .Loop_v8
+	it ne
+	bne .Loop_v8
-	vst1.32 {q0,q1},[r0]
+	vst1.32 {q0,q1},[r0]
	bx lr @ bx lr
.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
-.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
#endif