| Index: third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx.S
|
| diff --git a/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx.S b/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx.S
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1bfb26340a6e9e49641c6fc5aa006666e8cfd118
|
| --- /dev/null
|
| +++ b/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx.S
|
| @@ -0,0 +1,115 @@
|
| +#include "arm_arch.h"
|
| +
|
| +.text
|
| +.arch armv8-a+crypto
|
| +.global gcm_init_v8 //gcm_init_v8: derive the twisted H from H and store it
|
| +.type gcm_init_v8,%function //in: x1 -> 16 bytes of H (read), x0 -> 16 bytes of output (written)
|
| +.align 4 //writes only v3 and v16-v19; no stack use, no calls, no general registers written
|
| +gcm_init_v8:
|
| + ld1 {v17.2d},[x1] //load H
|
| + movi v16.16b,#0xe1 //every byte = 0xe1, seed for the constant t0 built below
|
| + ext v3.16b,v17.16b,v17.16b,#8 //v3 = loaded H with its 64-bit halves swapped
|
| + shl v16.2d,v16.2d,#57 //each 64-bit lane = 0xc200000000000000
|
| + ushr v18.2d,v16.2d,#63 //each 64-bit lane = 1
|
| + ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01
|
| + dup v17.4s,v17.s[1] //replicate bits 32..63 of v17.d[0], which is the same value as v3.d[1]
|
| + ushr v19.2d,v3.2d,#63 //top bit of each 64-bit lane of v3, moved down to bit 0
|
| + sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
| + and v19.16b,v19.16b,v16.16b //keeps the low-lane bit only: bit 0 of the high lane of t0 is clear
|
| + shl v3.2d,v3.2d,#1 //shift each lane left by one; the bit lost between lanes is re-added below
|
| + ext v19.16b,v19.16b,v19.16b,#8 //move the saved low-lane top bit to bit 0 of the high lane
|
| + and v16.16b,v16.16b,v17.16b //t0 when the carry mask is all-ones, zero otherwise
|
| + orr v3.16b,v3.16b,v19.16b //H<<<=1
|
| + eor v3.16b,v3.16b,v16.16b //twisted H
|
| + st1 {v3.2d},[x0] //write the 16-byte result to [x0]
|
| +
|
| + ret
|
| +.size gcm_init_v8,.-gcm_init_v8
|
| +
|
| +.global gcm_gmult_v8 //gcm_gmult_v8: one multiply of Xi by twisted H; result is written back to Xi
|
| +.type gcm_gmult_v8,%function //in: x0 -> 16-byte Xi (read here, rewritten by the shared tail), x1 -> 16-byte twisted H
|
| +.align 4 //no ret of its own: prepares registers, then finishes inside gcm_ghash_v8 at .Lgmult_v8
|
| +gcm_gmult_v8:
|
| + ld1 {v17.2d},[x0] //load Xi
|
| + movi v19.16b,#0xe1 //seed for the reduction constant (shifted below)
|
| + ld1 {v20.2d},[x1] //load twisted H
|
| + shl v19.2d,v19.2d,#57 //each 64-bit lane = 0xc200000000000000, multiplier for both reduction phases
|
| +#ifndef __ARMEB__
|
| + rev64 v17.16b,v17.16b
|
| +#endif
|
| + ext v21.16b,v20.16b,v20.16b,#8 //v21 = twisted H with halves swapped
|
| + mov x3,#0 //shared code does subs x3,x3,#16: from 0 that borrows, so its b.hs falls through after one pass
|
| + ext v3.16b,v17.16b,v17.16b,#8 //v3 = Xi with halves swapped, the multiplicand expected at .Lgmult_v8
|
| + mov x12,#0 //post-increment used by the shared ld1; zero keeps x2 unchanged
|
| + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
|
| + mov x2,x0 //shared code still loads 16 bytes from [x2]; aim it at Xi (loaded value is unused on this path)
|
| + b .Lgmult_v8 //enter gcm_ghash_v8 after its inp^=Xi step; that code stores Xi and returns
|
| +.size gcm_gmult_v8,.-gcm_gmult_v8
|
| +
|
| +.global gcm_ghash_v8 //gcm_ghash_v8: fold the input at x2 into Xi, one 16-byte block per loop pass
|
| +.type gcm_ghash_v8,%function //in: x0 -> Xi (read, then rewritten), x1 -> twisted H, x2 -> input, x3 = byte count
|
| +.align 4 //always processes at least one block; NOTE(review): x3 is presumably a non-zero multiple of 16 - confirm with callers
|
| +gcm_ghash_v8:
|
| + ld1 {v0.2d},[x0] //load [rotated] Xi
|
| + subs x3,x3,#16 //x3 = bytes left after the first block; Z set when exactly one block
|
| + movi v19.16b,#0xe1 //seed for the reduction constant (shifted below)
|
| + mov x12,#16 //x12 = post-increment applied to x2 by each input load
|
| + ld1 {v20.2d},[x1] //load twisted H
|
| + csel x12,xzr,x12,eq //single block: use increment 0 so x2 stays on that block
|
| + ext v0.16b,v0.16b,v0.16b,#8 //swap the 64-bit halves of Xi
|
| + shl v19.2d,v19.2d,#57 //each 64-bit lane = 0xc200000000000000, multiplier for both reduction phases
|
| + ld1 {v17.2d},[x2],x12 //load [rotated] inp
|
| + ext v21.16b,v20.16b,v20.16b,#8 //v21 = twisted H with halves swapped
|
| +#ifndef __ARMEB__
|
| + rev64 v0.16b,v0.16b
|
| + rev64 v17.16b,v17.16b
|
| +#endif
|
| + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
|
| + ext v3.16b,v17.16b,v17.16b,#8 //v3 = input block with halves swapped
|
| + b .Loop_v8 //jump over the alignment padding to the loop head
|
| +//loop entry state: v0 = Xi, v17 = input block, v3 = v17 with halves swapped, x3 = bytes left after this block
|
| +.align 4
|
| +.Loop_v8:
|
| + ext v18.16b,v0.16b,v0.16b,#8 //v18 = Xi with halves swapped
|
| + eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
| + eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi
|
| +//gcm_gmult_v8 enters here with v3 = multiplicand, v17 = v3 with halves swapped, x3 = 0, x12 = 0, x2 = x0
|
| +.Lgmult_v8:
|
| + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
| + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
| + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
| + subs x3,x3,#16 //count down one block; these flags feed the csel below and the b.hs at the loop end
|
| + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
| + csel x12,xzr,x12,eq //once the count reaches zero stop advancing x2, so the final extra load re-reads the last block
|
| +//combine the three partial products; the load of the next block is interleaved with this work
|
| + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
| + eor v18.16b,v0.16b,v2.16b //v18 = low product ^ high product
|
| + eor v1.16b,v1.16b,v17.16b //fold the straddling 128 bits into the middle product
|
| + ld1 {v17.2d},[x2],x12 //load [rotated] inp
|
| + eor v1.16b,v1.16b,v18.16b //middle product now has both outer products removed
|
| + pmull v18.1q,v0.1d,v19.1d //1st phase
|
| +//v0/v1/v2 hold the low, middle and high partial results while the reduction runs
|
| + ins v2.d[0],v1.d[1] //upper half of the middle term goes into the low half of the high term
|
| + ins v1.d[1],v0.d[0] //low half of the low term goes into the upper half of the middle term
|
| +#ifndef __ARMEB__
|
| + rev64 v17.16b,v17.16b
|
| +#endif
|
| + eor v0.16b,v1.16b,v18.16b //apply the 1st-phase product
|
| + ext v3.16b,v17.16b,v17.16b,#8 //prepare v3 for the next pass: next block with halves swapped
|
| +//second multiply by the constant in v19 finishes the reduction; the new Xi ends up in v0
|
| + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
| + pmull v0.1q,v0.1d,v19.1d //low lane of v0 times the constant
|
| + eor v18.16b,v18.16b,v2.16b //add the high term
|
| + eor v0.16b,v0.16b,v18.16b //v0 = reduced result, i.e. the updated Xi
|
| + b.hs .Loop_v8 //repeat while the subs above did not borrow (another block is pending)
|
| +//done: convert v0 back to the in-memory layout it was loaded with and store it
|
| +#ifndef __ARMEB__
|
| + rev64 v0.16b,v0.16b
|
| +#endif
|
| + ext v0.16b,v0.16b,v0.16b,#8 //undo the half swap applied after the initial load
|
| + st1 {v0.2d},[x0] //write out Xi
|
| +
|
| + ret
|
| +.size gcm_ghash_v8,.-gcm_ghash_v8
|
| +.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
|
| +.align 2
|
|
|