#include "arm_arch.h"

.text
.arch	armv8-a+crypto
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
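// gcm_init_v8: convert the hash key into the "twisted" form used below.
// x0: output, receives the twisted H
// x1: input hash key H
// The twist is H<<1 reduced with the 0xc2....01 constant built in v16,
// i.e. the GHASH reduction polynomial constant.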
gcm_init_v8:
	ld1	{v17.2d},[x1]			//load H
	movi	v16.16b,#0xe1
	ext	v3.16b,v17.16b,v17.16b,#8
	shl	v16.2d,v16.2d,#57
	ushr	v18.2d,v16.2d,#63
	ext	v16.16b,v18.16b,v16.16b,#8	//t0=0xc2....01
	dup	v17.4s,v17.s[1]
	ushr	v19.2d,v3.2d,#63
	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
	and	v19.16b,v19.16b,v16.16b
	shl	v3.2d,v3.2d,#1
	ext	v19.16b,v19.16b,v19.16b,#8
	and	v16.16b,v16.16b,v17.16b
	orr	v3.16b,v3.16b,v19.16b		//H<<<=1
	eor	v3.16b,v3.16b,v16.16b		//twisted H
	st1	{v3.2d},[x0]

	ret
.size	gcm_init_v8,.-gcm_init_v8

.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
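// gcm_gmult_v8: multiply Xi by H once (single-block GHASH).
// x0: Xi, updated in place
// x1: twisted H as produced by gcm_init_v8
// Sets up a zero length/increment and branches into the shared
// multiply/reduce code at .Lgmult_v8.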
gcm_gmult_v8:
	ld1	{v17.2d},[x0]			//load Xi
	movi	v19.16b,#0xe1
	ld1	{v20.2d},[x1]			//load twisted H
	shl	v19.2d,v19.2d,#57
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext	v21.16b,v20.16b,v20.16b,#8
	mov	x3,#0
	ext	v3.16b,v17.16b,v17.16b,#8
	mov	x12,#0
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	mov	x2,x0
	b	.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
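// gcm_ghash_v8: fold a buffer of 16-byte blocks into Xi.
// x0: Xi, updated in place
// x1: twisted H as produced by gcm_init_v8
// x2: input pointer
// x3: input length in bytes, presumably a multiple of 16
// x12 holds the post-increment for the input loads; it is cleared on the
// last block so the speculative load inside the loop never reads past
// the end of the input.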
gcm_ghash_v8:
	ld1	{v0.2d},[x0]			//load [rotated] Xi
	subs	x3,x3,#16
	movi	v19.16b,#0xe1
	mov	x12,#16
	ld1	{v20.2d},[x1]			//load twisted H
	csel	x12,xzr,x12,eq
	ext	v0.16b,v0.16b,v0.16b,#8
	shl	v19.2d,v19.2d,#57
	ld1	{v17.2d},[x2],x12		//load [rotated] inp
	ext	v21.16b,v20.16b,v20.16b,#8
#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
	rev64	v17.16b,v17.16b
#endif
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	ext	v3.16b,v17.16b,v17.16b,#8
	b	.Loop_v8

.align	4
.Loop_v8:
	ext	v18.16b,v0.16b,v0.16b,#8
	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
	eor	v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi

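// Shared multiply/reduce step.  The 128x128-bit carry-less multiply is
// built Karatsuba-style from three 64x64-bit pmull/pmull2 products,
// H.lo·Xi.lo, H.hi·Xi.hi and (H.lo+H.hi)·(Xi.lo+Xi.hi); the pre- and
// post-processing steps below recombine them into the full product.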
.Lgmult_v8:
	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
	subs	x3,x3,#16
	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
	csel	x12,xzr,x12,eq

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v17.2d},[x2],x12		//load [rotated] inp
	eor	v1.16b,v1.16b,v18.16b
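	// Reduce the 256-bit product back to 128 bits in two pmull phases
	// against the constant in v19; the next input block was already
	// loaded above, so that load overlaps the reduction.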
	pmull	v18.1q,v0.1d,v19.1d		//1st phase

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	eor	v0.16b,v1.16b,v18.16b
	ext	v3.16b,v17.16b,v17.16b,#8

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	b.hs	.Loop_v8

#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext	v0.16b,v0.16b,v0.16b,#8
	st1	{v0.2d},[x0]			//write out Xi

	ret
.size	gcm_ghash_v8,.-gcm_ghash_v8
.asciz	"GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2