Chromium Code Reviews| Index: linux-ppc64le/crypto/modes/ghashp8-ppc.S |
| diff --git a/linux-ppc64le/crypto/modes/ghashp8-ppc.S b/linux-ppc64le/crypto/modes/ghashp8-ppc.S |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..69ae1a5d98beb6088b3f443142f66751ee287cd8 |
| --- /dev/null |
| +++ b/linux-ppc64le/crypto/modes/ghashp8-ppc.S |
| @@ -0,0 +1,565 @@ |
| +.machine "any" |
| + |
| +.text |
| + |
| +.globl gcm_init_p8 /* gcm_init_p8: from a 16-byte hash key, build the table (reduction constant plus four key powers, each stored as three pieces) read by gcm_gmult_p8 and gcm_ghash_p8 */ |
| +.align 5 /* In: r3 = table to fill (bytes 0x00-0xcf are written), r4 = key. Scratch: r0, r8-r10, r12, v0-v11, v13-v19; VRSAVE is restored on exit */ |
| +gcm_init_p8: /* NOTE(review): presumably called from C as gcm_init_p8(Htable, H) - confirm against the caller */ |
| + li 0,-4096 /* r0 = 0xfffff000: temporary VRSAVE mask, top 20 bits set (v0-v19 in the usual MSB-first convention) */ |
| + li 8,0x10 /* r8/r9/r10 hold table byte offsets, advanced as entries are stored */ |
| + mfspr 12,256 /* r12 = caller's VRSAVE (SPR 256) */ |
| + li 9,0x20 |
| + mtspr 256,0 /* install the temporary VRSAVE mask */ |
| + li 10,0x30 |
| + .long 0x7D202699 /* lxvd2x v9,0,r4 : v9 = key */ |
| + /* Build the constant in v8 and the adjusted key in v3 */ |
| + vspltisb 8,-16 /* v8 = 0xf0 in every byte */ |
| + vspltisb 5,1 /* v5 = 0x01 in every byte */ |
| + vaddubm 8,8,8 /* v8 = 0xe0 in every byte */ |
| + vxor 4,4,4 /* v4 = 0, used as the zero vector below */ |
| + vor 8,8,5 /* v8 = 0xe1 in every byte */ |
| + vsldoi 8,8,4,15 /* v8 = 0xe1 followed by 15 zero bytes */ |
| + vsldoi 6,4,5,1 /* v6 = 15 zero bytes followed by 0x01 */ |
| + vaddubm 8,8,8 /* v8 = 0xc2 followed by 15 zero bytes */ |
| + vspltisb 7,7 /* v7 = 7 in every byte: shift count for vsrab */ |
| + vor 8,8,6 /* v8 = 0xc2, 14 zero bytes, 0x01 */ |
| + vspltb 6,9,0 /* v6 = byte 0 of the key replicated */ |
| + vsl 9,9,5 /* key shifted left by one bit across the whole vector */ |
| + vsrab 6,6,7 /* per-byte arithmetic shift by 7: all ones if the top bit of key byte 0 was set, else zero */ |
| + vand 6,6,8 /* keep the 0xc2...01 constant only when that bit was set */ |
| + vxor 3,9,6 /* v3 = shifted key with the constant conditionally folded in; this is the multiplicand used below */ |
| + /* Split v3 into the three pieces that get stored */ |
| + vsldoi 9,3,3,8 /* v9 = v3 with its two 8-byte halves swapped */ |
| + vsldoi 8,4,8,8 /* v8 = 8 zero bytes, then 0xc2 and 7 zero bytes: the form of the constant that is stored and used for reduction */ |
| + vsldoi 11,4,9,8 /* v11 = 8 zero bytes, then the first half of v9 */ |
| + vsldoi 10,9,4,8 /* v10 = second half of v9, then 8 zero bytes */ |
| + /* Store constant and first-power pieces at offsets 0x00-0x30 */ |
| + .long 0x7D001F99 /* stxvd2x v8,0,r3 : table+0x00 = constant */ |
| + .long 0x7D681F99 /* stxvd2x v11,r8,r3 : table+0x10 */ |
| + li 8,0x40 |
| + .long 0x7D291F99 /* stxvd2x v9,r9,r3 : table+0x20 */ |
| + li 9,0x50 |
| + .long 0x7D4A1F99 /* stxvd2x v10,r10,r3 : table+0x30 */ |
| + li 10,0x60 |
| + /* Square: multiply v3 by the three pieces just derived from it (vpmsumd = carry-less multiply of both doubleword pairs, products XORed) */ |
| + .long 0x10035CC8 /* vpmsumd v0,v3,v11 : low partial product */ |
| + .long 0x10234CC8 /* vpmsumd v1,v3,v9 : middle partial product */ |
| + .long 0x104354C8 /* vpmsumd v2,v3,v10 : high partial product */ |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step, low product times the constant */ |
| + /* Fold the middle product into the low and high ones */ |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 /* swap halves of the low product */ |
| + vxor 0,0,7 /* and combine with the first reduction term */ |
| + /* Second reduction step */ |
| + vsldoi 6,0,0,8 |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + vxor 6,6,2 |
| + vxor 16,0,6 /* v16 = reduced square of v3 */ |
| + /* Split the square the same way v3 was split */ |
| + vsldoi 17,16,16,8 /* v17 = v16 with halves swapped */ |
| + vsldoi 19,4,17,8 /* v19 = 8 zero bytes, then the first half of v17 */ |
| + vsldoi 18,17,4,8 /* v18 = second half of v17, then 8 zero bytes */ |
| + /* Store second-power pieces at offsets 0x40-0x60, then form the third and fourth powers side by side */ |
| + .long 0x7E681F99 /* stxvd2x v19,r8,r3 : table+0x40 */ |
| + li 8,0x70 |
| + .long 0x7E291F99 /* stxvd2x v17,r9,r3 : table+0x50 */ |
| + li 9,0x80 |
| + .long 0x7E4A1F99 /* stxvd2x v18,r10,r3 : table+0x60 */ |
| + li 10,0x90 |
| + .long 0x10039CC8 /* vpmsumd v0,v3,v19 : v3 times square, low */ |
| + .long 0x11B09CC8 /* vpmsumd v13,v16,v19 : square times square, low */ |
| + .long 0x10238CC8 /* vpmsumd v1,v3,v17 : middle */ |
| + .long 0x11D08CC8 /* vpmsumd v14,v16,v17 : middle */ |
| + .long 0x104394C8 /* vpmsumd v2,v3,v18 : high */ |
| + .long 0x11F094C8 /* vpmsumd v15,v16,v18 : high */ |
| + /* First reduction step for both products */ |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 */ |
| + .long 0x114D44C8 /* vpmsumd v10,v13,v8 */ |
| + /* Fold the middle products; v9-v11 are reused as temporaries for the second product */ |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vsldoi 11,14,4,8 |
| + vsldoi 9,4,14,8 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + vxor 13,13,11 |
| + vxor 15,15,9 |
| + |
| + vsldoi 0,0,0,8 |
| + vsldoi 13,13,13,8 |
| + vxor 0,0,7 |
| + vxor 13,13,10 |
| + /* Second reduction step for both products */ |
| + vsldoi 6,0,0,8 |
| + vsldoi 9,13,13,8 |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + .long 0x11AD44C8 /* vpmsumd v13,v13,v8 */ |
| + vxor 6,6,2 |
| + vxor 9,9,15 |
| + vxor 0,0,6 /* v0 = reduced third power */ |
| + vxor 13,13,9 /* v13 = reduced fourth power */ |
| + /* Split both results into three pieces each */ |
| + vsldoi 9,0,0,8 |
| + vsldoi 17,13,13,8 |
| + vsldoi 11,4,9,8 |
| + vsldoi 10,9,4,8 |
| + vsldoi 19,4,17,8 |
| + vsldoi 18,17,4,8 |
| + /* Store third-power pieces at 0x70-0x90 and fourth-power pieces at 0xa0-0xc0 */ |
| + .long 0x7D681F99 /* stxvd2x v11,r8,r3 : table+0x70 */ |
| + li 8,0xa0 |
| + .long 0x7D291F99 /* stxvd2x v9,r9,r3 : table+0x80 */ |
| + li 9,0xb0 |
| + .long 0x7D4A1F99 /* stxvd2x v10,r10,r3 : table+0x90 */ |
| + li 10,0xc0 |
| + .long 0x7E681F99 /* stxvd2x v19,r8,r3 : table+0xa0 */ |
| + .long 0x7E291F99 /* stxvd2x v17,r9,r3 : table+0xb0 */ |
| + .long 0x7E4A1F99 /* stxvd2x v18,r10,r3 : table+0xc0 */ |
| + |
| + mtspr 256,12 /* restore the caller's VRSAVE */ |
| + blr |
| +.long 0 /* NOTE(review): these three data lines look like a traceback-table style tag emitted after the code - confirm the format */ |
| +.byte 0,12,0x14,0,0,0,2,0 |
| +.long 0 |
| + |
| +.globl gcm_gmult_p8 /* gcm_gmult_p8: multiply the 16-byte value at r3 by the key pieces stored at table offsets 0x10-0x30, reduce with the constant at offset 0x00, and write the result back to r3 */ |
| +.align 5 /* In: r3 = 16-byte accumulator (read, then overwritten), r4 = table. Scratch: r0, r8-r10, r12, v0-v12; VRSAVE is restored on exit */ |
| +gcm_gmult_p8: /* NOTE(review): presumably called from C as gcm_gmult_p8(Xi, Htable) with a table produced by gcm_init_p8 - confirm */ |
| + lis 0,0xfff8 /* r0 = 0xfff80000: temporary VRSAVE mask, top 13 bits set (v0-v12 in the usual MSB-first convention) */ |
| + li 8,0x10 /* r8/r9/r10 = table byte offsets 0x10/0x20/0x30 */ |
| + mfspr 12,256 /* r12 = caller's VRSAVE (SPR 256) */ |
| + li 9,0x20 |
| + mtspr 256,0 /* install the temporary VRSAVE mask */ |
| + li 10,0x30 |
| + .long 0x7C601E99 /* lxvd2x v3,0,r3 : v3 = accumulator */ |
| + /* Load the table entries and build the byte-swap mask, interleaved */ |
| + .long 0x7D682699 /* lxvd2x v11,r8,r4 : table+0x10 */ |
| + lvsl 12,0,0 /* v12 = 0x00,0x01,...,0x0f (low four bits of r0 are zero) */ |
| + .long 0x7D292699 /* lxvd2x v9,r9,r4 : table+0x20 */ |
| + vspltisb 5,0x07 /* v5 = 0x07 in every byte */ |
| + .long 0x7D4A2699 /* lxvd2x v10,r10,r4 : table+0x30 */ |
| + vxor 12,12,5 /* v12 = 07,06,...,00,0f,0e,...,08: reverses the bytes inside each 8-byte half */ |
| + .long 0x7D002699 /* lxvd2x v8,0,r4 : table+0x00 = reduction constant */ |
| + vperm 3,3,3,12 /* byte-swap the accumulator within each half */ |
| + vxor 4,4,4 /* v4 = 0 */ |
| + /* Three partial products (vpmsumd = carry-less multiply of both doubleword pairs, products XORed) */ |
| + .long 0x10035CC8 /* vpmsumd v0,v3,v11 : low */ |
| + .long 0x10234CC8 /* vpmsumd v1,v3,v9 : middle */ |
| + .long 0x104354C8 /* vpmsumd v2,v3,v10 : high */ |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step */ |
| + /* Fold the middle product into the low and high ones */ |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 /* swap halves of the low product */ |
| + vxor 0,0,7 /* and combine with the first reduction term */ |
| + /* Second reduction step */ |
| + vsldoi 6,0,0,8 |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + vxor 6,6,2 |
| + vxor 0,0,6 /* v0 = reduced product */ |
| + |
| + vperm 0,0,0,12 /* undo the byte swap before storing */ |
| + .long 0x7C001F99 /* stxvd2x v0,0,r3 : write the accumulator back */ |
| + |
| + mtspr 256,12 /* restore the caller's VRSAVE */ |
| + blr |
| +.long 0 /* NOTE(review): these three data lines look like a traceback-table style tag emitted after the code - confirm the format */ |
| +.byte 0,12,0x14,0,0,0,2,0 |
| +.long 0 |
| + |
| + |
| +.globl gcm_ghash_p8 /* gcm_ghash_p8: fold r6 bytes of input at r5 into the 16-byte accumulator at r3, using the table at r4; the accumulator is rewritten in place */ |
| +.align 5 /* Lengths of 64 bytes or more branch to Lgcm_ghash_p8_4x; shorter inputs are processed here, two 16-byte blocks per loop pass. Scratch here: r0, r8-r10, r12, v0-v19 */ |
| +gcm_ghash_p8: /* NOTE(review): presumably gcm_ghash_p8(Xi, Htable, inp, len) with len a non-zero multiple of 16 - confirm; a zero length is not checked for in this code */ |
| + li 0,-4096 /* r0 = 0xfffff000: temporary VRSAVE mask, top 20 bits set (v0-v19 in the usual MSB-first convention) */ |
| + li 8,0x10 /* r8/r9/r10 = table byte offsets, advanced to 0x40/0x50/0x60 after the first loads */ |
| + mfspr 12,256 /* r12 = caller's VRSAVE (SPR 256) */ |
| + li 9,0x20 |
| + mtspr 256,0 /* install the temporary VRSAVE mask */ |
| + li 10,0x30 |
| + .long 0x7C001E99 /* lxvd2x v0,0,r3 : v0 = accumulator */ |
| + /* Load first-power key pieces and the constant; build the byte-swap mask in v12 */ |
| + .long 0x7D682699 /* lxvd2x v11,r8,r4 : table+0x10 */ |
| + li 8,0x40 |
| + lvsl 12,0,0 /* v12 = 0x00,0x01,...,0x0f (low four bits of r0 are zero) */ |
| + .long 0x7D292699 /* lxvd2x v9,r9,r4 : table+0x20 */ |
| + li 9,0x50 |
| + vspltisb 5,0x07 /* v5 = 0x07 in every byte */ |
| + .long 0x7D4A2699 /* lxvd2x v10,r10,r4 : table+0x30 */ |
| + li 10,0x60 |
| + vxor 12,12,5 /* v12 = 07,06,...,00,0f,0e,...,08: reverses the bytes inside each 8-byte half */ |
| + .long 0x7D002699 /* lxvd2x v8,0,r4 : table+0x00 = reduction constant */ |
| + vperm 0,0,0,12 /* byte-swap the accumulator within each half */ |
| + vxor 4,4,4 /* v4 = 0 */ |
| + |
| + cmpldi 6,64 /* unsigned compare of the length with 64 */ |
| + bge Lgcm_ghash_p8_4x /* 64 bytes or more: take the four-block path */ |
| + |
| + .long 0x7C602E99 /* lxvd2x v3,0,r5 : first input block */ |
| + addi 5,5,16 |
| + subic. 6,6,16 /* r6 = bytes left after this block; sets CR0 */ |
| + vperm 3,3,3,12 |
| + vxor 3,3,0 /* v3 = block XOR accumulator */ |
| + beq Lshort /* exactly one block: a single multiply by the first power */ |
| + /* More than one block: load second-power pieces from table offsets 0x40-0x60 */ |
| + .long 0x7E682699 /* lxvd2x v19,r8,r4 : table+0x40 */ |
| + li 8,16 /* r8 = 16: offset of the look-ahead load inside the loop */ |
| + .long 0x7E292699 /* lxvd2x v17,r9,r4 : table+0x50 */ |
| + add 9,5,6 /* r9 = end of input */ |
| + .long 0x7E4A2699 /* lxvd2x v18,r10,r4 : table+0x60 */ |
| + |
| + |
| +.align 5 |
| +Loop_2x: /* each pass: v3 times second power, XORed with next block times first power, then reduced */ |
| + .long 0x7E002E99 /* lxvd2x v16,0,r5 : next input block */ |
| + vperm 16,16,16,12 |
| + |
| + subic 6,6,32 /* r6 -= 32; the carry bit records whether this borrowed */ |
| + .long 0x10039CC8 /* vpmsumd v0,v3,v19 : low, second power */ |
| + .long 0x11B05CC8 /* vpmsumd v13,v16,v11 : low, first power */ |
| + subfe 0,0,0 /* r0 = -1 if the subtraction borrowed, else 0 */ |
| + .long 0x10238CC8 /* vpmsumd v1,v3,v17 : middle, second power */ |
| + .long 0x11D04CC8 /* vpmsumd v14,v16,v9 : middle, first power */ |
| + and 0,0,6 /* r0 = negative remainder on borrow, else 0 */ |
| + .long 0x104394C8 /* vpmsumd v2,v3,v18 : high, second power */ |
| + .long 0x11F054C8 /* vpmsumd v15,v16,v10 : high, first power */ |
| + add 5,5,0 /* on borrow, pull the pointer back so the look-ahead load at r5+16 does not go past r9 */ |
| + |
| + vxor 0,0,13 /* sum the two low products */ |
| + vxor 1,1,14 /* sum the two middle products */ |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step */ |
| + |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 2,2,15 /* sum the two high products */ |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 |
| + vxor 0,0,7 |
| + .long 0x7C682E99 /* lxvd2x v3,r8,r5 : look-ahead load of the block at r5+16 */ |
| + addi 5,5,32 |
| + |
| + vsldoi 6,0,0,8 /* second reduction step */ |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + vperm 3,3,3,12 |
| + vxor 6,6,2 |
| + vxor 3,3,6 |
| + vxor 3,3,0 /* v3 = look-ahead block XOR reduced result so far */ |
| + cmpld 9,5 |
| + bgt Loop_2x /* continue while the end pointer is above r5 */ |
| + |
| + cmplwi 6,0 |
| + bne Leven /* r6 non-zero: the look-ahead block was not new data, so finish from v0 and v6 */ |
| + |
| +Lshort: /* one block left in v3: multiply by the first power and reduce */ |
| + .long 0x10035CC8 /* vpmsumd v0,v3,v11 : low */ |
| + .long 0x10234CC8 /* vpmsumd v1,v3,v9 : middle */ |
| + .long 0x104354C8 /* vpmsumd v2,v3,v10 : high */ |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step */ |
| + |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 |
| + vxor 0,0,7 |
| + |
| + vsldoi 6,0,0,8 /* second reduction step */ |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + vxor 6,6,2 |
| + |
| +Leven: /* common exit: v0 and v6 hold the two halves of the final XOR */ |
| + vxor 0,0,6 /* v0 = reduced accumulator */ |
| + vperm 0,0,0,12 /* undo the byte swap */ |
| + .long 0x7C001F99 /* stxvd2x v0,0,r3 : write the accumulator back */ |
| + |
| + mtspr 256,12 /* restore the caller's VRSAVE */ |
| + blr |
| +.long 0 /* NOTE(review): these three data lines look like a traceback-table style tag emitted after the code - confirm the format */ |
| +.byte 0,12,0x14,0,0,0,4,0 |
| +.long 0 |
| +.align 5 /* Four-blocks-per-pass continuation of gcm_ghash_p8, reached by its bge when the length is 64 bytes or more */ |
| +.gcm_ghash_p8_4x: /* Entry state left by gcm_ghash_p8: r3/r4/r5/r6 = accumulator/table/input/length, r8-r10 = 0x40/0x50/0x60, r12 = caller's VRSAVE, v0 = byte-swapped accumulator, v4 = 0, v8 = constant, v9-v11 = first-power pieces, v12 = byte-swap mask */ |
| +Lgcm_ghash_p8_4x: /* Uses v20-v31 in addition, so they are saved in a 256-byte stack frame and reloaded at Ldone_4x */ |
| + stdu 1,-256(1) /* allocate the frame; the old r1 is stored at the new r1 */ |
| + li 10,63 /* r10/r11 step through the save slots; stvx and lvx ignore the low four address bits, so 63 and 79 select consecutive 16-byte slots */ |
| + li 11,79 |
| + stvx 20,10,1 /* save v20-v31 */ |
| + addi 10,10,32 |
| + stvx 21,11,1 |
| + addi 11,11,32 |
| + stvx 22,10,1 |
| + addi 10,10,32 |
| + stvx 23,11,1 |
| + addi 11,11,32 |
| + stvx 24,10,1 |
| + addi 10,10,32 |
| + stvx 25,11,1 |
| + addi 11,11,32 |
| + stvx 26,10,1 |
| + addi 10,10,32 |
| + stvx 27,11,1 |
| + addi 11,11,32 |
| + stvx 28,10,1 |
| + addi 10,10,32 |
| + stvx 29,11,1 |
| + addi 11,11,32 |
| + stvx 30,10,1 |
| + li 10,0x60 /* NOTE(review): this value is not read before r10 is set to 0x90 below */ |
| + stvx 31,11,1 |
| + li 0,-1 |
| + stw 12,252(1) /* keep a copy of the caller's VRSAVE in the frame */ |
| + mtspr 256,0 /* VRSAVE = all ones while v0-v31 are live */ |
| + |
| + lvsl 5,0,8 /* v5 = 0x00,0x01,...,0x0f (r8 is 0x40 here, low four bits zero) */ |
| + /* Load the remaining table entries */ |
| + li 8,0x70 |
| + .long 0x7E292699 /* lxvd2x v17,r9,r4 : table+0x50, second power (r9 is still 0x50) */ |
| + li 9,0x80 |
| + vspltisb 6,8 /* v6 = 0x08 in every byte */ |
| + |
| + li 10,0x90 |
| + .long 0x7EE82699 /* lxvd2x v23,r8,r4 : table+0x70, third-power pieces in v23-v25 */ |
| + li 8,0xa0 |
| + .long 0x7F092699 /* lxvd2x v24,r9,r4 : table+0x80 */ |
| + li 9,0xb0 |
| + .long 0x7F2A2699 /* lxvd2x v25,r10,r4 : table+0x90 */ |
| + li 10,0xc0 |
| + .long 0x7FA82699 /* lxvd2x v29,r8,r4 : table+0xa0, fourth-power pieces in v29-v31 */ |
| + li 8,0x10 /* r8/r9/r10 become input offsets 0x10/0x20/0x30 from here on */ |
| + .long 0x7FC92699 /* lxvd2x v30,r9,r4 : table+0xb0 */ |
| + li 9,0x20 |
| + .long 0x7FEA2699 /* lxvd2x v31,r10,r4 : table+0xc0 */ |
| + li 10,0x30 |
| + /* Permutation masks that pair up 8-byte halves of two vectors */ |
| + vsldoi 7,4,6,8 /* v7 = 8 zero bytes, then 8 bytes of 0x08 */ |
| + vaddubm 18,5,7 /* v18 = 00..07,10..17: first half of each vperm source */ |
| + vaddubm 19,6,18 /* v19 = 08..0f,18..1f: second half of each vperm source */ |
| + |
| + srdi 6,6,4 /* r6 = length in 16-byte blocks */ |
| + /* Load and byte-swap the first four blocks */ |
| + .long 0x7C602E99 /* lxvd2x v3,0,r5 */ |
| + .long 0x7E082E99 /* lxvd2x v16,r8,r5 */ |
| + subic. 6,6,8 /* r6 = blocks - 8; CR0 is consumed by the blt before Loop_4x */ |
| + .long 0x7EC92E99 /* lxvd2x v22,r9,r5 */ |
| + .long 0x7F8A2E99 /* lxvd2x v28,r10,r5 */ |
| + addi 5,5,0x40 |
| + vperm 3,3,3,12 |
| + vperm 16,16,16,12 |
| + vperm 22,22,22,12 |
| + vperm 28,28,28,12 |
| + |
| + vxor 2,3,0 /* v2 = first block XOR accumulator; multiplied by the fourth power later */ |
| + /* Second block times third power */ |
| + .long 0x11B0BCC8 /* vpmsumd v13,v16,v23 : low */ |
| + .long 0x11D0C4C8 /* vpmsumd v14,v16,v24 : middle */ |
| + .long 0x11F0CCC8 /* vpmsumd v15,v16,v25 : high */ |
| + /* Third and fourth blocks: paired halves let one vpmsumd form both blocks' low (or high) terms */ |
| + vperm 11,17,9,18 /* v11 = first halves of second-power and first-power vectors */ |
| + vperm 5,22,28,19 /* v5 = second halves of blocks three and four */ |
| + vperm 10,17,9,19 /* v10 = second halves of second-power and first-power vectors */ |
| + vperm 6,22,28,18 /* v6 = first halves of blocks three and four */ |
| + .long 0x12B68CC8 /* vpmsumd v21,v22,v17 : middle, block three times second power */ |
| + .long 0x12855CC8 /* vpmsumd v20,v5,v11 : combined low terms */ |
| + .long 0x137C4CC8 /* vpmsumd v27,v28,v9 : middle, block four times first power */ |
| + .long 0x134654C8 /* vpmsumd v26,v6,v10 : combined high terms */ |
| + /* Accumulate blocks two to four: v20 = low, v27 = middle, v26 = high */ |
| + vxor 21,21,14 |
| + vxor 20,20,13 |
| + vxor 27,27,21 |
| + vxor 26,26,15 |
| + |
| + blt Ltail_4x /* fewer than 8 blocks in total: no full loop pass */ |
| + |
| +Loop_4x: /* each pass reduces the previous group of four while starting the products of the next four */ |
| + .long 0x7C602E99 /* lxvd2x v3,0,r5 */ |
| + .long 0x7E082E99 /* lxvd2x v16,r8,r5 */ |
| + subic. 6,6,4 /* four more blocks consumed; CR0 is consumed by the bge at the bottom */ |
| + .long 0x7EC92E99 /* lxvd2x v22,r9,r5 */ |
| + .long 0x7F8A2E99 /* lxvd2x v28,r10,r5 */ |
| + addi 5,5,0x40 |
| + vperm 16,16,16,12 |
| + vperm 22,22,22,12 |
| + vperm 28,28,28,12 |
| + vperm 3,3,3,12 |
| + |
| + .long 0x1002ECC8 /* vpmsumd v0,v2,v29 : low, pending value times fourth power */ |
| + .long 0x1022F4C8 /* vpmsumd v1,v2,v30 : middle */ |
| + .long 0x1042FCC8 /* vpmsumd v2,v2,v31 : high */ |
| + .long 0x11B0BCC8 /* vpmsumd v13,v16,v23 : low, new second block times third power */ |
| + .long 0x11D0C4C8 /* vpmsumd v14,v16,v24 : middle */ |
| + .long 0x11F0CCC8 /* vpmsumd v15,v16,v25 : high */ |
| + /* Add the sums carried over from the previous group */ |
| + vxor 0,0,20 |
| + vxor 1,1,27 |
| + vxor 2,2,26 |
| + vperm 5,22,28,19 |
| + vperm 6,22,28,18 |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step */ |
| + .long 0x12855CC8 /* vpmsumd v20,v5,v11 : combined low terms of new blocks three and four */ |
| + .long 0x134654C8 /* vpmsumd v26,v6,v10 : combined high terms */ |
| + |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 |
| + vxor 0,0,7 |
| + |
| + vsldoi 6,0,0,8 /* second reduction step */ |
| + .long 0x12B68CC8 /* vpmsumd v21,v22,v17 : middle, block three times second power */ |
| + .long 0x137C4CC8 /* vpmsumd v27,v28,v9 : middle, block four times first power */ |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + /* Finish the new sums, interleaved with forming the next pending value in v2 */ |
| + vxor 20,20,13 |
| + vxor 26,26,15 |
| + vxor 2,2,3 /* fold in the new first block */ |
| + vxor 21,21,14 |
| + vxor 2,2,6 |
| + vxor 27,27,21 |
| + vxor 2,2,0 /* v2 = reduced result XOR new first block */ |
| + bge Loop_4x /* loop while the block counter has not gone negative */ |
| + |
| +Ltail_4x: /* finish a group: v2 times the power held in v29-v31, plus sums v20/v27/v26, then reduce */ |
| + .long 0x1002ECC8 /* vpmsumd v0,v2,v29 : low */ |
| + .long 0x1022F4C8 /* vpmsumd v1,v2,v30 : middle */ |
| + .long 0x1042FCC8 /* vpmsumd v2,v2,v31 : high */ |
| + |
| + vxor 0,0,20 |
| + vxor 1,1,27 |
| + |
| + .long 0x10E044C8 /* vpmsumd v7,v0,v8 : first reduction step */ |
| + |
| + vsldoi 5,1,4,8 |
| + vsldoi 6,4,1,8 |
| + vxor 2,2,26 |
| + vxor 0,0,5 |
| + vxor 2,2,6 |
| + |
| + vsldoi 0,0,0,8 |
| + vxor 0,0,7 |
| + |
| + vsldoi 6,0,0,8 /* second reduction step */ |
| + .long 0x100044C8 /* vpmsumd v0,v0,v8 */ |
| + vxor 6,6,2 |
| + vxor 0,0,6 /* v0 = reduced accumulator */ |
| + |
| + addic. 6,6,4 /* r6 = blocks still unprocessed */ |
| + beq Ldone_4x /* none left */ |
| + /* One to three blocks left: set up a shorter group, then run Ltail_4x once more */ |
| + .long 0x7C602E99 /* lxvd2x v3,0,r5 */ |
| + cmpldi 6,2 |
| + li 6,-4 /* so that the addic. in Ltail_4x yields zero next time */ |
| + blt Lone |
| + .long 0x7E082E99 /* lxvd2x v16,r8,r5 */ |
| + beq Ltwo |
| + |
| +Lthree: /* three blocks: powers three, two, one */ |
| + .long 0x7EC92E99 /* lxvd2x v22,r9,r5 */ |
| + vperm 3,3,3,12 |
| + vperm 16,16,16,12 |
| + vperm 22,22,22,12 |
| + |
| + vxor 2,3,0 /* v2 = first remaining block XOR accumulator */ |
| + vor 29,23,23 /* copy the third-power pieces into v29-v31, the registers Ltail_4x multiplies v2 by */ |
| + vor 30, 24, 24 |
| + vor 31,25,25 |
| + |
| + vperm 5,16,22,19 |
| + vperm 6,16,22,18 |
| + .long 0x12B08CC8 /* vpmsumd v21,v16,v17 : middle, second remaining block times second power */ |
| + .long 0x13764CC8 /* vpmsumd v27,v22,v9 : middle, third remaining block times first power */ |
| + .long 0x12855CC8 /* vpmsumd v20,v5,v11 : combined low terms */ |
| + .long 0x134654C8 /* vpmsumd v26,v6,v10 : combined high terms */ |
| + |
| + vxor 27,27,21 |
| + b Ltail_4x |
| + |
| +.align 4 |
| +Ltwo: /* two blocks: powers two and one */ |
| + vperm 3,3,3,12 |
| + vperm 16,16,16,12 |
| + |
| + vxor 2,3,0 /* v2 = first remaining block XOR accumulator */ |
| + vperm 5,4,16,19 /* zero paired with the second remaining block, so only the first-power half of v11/v10 contributes */ |
| + vperm 6,4,16,18 |
| + /* Put the second power, split into three pieces, in v29-v31 */ |
| + vsldoi 29,4,17,8 |
| + vor 30, 17, 17 |
| + vsldoi 31,17,4,8 |
| + |
| + .long 0x12855CC8 /* vpmsumd v20,v5,v11 : low */ |
| + .long 0x13704CC8 /* vpmsumd v27,v16,v9 : middle */ |
| + .long 0x134654C8 /* vpmsumd v26,v6,v10 : high */ |
| + |
| + b Ltail_4x |
| + |
| +.align 4 |
| +Lone: /* one block: first power only, nothing to add */ |
| + vperm 3,3,3,12 |
| + /* Put the first power, split into three pieces, in v29-v31 */ |
| + vsldoi 29,4,9,8 |
| + vor 30, 9, 9 |
| + vsldoi 31,9,4,8 |
| + |
| + vxor 2,3,0 /* v2 = remaining block XOR accumulator */ |
| + vxor 20,20,20 /* clear the three carried sums */ |
| + vxor 27,27,27 |
| + vxor 26,26,26 |
| + |
| + b Ltail_4x |
| + |
| +Ldone_4x: |
| + vperm 0,0,0,12 /* undo the byte swap */ |
| + .long 0x7C001F99 /* stxvd2x v0,0,r3 : write the accumulator back */ |
| + /* Reload v20-v31 from the same slots used on entry, then release the frame */ |
| + li 10,63 |
| + li 11,79 |
| + mtspr 256,12 /* restore the caller's VRSAVE from r12 (the copy in the frame is not read back) */ |
| + lvx 20,10,1 |
| + addi 10,10,32 |
| + lvx 21,11,1 |
| + addi 11,11,32 |
| + lvx 22,10,1 |
| + addi 10,10,32 |
| + lvx 23,11,1 |
| + addi 11,11,32 |
| + lvx 24,10,1 |
| + addi 10,10,32 |
| + lvx 25,11,1 |
| + addi 11,11,32 |
| + lvx 26,10,1 |
| + addi 10,10,32 |
| + lvx 27,11,1 |
| + addi 11,11,32 |
| + lvx 28,10,1 |
| + addi 10,10,32 |
| + lvx 29,11,1 |
| + addi 11,11,32 |
| + lvx 30,10,1 |
| + lvx 31,11,1 |
| + addi 1,1,256 /* pop the 256-byte frame */ |
| + blr |
| +.long 0 /* NOTE(review): these three data lines look like a traceback-table style tag emitted after the code - confirm the format */ |
| +.byte 0,12,0x04,0,0x80,0,4,0 |
| +.long 0 |
| + |
| + |
| +.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII: "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro@openssl.org>" */ |
| +.align 2 /* re-align after the odd-length string */ |
| +.align 2 /* repeated directive; has no further effect once aligned */ |