OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 .globl _gcm_gmult_4bit | 5 .globl _gcm_gmult_4bit |
6 .private_extern _gcm_gmult_4bit | 6 .private_extern _gcm_gmult_4bit |
7 | 7 |
8 .p2align 4 | 8 .p2align 4 |
9 _gcm_gmult_4bit: | 9 _gcm_gmult_4bit: |
10 pushq %rbx | 10 pushq %rbx |
11 pushq %rbp | 11 pushq %rbp |
12 pushq %r12 | 12 pushq %r12 |
13 L$gmult_prologue: | 13 L$gmult_prologue: |
14 | 14 |
15 movzbq 15(%rdi),%r8 | 15 movzbq 15(%rdi),%r8 |
16 leaq L$rem_4bit(%rip),%r11 | 16 leaq L$rem_4bit(%rip),%r11 |
17 xorq %rax,%rax | 17 xorq %rax,%rax |
18 xorq %rbx,%rbx | 18 xorq %rbx,%rbx |
19 movb %r8b,%al | 19 movb %r8b,%al |
20 movb %r8b,%bl | 20 movb %r8b,%bl |
21 shlb $4,%al | 21 shlb $4,%al |
22 movq $14,%rcx | 22 movq $14,%rcx |
23 movq 8(%rsi,%rax,1),%r8 | 23 movq 8(%rsi,%rax,1),%r8 |
24 movq (%rsi,%rax,1),%r9 | 24 movq (%rsi,%rax,1),%r9 |
25 » andb» $240,%bl | 25 » andb» $0xf0,%bl |
26 movq %r8,%rdx | 26 movq %r8,%rdx |
27 jmp L$oop1 | 27 jmp L$oop1 |
28 | 28 |
29 .p2align 4 | 29 .p2align 4 |
30 L$oop1: | 30 L$oop1: |
31 shrq $4,%r8 | 31 shrq $4,%r8 |
32 » andq» $15,%rdx | 32 » andq» $0xf,%rdx |
33 movq %r9,%r10 | 33 movq %r9,%r10 |
34 movb (%rdi,%rcx,1),%al | 34 movb (%rdi,%rcx,1),%al |
35 shrq $4,%r9 | 35 shrq $4,%r9 |
36 xorq 8(%rsi,%rbx,1),%r8 | 36 xorq 8(%rsi,%rbx,1),%r8 |
37 shlq $60,%r10 | 37 shlq $60,%r10 |
38 xorq (%rsi,%rbx,1),%r9 | 38 xorq (%rsi,%rbx,1),%r9 |
39 movb %al,%bl | 39 movb %al,%bl |
40 xorq (%r11,%rdx,8),%r9 | 40 xorq (%r11,%rdx,8),%r9 |
41 movq %r8,%rdx | 41 movq %r8,%rdx |
42 shlb $4,%al | 42 shlb $4,%al |
43 xorq %r10,%r8 | 43 xorq %r10,%r8 |
44 decq %rcx | 44 decq %rcx |
45 js L$break1 | 45 js L$break1 |
46 | 46 |
47 shrq $4,%r8 | 47 shrq $4,%r8 |
48 » andq» $15,%rdx | 48 » andq» $0xf,%rdx |
49 movq %r9,%r10 | 49 movq %r9,%r10 |
50 shrq $4,%r9 | 50 shrq $4,%r9 |
51 xorq 8(%rsi,%rax,1),%r8 | 51 xorq 8(%rsi,%rax,1),%r8 |
52 shlq $60,%r10 | 52 shlq $60,%r10 |
53 xorq (%rsi,%rax,1),%r9 | 53 xorq (%rsi,%rax,1),%r9 |
54 » andb» $240,%bl | 54 » andb» $0xf0,%bl |
55 xorq (%r11,%rdx,8),%r9 | 55 xorq (%r11,%rdx,8),%r9 |
56 movq %r8,%rdx | 56 movq %r8,%rdx |
57 xorq %r10,%r8 | 57 xorq %r10,%r8 |
58 jmp L$oop1 | 58 jmp L$oop1 |
59 | 59 |
60 .p2align 4 | 60 .p2align 4 |
61 L$break1: | 61 L$break1: |
62 shrq $4,%r8 | 62 shrq $4,%r8 |
63 » andq» $15,%rdx | 63 » andq» $0xf,%rdx |
64 movq %r9,%r10 | 64 movq %r9,%r10 |
65 shrq $4,%r9 | 65 shrq $4,%r9 |
66 xorq 8(%rsi,%rax,1),%r8 | 66 xorq 8(%rsi,%rax,1),%r8 |
67 shlq $60,%r10 | 67 shlq $60,%r10 |
68 xorq (%rsi,%rax,1),%r9 | 68 xorq (%rsi,%rax,1),%r9 |
69 » andb» $240,%bl | 69 » andb» $0xf0,%bl |
70 xorq (%r11,%rdx,8),%r9 | 70 xorq (%r11,%rdx,8),%r9 |
71 movq %r8,%rdx | 71 movq %r8,%rdx |
72 xorq %r10,%r8 | 72 xorq %r10,%r8 |
73 | 73 |
74 shrq $4,%r8 | 74 shrq $4,%r8 |
75 » andq» $15,%rdx | 75 » andq» $0xf,%rdx |
76 movq %r9,%r10 | 76 movq %r9,%r10 |
77 shrq $4,%r9 | 77 shrq $4,%r9 |
78 xorq 8(%rsi,%rbx,1),%r8 | 78 xorq 8(%rsi,%rbx,1),%r8 |
79 shlq $60,%r10 | 79 shlq $60,%r10 |
80 xorq (%rsi,%rbx,1),%r9 | 80 xorq (%rsi,%rbx,1),%r9 |
81 xorq %r10,%r8 | 81 xorq %r10,%r8 |
82 xorq (%r11,%rdx,8),%r9 | 82 xorq (%r11,%rdx,8),%r9 |
83 | 83 |
84 bswapq %r8 | 84 bswapq %r8 |
85 bswapq %r9 | 85 bswapq %r9 |
(...skipping 787 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
873 .p2align 5 | 873 .p2align 5 |
874 _gcm_ghash_clmul: | 874 _gcm_ghash_clmul: |
875 L$_ghash_clmul: | 875 L$_ghash_clmul: |
876 movdqa L$bswap_mask(%rip),%xmm10 | 876 movdqa L$bswap_mask(%rip),%xmm10 |
877 | 877 |
878 movdqu (%rdi),%xmm0 | 878 movdqu (%rdi),%xmm0 |
879 movdqu (%rsi),%xmm2 | 879 movdqu (%rsi),%xmm2 |
880 movdqu 32(%rsi),%xmm7 | 880 movdqu 32(%rsi),%xmm7 |
881 .byte 102,65,15,56,0,194 | 881 .byte 102,65,15,56,0,194 |
882 | 882 |
883 » subq» $16,%rcx | 883 » subq» $0x10,%rcx |
884 jz L$odd_tail | 884 jz L$odd_tail |
885 | 885 |
886 movdqu 16(%rsi),%xmm6 | 886 movdqu 16(%rsi),%xmm6 |
887 movl _OPENSSL_ia32cap_P+4(%rip),%eax | 887 movl _OPENSSL_ia32cap_P+4(%rip),%eax |
888 » cmpq» $48,%rcx | 888 » cmpq» $0x30,%rcx |
889 jb L$skip4x | 889 jb L$skip4x |
890 | 890 |
891 andl $71303168,%eax | 891 andl $71303168,%eax |
892 cmpl $4194304,%eax | 892 cmpl $4194304,%eax |
893 je L$skip4x | 893 je L$skip4x |
894 | 894 |
895 » subq» $48,%rcx | 895 » subq» $0x30,%rcx |
896 » movq» $11547335547999543296,%rax | 896 » movq» $0xA040608020C0E000,%rax |
897 movdqu 48(%rsi),%xmm14 | 897 movdqu 48(%rsi),%xmm14 |
898 movdqu 64(%rsi),%xmm15 | 898 movdqu 64(%rsi),%xmm15 |
899 | 899 |
900 | 900 |
901 | 901 |
902 | 902 |
903 movdqu 48(%rdx),%xmm3 | 903 movdqu 48(%rdx),%xmm3 |
904 movdqu 32(%rdx),%xmm11 | 904 movdqu 32(%rdx),%xmm11 |
905 .byte 102,65,15,56,0,218 | 905 .byte 102,65,15,56,0,218 |
906 .byte 102,69,15,56,0,218 | 906 .byte 102,69,15,56,0,218 |
(...skipping 26 matching lines...) Expand all Loading... |
933 .byte 102,69,15,58,68,222,0 | 933 .byte 102,69,15,58,68,222,0 |
934 movdqa %xmm0,%xmm1 | 934 movdqa %xmm0,%xmm1 |
935 pshufd $78,%xmm0,%xmm8 | 935 pshufd $78,%xmm0,%xmm8 |
936 pxor %xmm0,%xmm8 | 936 pxor %xmm0,%xmm8 |
937 .byte 102,69,15,58,68,238,17 | 937 .byte 102,69,15,58,68,238,17 |
938 .byte 102,68,15,58,68,231,0 | 938 .byte 102,68,15,58,68,231,0 |
939 xorps %xmm11,%xmm3 | 939 xorps %xmm11,%xmm3 |
940 xorps %xmm13,%xmm5 | 940 xorps %xmm13,%xmm5 |
941 | 941 |
942 leaq 64(%rdx),%rdx | 942 leaq 64(%rdx),%rdx |
943 » subq» $64,%rcx | 943 » subq» $0x40,%rcx |
944 jc L$tail4x | 944 jc L$tail4x |
945 | 945 |
946 jmp L$mod4_loop | 946 jmp L$mod4_loop |
947 .p2align 5 | 947 .p2align 5 |
948 L$mod4_loop: | 948 L$mod4_loop: |
949 .byte 102,65,15,58,68,199,0 | 949 .byte 102,65,15,58,68,199,0 |
950 xorps %xmm12,%xmm4 | 950 xorps %xmm12,%xmm4 |
951 movdqu 48(%rdx),%xmm11 | 951 movdqu 48(%rdx),%xmm11 |
952 .byte 102,69,15,56,0,218 | 952 .byte 102,69,15,56,0,218 |
953 .byte 102,65,15,58,68,207,17 | 953 .byte 102,65,15,58,68,207,17 |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1016 movdqa %xmm0,%xmm1 | 1016 movdqa %xmm0,%xmm1 |
1017 .byte 102,69,15,58,68,238,17 | 1017 .byte 102,69,15,58,68,238,17 |
1018 xorps %xmm11,%xmm3 | 1018 xorps %xmm11,%xmm3 |
1019 pshufd $78,%xmm0,%xmm8 | 1019 pshufd $78,%xmm0,%xmm8 |
1020 pxor %xmm0,%xmm8 | 1020 pxor %xmm0,%xmm8 |
1021 | 1021 |
1022 .byte 102,68,15,58,68,231,0 | 1022 .byte 102,68,15,58,68,231,0 |
1023 xorps %xmm13,%xmm5 | 1023 xorps %xmm13,%xmm5 |
1024 | 1024 |
1025 leaq 64(%rdx),%rdx | 1025 leaq 64(%rdx),%rdx |
1026 » subq» $64,%rcx | 1026 » subq» $0x40,%rcx |
1027 jnc L$mod4_loop | 1027 jnc L$mod4_loop |
1028 | 1028 |
1029 L$tail4x: | 1029 L$tail4x: |
1030 .byte 102,65,15,58,68,199,0 | 1030 .byte 102,65,15,58,68,199,0 |
1031 .byte 102,65,15,58,68,207,17 | 1031 .byte 102,65,15,58,68,207,17 |
1032 .byte 102,68,15,58,68,199,16 | 1032 .byte 102,68,15,58,68,199,16 |
1033 xorps %xmm12,%xmm4 | 1033 xorps %xmm12,%xmm4 |
1034 xorps %xmm3,%xmm0 | 1034 xorps %xmm3,%xmm0 |
1035 xorps %xmm5,%xmm1 | 1035 xorps %xmm5,%xmm1 |
1036 pxor %xmm0,%xmm1 | 1036 pxor %xmm0,%xmm1 |
(...skipping 23 matching lines...) Expand all Loading... |
1060 | 1060 |
1061 | 1061 |
1062 movdqa %xmm0,%xmm4 | 1062 movdqa %xmm0,%xmm4 |
1063 psrlq $1,%xmm0 | 1063 psrlq $1,%xmm0 |
1064 pxor %xmm4,%xmm1 | 1064 pxor %xmm4,%xmm1 |
1065 pxor %xmm0,%xmm4 | 1065 pxor %xmm0,%xmm4 |
1066 psrlq $5,%xmm0 | 1066 psrlq $5,%xmm0 |
1067 pxor %xmm4,%xmm0 | 1067 pxor %xmm4,%xmm0 |
1068 psrlq $1,%xmm0 | 1068 psrlq $1,%xmm0 |
1069 pxor %xmm1,%xmm0 | 1069 pxor %xmm1,%xmm0 |
1070 » addq» $64,%rcx | 1070 » addq» $0x40,%rcx |
1071 jz L$done | 1071 jz L$done |
1072 movdqu 32(%rsi),%xmm7 | 1072 movdqu 32(%rsi),%xmm7 |
1073 » subq» $16,%rcx | 1073 » subq» $0x10,%rcx |
1074 jz L$odd_tail | 1074 jz L$odd_tail |
1075 L$skip4x: | 1075 L$skip4x: |
1076 | 1076 |
1077 | 1077 |
1078 | 1078 |
1079 | 1079 |
1080 | 1080 |
1081 movdqu (%rdx),%xmm8 | 1081 movdqu (%rdx),%xmm8 |
1082 movdqu 16(%rdx),%xmm3 | 1082 movdqu 16(%rdx),%xmm3 |
1083 .byte 102,69,15,56,0,194 | 1083 .byte 102,69,15,56,0,194 |
1084 .byte 102,65,15,56,0,218 | 1084 .byte 102,65,15,56,0,218 |
1085 pxor %xmm8,%xmm0 | 1085 pxor %xmm8,%xmm0 |
1086 | 1086 |
1087 movdqa %xmm3,%xmm5 | 1087 movdqa %xmm3,%xmm5 |
1088 pshufd $78,%xmm3,%xmm4 | 1088 pshufd $78,%xmm3,%xmm4 |
1089 pxor %xmm3,%xmm4 | 1089 pxor %xmm3,%xmm4 |
1090 .byte 102,15,58,68,218,0 | 1090 .byte 102,15,58,68,218,0 |
1091 .byte 102,15,58,68,234,17 | 1091 .byte 102,15,58,68,234,17 |
1092 .byte 102,15,58,68,231,0 | 1092 .byte 102,15,58,68,231,0 |
1093 | 1093 |
1094 leaq 32(%rdx),%rdx | 1094 leaq 32(%rdx),%rdx |
1095 nop | 1095 nop |
1096 » subq» $32,%rcx | 1096 » subq» $0x20,%rcx |
1097 jbe L$even_tail | 1097 jbe L$even_tail |
1098 nop | 1098 nop |
1099 jmp L$mod_loop | 1099 jmp L$mod_loop |
1100 | 1100 |
1101 .p2align 5 | 1101 .p2align 5 |
1102 L$mod_loop: | 1102 L$mod_loop: |
1103 movdqa %xmm0,%xmm1 | 1103 movdqa %xmm0,%xmm1 |
1104 movdqa %xmm4,%xmm8 | 1104 movdqa %xmm4,%xmm8 |
1105 pshufd $78,%xmm0,%xmm4 | 1105 pshufd $78,%xmm0,%xmm4 |
1106 pxor %xmm0,%xmm4 | 1106 pxor %xmm0,%xmm4 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1149 .byte 102,15,58,68,234,17 | 1149 .byte 102,15,58,68,234,17 |
1150 pxor %xmm9,%xmm1 | 1150 pxor %xmm9,%xmm1 |
1151 pxor %xmm0,%xmm9 | 1151 pxor %xmm0,%xmm9 |
1152 psrlq $5,%xmm0 | 1152 psrlq $5,%xmm0 |
1153 pxor %xmm9,%xmm0 | 1153 pxor %xmm9,%xmm0 |
1154 leaq 32(%rdx),%rdx | 1154 leaq 32(%rdx),%rdx |
1155 psrlq $1,%xmm0 | 1155 psrlq $1,%xmm0 |
1156 .byte 102,15,58,68,231,0 | 1156 .byte 102,15,58,68,231,0 |
1157 pxor %xmm1,%xmm0 | 1157 pxor %xmm1,%xmm0 |
1158 | 1158 |
1159 » subq» $32,%rcx | 1159 » subq» $0x20,%rcx |
1160 ja L$mod_loop | 1160 ja L$mod_loop |
1161 | 1161 |
1162 L$even_tail: | 1162 L$even_tail: |
1163 movdqa %xmm0,%xmm1 | 1163 movdqa %xmm0,%xmm1 |
1164 movdqa %xmm4,%xmm8 | 1164 movdqa %xmm4,%xmm8 |
1165 pshufd $78,%xmm0,%xmm4 | 1165 pshufd $78,%xmm0,%xmm4 |
1166 pxor %xmm0,%xmm4 | 1166 pxor %xmm0,%xmm4 |
1167 | 1167 |
1168 .byte 102,15,58,68,198,0 | 1168 .byte 102,15,58,68,198,0 |
1169 .byte 102,15,58,68,206,17 | 1169 .byte 102,15,58,68,206,17 |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1319 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | 1319 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
1320 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | 1320 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
1321 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | 1321 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
1322 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | 1322 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
1323 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | 1323 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
1324 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | 1324 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
1325 | 1325 |
1326 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,10
8,46,111,114,103,62,0 | 1326 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,10
8,46,111,114,103,62,0 |
1327 .p2align 6 | 1327 .p2align 6 |
1328 #endif | 1328 #endif |
OLD | NEW |