OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 .extern OPENSSL_ia32cap_P | 3 .extern OPENSSL_ia32cap_P |
4 .hidden OPENSSL_ia32cap_P | 4 .hidden OPENSSL_ia32cap_P |
5 | 5 |
6 .globl gcm_gmult_4bit | 6 .globl gcm_gmult_4bit |
7 .hidden gcm_gmult_4bit | 7 .hidden gcm_gmult_4bit |
8 .type gcm_gmult_4bit,@function | 8 .type gcm_gmult_4bit,@function |
9 .align 16 | 9 .align 16 |
10 gcm_gmult_4bit: | 10 gcm_gmult_4bit: |
11 pushq %rbx | 11 pushq %rbx |
12 pushq %rbp | 12 pushq %rbp |
13 pushq %r12 | 13 pushq %r12 |
14 .Lgmult_prologue: | 14 .Lgmult_prologue: |
15 | 15 |
16 movzbq 15(%rdi),%r8 | 16 movzbq 15(%rdi),%r8 |
17 leaq .Lrem_4bit(%rip),%r11 | 17 leaq .Lrem_4bit(%rip),%r11 |
18 xorq %rax,%rax | 18 xorq %rax,%rax |
19 xorq %rbx,%rbx | 19 xorq %rbx,%rbx |
20 movb %r8b,%al | 20 movb %r8b,%al |
21 movb %r8b,%bl | 21 movb %r8b,%bl |
22 shlb $4,%al | 22 shlb $4,%al |
23 movq $14,%rcx | 23 movq $14,%rcx |
24 movq 8(%rsi,%rax,1),%r8 | 24 movq 8(%rsi,%rax,1),%r8 |
25 movq (%rsi,%rax,1),%r9 | 25 movq (%rsi,%rax,1),%r9 |
26     andb    $240,%bl            | 26     andb    $0xf0,%bl           |
27 movq %r8,%rdx | 27 movq %r8,%rdx |
28 jmp .Loop1 | 28 jmp .Loop1 |
29 | 29 |
30 .align 16 | 30 .align 16 |
31 .Loop1: | 31 .Loop1: |
32 shrq $4,%r8 | 32 shrq $4,%r8 |
33     andq    $15,%rdx            | 33     andq    $0xf,%rdx           |
34 movq %r9,%r10 | 34 movq %r9,%r10 |
35 movb (%rdi,%rcx,1),%al | 35 movb (%rdi,%rcx,1),%al |
36 shrq $4,%r9 | 36 shrq $4,%r9 |
37 xorq 8(%rsi,%rbx,1),%r8 | 37 xorq 8(%rsi,%rbx,1),%r8 |
38 shlq $60,%r10 | 38 shlq $60,%r10 |
39 xorq (%rsi,%rbx,1),%r9 | 39 xorq (%rsi,%rbx,1),%r9 |
40 movb %al,%bl | 40 movb %al,%bl |
41 xorq (%r11,%rdx,8),%r9 | 41 xorq (%r11,%rdx,8),%r9 |
42 movq %r8,%rdx | 42 movq %r8,%rdx |
43 shlb $4,%al | 43 shlb $4,%al |
44 xorq %r10,%r8 | 44 xorq %r10,%r8 |
45 decq %rcx | 45 decq %rcx |
46 js .Lbreak1 | 46 js .Lbreak1 |
47 | 47 |
48 shrq $4,%r8 | 48 shrq $4,%r8 |
49     andq    $15,%rdx            | 49     andq    $0xf,%rdx           |
50 movq %r9,%r10 | 50 movq %r9,%r10 |
51 shrq $4,%r9 | 51 shrq $4,%r9 |
52 xorq 8(%rsi,%rax,1),%r8 | 52 xorq 8(%rsi,%rax,1),%r8 |
53 shlq $60,%r10 | 53 shlq $60,%r10 |
54 xorq (%rsi,%rax,1),%r9 | 54 xorq (%rsi,%rax,1),%r9 |
55     andb    $240,%bl            | 55     andb    $0xf0,%bl           |
56 xorq (%r11,%rdx,8),%r9 | 56 xorq (%r11,%rdx,8),%r9 |
57 movq %r8,%rdx | 57 movq %r8,%rdx |
58 xorq %r10,%r8 | 58 xorq %r10,%r8 |
59 jmp .Loop1 | 59 jmp .Loop1 |
60 | 60 |
61 .align 16 | 61 .align 16 |
62 .Lbreak1: | 62 .Lbreak1: |
63 shrq $4,%r8 | 63 shrq $4,%r8 |
64     andq    $15,%rdx            | 64     andq    $0xf,%rdx           |
65 movq %r9,%r10 | 65 movq %r9,%r10 |
66 shrq $4,%r9 | 66 shrq $4,%r9 |
67 xorq 8(%rsi,%rax,1),%r8 | 67 xorq 8(%rsi,%rax,1),%r8 |
68 shlq $60,%r10 | 68 shlq $60,%r10 |
69 xorq (%rsi,%rax,1),%r9 | 69 xorq (%rsi,%rax,1),%r9 |
70     andb    $240,%bl            | 70     andb    $0xf0,%bl           |
71 xorq (%r11,%rdx,8),%r9 | 71 xorq (%r11,%rdx,8),%r9 |
72 movq %r8,%rdx | 72 movq %r8,%rdx |
73 xorq %r10,%r8 | 73 xorq %r10,%r8 |
74 | 74 |
75 shrq $4,%r8 | 75 shrq $4,%r8 |
76     andq    $15,%rdx            | 76     andq    $0xf,%rdx           |
77 movq %r9,%r10 | 77 movq %r9,%r10 |
78 shrq $4,%r9 | 78 shrq $4,%r9 |
79 xorq 8(%rsi,%rbx,1),%r8 | 79 xorq 8(%rsi,%rbx,1),%r8 |
80 shlq $60,%r10 | 80 shlq $60,%r10 |
81 xorq (%rsi,%rbx,1),%r9 | 81 xorq (%rsi,%rbx,1),%r9 |
82 xorq %r10,%r8 | 82 xorq %r10,%r8 |
83 xorq (%r11,%rdx,8),%r9 | 83 xorq (%r11,%rdx,8),%r9 |
84 | 84 |
85 bswapq %r8 | 85 bswapq %r8 |
86 bswapq %r9 | 86 bswapq %r9 |
(...skipping 787 matching lines...)
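
The hunk above is the generic 4-bit (Shoup table) GHASH multiply, and the diff in it is purely cosmetic: decimal immediates are respelled in hexadecimal ($240 becomes $0xf0, $15 becomes $0xf), so the assembled bytes are unchanged. For orientation, a minimal C sketch of the same loop, modelled on the gcm_gmult_4bit reference code in OpenSSL's gcm128.c; the {hi,lo} Htable layout and the pre-shifted rem_4bit constants are inferred from the 8(%rsi,%rax,1)/(%rsi,%rax,1) and (%r11,%rdx,8) addressing above, so treat those details as assumptions rather than statements about this file.

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128;

    /* .Lrem_4bit constants, pre-shifted into bits 63:48; the asm reads
     * them as 8-byte entries via (%r11,%rdx,8). */
    static const uint64_t rem_4bit[16] = {
        0x0000ULL << 48, 0x1C20ULL << 48, 0x3840ULL << 48, 0x2460ULL << 48,
        0x7080ULL << 48, 0x6CA0ULL << 48, 0x48C0ULL << 48, 0x54E0ULL << 48,
        0xE100ULL << 48, 0xFD20ULL << 48, 0xD940ULL << 48, 0xC560ULL << 48,
        0x9180ULL << 48, 0x8DA0ULL << 48, 0xA9C0ULL << 48, 0xB5E0ULL << 48,
    };

    /* Xi <- Xi * H in GF(2^128), one 4-bit nibble at a time. */
    static void gcm_gmult_4bit_sketch(uint64_t Xi[2], const u128 Htable[16])
    {
        const uint8_t *xi = (const uint8_t *)Xi;
        unsigned nlo = xi[15] & 0xf, nhi = xi[15] >> 4;
        uint64_t rem;
        u128 Z = Htable[nlo];           /* first lookup needs no shift */
        int cnt = 15;

        for (;;) {
            /* shift Z right by 4, folding the dropped nibble back in
             * through rem_4bit, then add the next table entry */
            rem  = Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4) ^ rem_4bit[rem] ^ Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;
            nlo = xi[cnt] & 0xf;
            nhi = xi[cnt] >> 4;

            rem  = Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4) ^ rem_4bit[rem] ^ Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
        /* store back big-endian, as the bswapq pair above does */
        Xi[0] = __builtin_bswap64(Z.hi);   /* gcc/clang builtin */
        Xi[1] = __builtin_bswap64(Z.lo);
    }
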
874 .align 32 | 874 .align 32 |
875 gcm_ghash_clmul: | 875 gcm_ghash_clmul: |
876 .L_ghash_clmul: | 876 .L_ghash_clmul: |
877 movdqa .Lbswap_mask(%rip),%xmm10 | 877 movdqa .Lbswap_mask(%rip),%xmm10 |
878 | 878 |
879 movdqu (%rdi),%xmm0 | 879 movdqu (%rdi),%xmm0 |
880 movdqu (%rsi),%xmm2 | 880 movdqu (%rsi),%xmm2 |
881 movdqu 32(%rsi),%xmm7 | 881 movdqu 32(%rsi),%xmm7 |
882 .byte 102,65,15,56,0,194 | 882 .byte 102,65,15,56,0,194 |
883 | 883 |
884     subq    $16,%rcx           | 884     subq    $0x10,%rcx         |
885 jz .Lodd_tail | 885 jz .Lodd_tail |
886 | 886 |
887 movdqu 16(%rsi),%xmm6 | 887 movdqu 16(%rsi),%xmm6 |
888 movl OPENSSL_ia32cap_P+4(%rip),%eax | 888 movl OPENSSL_ia32cap_P+4(%rip),%eax |
889     cmpq    $48,%rcx           | 889     cmpq    $0x30,%rcx         |
890 jb .Lskip4x | 890 jb .Lskip4x |
891 | 891 |
892 andl $71303168,%eax | 892 andl $71303168,%eax |
893 cmpl $4194304,%eax | 893 cmpl $4194304,%eax |
894 je .Lskip4x | 894 je .Lskip4x |
895 | 895 |
896     subq    $48,%rcx           | 896     subq    $0x30,%rcx         |
897     movq    $11547335547999543296,%rax | 897     movq    $0xA040608020C0E000,%rax |
898 movdqu 48(%rsi),%xmm14 | 898 movdqu 48(%rsi),%xmm14 |
899 movdqu 64(%rsi),%xmm15 | 899 movdqu 64(%rsi),%xmm15 |
900 | 900 |
901 | 901 |
902 | 902 |
903 | 903 |
904 movdqu 48(%rdx),%xmm3 | 904 movdqu 48(%rdx),%xmm3 |
905 movdqu 32(%rdx),%xmm11 | 905 movdqu 32(%rdx),%xmm11 |
906 .byte 102,65,15,56,0,218 | 906 .byte 102,65,15,56,0,218 |
907 .byte 102,69,15,56,0,218 | 907 .byte 102,69,15,56,0,218 |
(...skipping 26 matching lines...)
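
gcm_ghash_clmul takes its arguments in the SysV registers, so in the lines above %rdi is Xi, %rsi is Htable, %rdx is the input pointer and %rcx the byte count (hence the subq $0x10,%rcx per 16-byte block). The andl/cmpl pair gates the 4-way aggregated path on the CPUID-leaf-1 ECX word cached at OPENSSL_ia32cap_P+4: 71303168 is 0x04400000, i.e. (1<<26)|(1<<22) (XSAVE and MOVBE), and 4194304 is 0x00400000, so the fast path is skipped on cores that report MOVBE without XSAVE (upstream OpenSSL's Silvermont workaround). A C restating of that gate; the array view of OPENSSL_ia32cap_P is an assumption carried over from OpenSSL, not something this file declares:

    #include <stddef.h>
    #include <stdint.h>

    extern uint32_t OPENSSL_ia32cap_P[4];    /* [1] caches CPUID.1:ECX */

    /* Mirrors the cmpq $0x30 / andl $71303168 / cmpl $4194304 gate. */
    static int use_4x_aggregation(size_t remaining)
    {
        if (remaining < 0x30)                /* under three more blocks */
            return 0;
        uint32_t ecx = OPENSSL_ia32cap_P[1];
        if ((ecx & ((1u << 26) | (1u << 22))) == (1u << 22))
            return 0;                        /* MOVBE without XSAVE */
        return 1;
    }
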
934 .byte 102,69,15,58,68,222,0 | 934 .byte 102,69,15,58,68,222,0 |
935 movdqa %xmm0,%xmm1 | 935 movdqa %xmm0,%xmm1 |
936 pshufd $78,%xmm0,%xmm8 | 936 pshufd $78,%xmm0,%xmm8 |
937 pxor %xmm0,%xmm8 | 937 pxor %xmm0,%xmm8 |
938 .byte 102,69,15,58,68,238,17 | 938 .byte 102,69,15,58,68,238,17 |
939 .byte 102,68,15,58,68,231,0 | 939 .byte 102,68,15,58,68,231,0 |
940 xorps %xmm11,%xmm3 | 940 xorps %xmm11,%xmm3 |
941 xorps %xmm13,%xmm5 | 941 xorps %xmm13,%xmm5 |
942 | 942 |
943 leaq 64(%rdx),%rdx | 943 leaq 64(%rdx),%rdx |
944     subq    $64,%rcx           | 944     subq    $0x40,%rcx         |
945 jc .Ltail4x | 945 jc .Ltail4x |
946 | 946 |
947 jmp .Lmod4_loop | 947 jmp .Lmod4_loop |
948 .align 32 | 948 .align 32 |
949 .Lmod4_loop: | 949 .Lmod4_loop: |
950 .byte 102,65,15,58,68,199,0 | 950 .byte 102,65,15,58,68,199,0 |
951 xorps %xmm12,%xmm4 | 951 xorps %xmm12,%xmm4 |
952 movdqu 48(%rdx),%xmm11 | 952 movdqu 48(%rdx),%xmm11 |
953 .byte 102,69,15,56,0,218 | 953 .byte 102,69,15,56,0,218 |
954 .byte 102,65,15,58,68,207,17 | 954 .byte 102,65,15,58,68,207,17 |
(...skipping 62 matching lines...)
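
The .byte 102,15,58,68,... and 102,...,56,0,... sequences above are PCLMULQDQ and PSHUFB spelled as raw opcode bytes, which keeps the file assembling on toolchains that predate those mnemonics. Each group of three carry-less multiplies is one Karatsuba-style 128x128-bit multiply: pshufd $78 (0x4e) swaps the two 64-bit halves of a register, so the pxor that follows produces hi^lo for the middle term. A sketch with intrinsics; the function and parameter names are mine, and H_xored stands for the precombined entries loaded from Htable (e.g. 32(%rsi) into %xmm7):

    #include <emmintrin.h>   /* SSE2: _mm_shuffle_epi32, _mm_xor_si128 */
    #include <wmmintrin.h>   /* _mm_clmulepi64_si128 (PCLMULQDQ) */

    /* One 128x128 -> 256-bit carry-less multiply as three PCLMULQDQs,
     * the pattern each .byte 102,15,58,68,... triple encodes. */
    static void clmul_karatsuba(__m128i X, __m128i H, __m128i H_xored,
                                __m128i *lo, __m128i *hi, __m128i *mid)
    {
        __m128i Xs = _mm_shuffle_epi32(X, 78);   /* pshufd $78: swap halves */
        __m128i Xx = _mm_xor_si128(Xs, X);       /* X.hi ^ X.lo */

        *lo  = _mm_clmulepi64_si128(X, H, 0x00);        /* X.lo * H.lo */
        *hi  = _mm_clmulepi64_si128(X, H, 0x11);        /* X.hi * H.hi */
        *mid = _mm_clmulepi64_si128(Xx, H_xored, 0x00); /* middle term  */
        /* full product: hi*x^128 ^ (mid ^ hi ^ lo)*x^64 ^ lo, which is
         * what the surrounding xorps/pxor shuffling reassembles */
    }
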
1017 movdqa %xmm0,%xmm1 | 1017 movdqa %xmm0,%xmm1 |
1018 .byte 102,69,15,58,68,238,17 | 1018 .byte 102,69,15,58,68,238,17 |
1019 xorps %xmm11,%xmm3 | 1019 xorps %xmm11,%xmm3 |
1020 pshufd $78,%xmm0,%xmm8 | 1020 pshufd $78,%xmm0,%xmm8 |
1021 pxor %xmm0,%xmm8 | 1021 pxor %xmm0,%xmm8 |
1022 | 1022 |
1023 .byte 102,68,15,58,68,231,0 | 1023 .byte 102,68,15,58,68,231,0 |
1024 xorps %xmm13,%xmm5 | 1024 xorps %xmm13,%xmm5 |
1025 | 1025 |
1026 leaq 64(%rdx),%rdx | 1026 leaq 64(%rdx),%rdx |
1027     subq    $64,%rcx          | 1027     subq    $0x40,%rcx        |
1028 jnc .Lmod4_loop | 1028 jnc .Lmod4_loop |
1029 | 1029 |
1030 .Ltail4x: | 1030 .Ltail4x: |
1031 .byte 102,65,15,58,68,199,0 | 1031 .byte 102,65,15,58,68,199,0 |
1032 .byte 102,65,15,58,68,207,17 | 1032 .byte 102,65,15,58,68,207,17 |
1033 .byte 102,68,15,58,68,199,16 | 1033 .byte 102,68,15,58,68,199,16 |
1034 xorps %xmm12,%xmm4 | 1034 xorps %xmm12,%xmm4 |
1035 xorps %xmm3,%xmm0 | 1035 xorps %xmm3,%xmm0 |
1036 xorps %xmm5,%xmm1 | 1036 xorps %xmm5,%xmm1 |
1037 pxor %xmm0,%xmm1 | 1037 pxor %xmm0,%xmm1 |
(...skipping 23 matching lines...)
1061 | 1061 |
1062 | 1062 |
1063 movdqa %xmm0,%xmm4 | 1063 movdqa %xmm0,%xmm4 |
1064 psrlq $1,%xmm0 | 1064 psrlq $1,%xmm0 |
1065 pxor %xmm4,%xmm1 | 1065 pxor %xmm4,%xmm1 |
1066 pxor %xmm0,%xmm4 | 1066 pxor %xmm0,%xmm4 |
1067 psrlq $5,%xmm0 | 1067 psrlq $5,%xmm0 |
1068 pxor %xmm4,%xmm0 | 1068 pxor %xmm4,%xmm0 |
1069 psrlq $1,%xmm0 | 1069 psrlq $1,%xmm0 |
1070 pxor %xmm1,%xmm0 | 1070 pxor %xmm1,%xmm0 |
1071     addq    $64,%rcx          | 1071     addq    $0x40,%rcx        |
1072 jz .Ldone | 1072 jz .Ldone |
1073 movdqu 32(%rsi),%xmm7 | 1073 movdqu 32(%rsi),%xmm7 |
1074     subq    $16,%rcx          | 1074     subq    $0x10,%rcx        |
1075 jz .Lodd_tail | 1075 jz .Lodd_tail |
1076 .Lskip4x: | 1076 .Lskip4x: |
1077 | 1077 |
1078 | 1078 |
1079 | 1079 |
1080 | 1080 |
1081 | 1081 |
1082 movdqu (%rdx),%xmm8 | 1082 movdqu (%rdx),%xmm8 |
1083 movdqu 16(%rdx),%xmm3 | 1083 movdqu 16(%rdx),%xmm3 |
1084 .byte 102,69,15,56,0,194 | 1084 .byte 102,69,15,56,0,194 |
1085 .byte 102,65,15,56,0,218 | 1085 .byte 102,65,15,56,0,218 |
1086 pxor %xmm8,%xmm0 | 1086 pxor %xmm8,%xmm0 |
1087 | 1087 |
1088 movdqa %xmm3,%xmm5 | 1088 movdqa %xmm3,%xmm5 |
1089 pshufd $78,%xmm3,%xmm4 | 1089 pshufd $78,%xmm3,%xmm4 |
1090 pxor %xmm3,%xmm4 | 1090 pxor %xmm3,%xmm4 |
1091 .byte 102,15,58,68,218,0 | 1091 .byte 102,15,58,68,218,0 |
1092 .byte 102,15,58,68,234,17 | 1092 .byte 102,15,58,68,234,17 |
1093 .byte 102,15,58,68,231,0 | 1093 .byte 102,15,58,68,231,0 |
1094 | 1094 |
1095 leaq 32(%rdx),%rdx | 1095 leaq 32(%rdx),%rdx |
1096 nop | 1096 nop |
1097     subq    $32,%rcx          | 1097     subq    $0x20,%rcx        |
1098 jbe .Leven_tail | 1098 jbe .Leven_tail |
1099 nop | 1099 nop |
1100 jmp .Lmod_loop | 1100 jmp .Lmod_loop |
1101 | 1101 |
1102 .align 32 | 1102 .align 32 |
1103 .Lmod_loop: | 1103 .Lmod_loop: |
1104 movdqa %xmm0,%xmm1 | 1104 movdqa %xmm0,%xmm1 |
1105 movdqa %xmm4,%xmm8 | 1105 movdqa %xmm4,%xmm8 |
1106 pshufd $78,%xmm0,%xmm4 | 1106 pshufd $78,%xmm0,%xmm4 |
1107 pxor %xmm0,%xmm4 | 1107 pxor %xmm0,%xmm4 |
(...skipping 42 matching lines...)
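
The movdqa/psrlq/pxor ladder above (after .Ltail4x, and interleaved again into .Lmod_loop below) is the second phase of the reduction modulo the GHASH polynomial. In the bit-reflected representation used here, reducing by g(x) = x^128 + x^7 + x^2 + x + 1 becomes folding in the value and its right shifts by 1, 2 and 7, which the ladder accumulates as psrlq $1, then $5 more, then $1. Per 64-bit lane, and setting aside the cross-lane carries handled in the elided lines, it computes the following (acc and x stand for %xmm1 and %xmm0 at the top of the ladder):

    #include <stdint.h>

    /* Per-lane restatement of the psrlq $1/$5/$1 + pxor sequence;
     * a sketch, not the full 128-bit reduction. */
    static inline uint64_t reduce_phase2(uint64_t acc, uint64_t x)
    {
        return acc ^ x ^ (x >> 1) ^ (x >> 2) ^ (x >> 7);
    }
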
1150 .byte 102,15,58,68,234,17 | 1150 .byte 102,15,58,68,234,17 |
1151 pxor %xmm9,%xmm1 | 1151 pxor %xmm9,%xmm1 |
1152 pxor %xmm0,%xmm9 | 1152 pxor %xmm0,%xmm9 |
1153 psrlq $5,%xmm0 | 1153 psrlq $5,%xmm0 |
1154 pxor %xmm9,%xmm0 | 1154 pxor %xmm9,%xmm0 |
1155 leaq 32(%rdx),%rdx | 1155 leaq 32(%rdx),%rdx |
1156 psrlq $1,%xmm0 | 1156 psrlq $1,%xmm0 |
1157 .byte 102,15,58,68,231,0 | 1157 .byte 102,15,58,68,231,0 |
1158 pxor %xmm1,%xmm0 | 1158 pxor %xmm1,%xmm0 |
1159 | 1159 |
1160     subq    $32,%rcx          | 1160     subq    $0x20,%rcx        |
1161 ja .Lmod_loop | 1161 ja .Lmod_loop |
1162 | 1162 |
1163 .Leven_tail: | 1163 .Leven_tail: |
1164 movdqa %xmm0,%xmm1 | 1164 movdqa %xmm0,%xmm1 |
1165 movdqa %xmm4,%xmm8 | 1165 movdqa %xmm4,%xmm8 |
1166 pshufd $78,%xmm0,%xmm4 | 1166 pshufd $78,%xmm0,%xmm4 |
1167 pxor %xmm0,%xmm4 | 1167 pxor %xmm0,%xmm4 |
1168 | 1168 |
1169 .byte 102,15,58,68,198,0 | 1169 .byte 102,15,58,68,198,0 |
1170 .byte 102,15,58,68,206,17 | 1170 .byte 102,15,58,68,206,17 |
(...skipping 149 matching lines...)
1320 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | 1320 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
1321 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | 1321 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
1322 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | 1322 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
1323 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | 1323 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
1324 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | 1324 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
1325 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | 1325 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
1326 | 1326 |
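
The .value rows above read like the tail of the 256-entry rem_8bit reduction table (the 8-bit analogue of .Lrem_4bit): each entry appears to be the carry-less, GF(2) product of its index and 0x1C2, just as the 4-bit table holds the sixteen carry-less multiples of 0x1C20. A small generator that reproduces the listed values under that assumption; the pattern constants are inferred from the data, not stated in this file:

    #include <stdint.h>

    /* entry(n) = carry-less product n * pattern, truncated to 16 bits */
    static uint16_t rem_entry(unsigned n, uint16_t pattern)
    {
        uint16_t v = 0;
        for (int bit = 0; bit < 8; bit++)
            if (n & (1u << bit))
                v ^= (uint16_t)(pattern << bit);
        return v;
    }

For example, rem_entry(0xD0, 0x1C2) yields 0x8DA0, the first value on line 1320 above, and rem_entry(0xD1, 0x1C2) yields the next one, 0x8C62.
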
1327 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 1327 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
1328 .align 64 | 1328 .align 64 |
1329 #endif | 1329 #endif |