| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 .extern OPENSSL_ia32cap_P | 3 .extern OPENSSL_ia32cap_P |
| 4 .hidden OPENSSL_ia32cap_P | 4 .hidden OPENSSL_ia32cap_P |
| 5 | 5 |
| 6 .globl gcm_gmult_4bit | 6 .globl gcm_gmult_4bit |
| 7 .hidden gcm_gmult_4bit | 7 .hidden gcm_gmult_4bit |
| 8 .type gcm_gmult_4bit,@function | 8 .type gcm_gmult_4bit,@function |
| 9 .align 16 | 9 .align 16 |
| 10 gcm_gmult_4bit: | 10 gcm_gmult_4bit: |
| 11 pushq %rbx | 11 pushq %rbx |
| 12 pushq %rbp | 12 pushq %rbp |
| 13 pushq %r12 | 13 pushq %r12 |
| 14 .Lgmult_prologue: | 14 .Lgmult_prologue: |
| 15 | 15 |
| 16 movzbq 15(%rdi),%r8 | 16 movzbq 15(%rdi),%r8 |
| 17 leaq .Lrem_4bit(%rip),%r11 | 17 leaq .Lrem_4bit(%rip),%r11 |
| 18 xorq %rax,%rax | 18 xorq %rax,%rax |
| 19 xorq %rbx,%rbx | 19 xorq %rbx,%rbx |
| 20 movb %r8b,%al | 20 movb %r8b,%al |
| 21 movb %r8b,%bl | 21 movb %r8b,%bl |
| 22 shlb $4,%al | 22 shlb $4,%al |
| 23 movq $14,%rcx | 23 movq $14,%rcx |
| 24 movq 8(%rsi,%rax,1),%r8 | 24 movq 8(%rsi,%rax,1),%r8 |
| 25 movq (%rsi,%rax,1),%r9 | 25 movq (%rsi,%rax,1),%r9 |
| 26 andb $240,%bl | 26 andb $0xf0,%bl |
| 27 movq %r8,%rdx | 27 movq %r8,%rdx |
| 28 jmp .Loop1 | 28 jmp .Loop1 |
| 29 | 29 |
| 30 .align 16 | 30 .align 16 |
| 31 .Loop1: | 31 .Loop1: |
| 32 shrq $4,%r8 | 32 shrq $4,%r8 |
| 33 andq $15,%rdx | 33 andq $0xf,%rdx |
| 34 movq %r9,%r10 | 34 movq %r9,%r10 |
| 35 movb (%rdi,%rcx,1),%al | 35 movb (%rdi,%rcx,1),%al |
| 36 shrq $4,%r9 | 36 shrq $4,%r9 |
| 37 xorq 8(%rsi,%rbx,1),%r8 | 37 xorq 8(%rsi,%rbx,1),%r8 |
| 38 shlq $60,%r10 | 38 shlq $60,%r10 |
| 39 xorq (%rsi,%rbx,1),%r9 | 39 xorq (%rsi,%rbx,1),%r9 |
| 40 movb %al,%bl | 40 movb %al,%bl |
| 41 xorq (%r11,%rdx,8),%r9 | 41 xorq (%r11,%rdx,8),%r9 |
| 42 movq %r8,%rdx | 42 movq %r8,%rdx |
| 43 shlb $4,%al | 43 shlb $4,%al |
| 44 xorq %r10,%r8 | 44 xorq %r10,%r8 |
| 45 decq %rcx | 45 decq %rcx |
| 46 js .Lbreak1 | 46 js .Lbreak1 |
| 47 | 47 |
| 48 shrq $4,%r8 | 48 shrq $4,%r8 |
| 49 andq $15,%rdx | 49 andq $0xf,%rdx |
| 50 movq %r9,%r10 | 50 movq %r9,%r10 |
| 51 shrq $4,%r9 | 51 shrq $4,%r9 |
| 52 xorq 8(%rsi,%rax,1),%r8 | 52 xorq 8(%rsi,%rax,1),%r8 |
| 53 shlq $60,%r10 | 53 shlq $60,%r10 |
| 54 xorq (%rsi,%rax,1),%r9 | 54 xorq (%rsi,%rax,1),%r9 |
| 55 andb $240,%bl | 55 andb $0xf0,%bl |
| 56 xorq (%r11,%rdx,8),%r9 | 56 xorq (%r11,%rdx,8),%r9 |
| 57 movq %r8,%rdx | 57 movq %r8,%rdx |
| 58 xorq %r10,%r8 | 58 xorq %r10,%r8 |
| 59 jmp .Loop1 | 59 jmp .Loop1 |
| 60 | 60 |
| 61 .align 16 | 61 .align 16 |
| 62 .Lbreak1: | 62 .Lbreak1: |
| 63 shrq $4,%r8 | 63 shrq $4,%r8 |
| 64 andq $15,%rdx | 64 andq $0xf,%rdx |
| 65 movq %r9,%r10 | 65 movq %r9,%r10 |
| 66 shrq $4,%r9 | 66 shrq $4,%r9 |
| 67 xorq 8(%rsi,%rax,1),%r8 | 67 xorq 8(%rsi,%rax,1),%r8 |
| 68 shlq $60,%r10 | 68 shlq $60,%r10 |
| 69 xorq (%rsi,%rax,1),%r9 | 69 xorq (%rsi,%rax,1),%r9 |
| 70 andb $240,%bl | 70 andb $0xf0,%bl |
| 71 xorq (%r11,%rdx,8),%r9 | 71 xorq (%r11,%rdx,8),%r9 |
| 72 movq %r8,%rdx | 72 movq %r8,%rdx |
| 73 xorq %r10,%r8 | 73 xorq %r10,%r8 |
| 74 | 74 |
| 75 shrq $4,%r8 | 75 shrq $4,%r8 |
| 76 andq $15,%rdx | 76 andq $0xf,%rdx |
| 77 movq %r9,%r10 | 77 movq %r9,%r10 |
| 78 shrq $4,%r9 | 78 shrq $4,%r9 |
| 79 xorq 8(%rsi,%rbx,1),%r8 | 79 xorq 8(%rsi,%rbx,1),%r8 |
| 80 shlq $60,%r10 | 80 shlq $60,%r10 |
| 81 xorq (%rsi,%rbx,1),%r9 | 81 xorq (%rsi,%rbx,1),%r9 |
| 82 xorq %r10,%r8 | 82 xorq %r10,%r8 |
| 83 xorq (%r11,%rdx,8),%r9 | 83 xorq (%r11,%rdx,8),%r9 |
| 84 | 84 |
| 85 bswapq %r8 | 85 bswapq %r8 |
| 86 bswapq %r9 | 86 bswapq %r9 |
| (...skipping 787 matching lines...) |
| 874 .align 32 | 874 .align 32 |
| 875 gcm_ghash_clmul: | 875 gcm_ghash_clmul: |
| 876 .L_ghash_clmul: | 876 .L_ghash_clmul: |
| 877 movdqa .Lbswap_mask(%rip),%xmm10 | 877 movdqa .Lbswap_mask(%rip),%xmm10 |
| 878 | 878 |
| 879 movdqu (%rdi),%xmm0 | 879 movdqu (%rdi),%xmm0 |
| 880 movdqu (%rsi),%xmm2 | 880 movdqu (%rsi),%xmm2 |
| 881 movdqu 32(%rsi),%xmm7 | 881 movdqu 32(%rsi),%xmm7 |
| 882 .byte 102,65,15,56,0,194 | 882 .byte 102,65,15,56,0,194 |
| 883 | 883 |
| 884 subq $16,%rcx | 884 subq $0x10,%rcx |
| 885 jz .Lodd_tail | 885 jz .Lodd_tail |
| 886 | 886 |
| 887 movdqu 16(%rsi),%xmm6 | 887 movdqu 16(%rsi),%xmm6 |
| 888 movl OPENSSL_ia32cap_P+4(%rip),%eax | 888 movl OPENSSL_ia32cap_P+4(%rip),%eax |
| 889 cmpq $48,%rcx | 889 cmpq $0x30,%rcx |
| 890 jb .Lskip4x | 890 jb .Lskip4x |
| 891 | 891 |
| 892 andl $71303168,%eax | 892 andl $71303168,%eax |
| 893 cmpl $4194304,%eax | 893 cmpl $4194304,%eax |
| 894 je .Lskip4x | 894 je .Lskip4x |
| 895 | 895 |
| 896 subq $48,%rcx | 896 subq $0x30,%rcx |
| 897 movq $11547335547999543296,%rax | 897 movq $0xA040608020C0E000,%rax |
| 898 movdqu 48(%rsi),%xmm14 | 898 movdqu 48(%rsi),%xmm14 |
| 899 movdqu 64(%rsi),%xmm15 | 899 movdqu 64(%rsi),%xmm15 |
| 900 | 900 |
| 901 | 901 |
| 902 | 902 |
| 903 | 903 |
| 904 movdqu 48(%rdx),%xmm3 | 904 movdqu 48(%rdx),%xmm3 |
| 905 movdqu 32(%rdx),%xmm11 | 905 movdqu 32(%rdx),%xmm11 |
| 906 .byte 102,65,15,56,0,218 | 906 .byte 102,65,15,56,0,218 |
| 907 .byte 102,69,15,56,0,218 | 907 .byte 102,69,15,56,0,218 |
| (...skipping 26 matching lines...) |
| 934 .byte 102,69,15,58,68,222,0 | 934 .byte 102,69,15,58,68,222,0 |
| 935 movdqa %xmm0,%xmm1 | 935 movdqa %xmm0,%xmm1 |
| 936 pshufd $78,%xmm0,%xmm8 | 936 pshufd $78,%xmm0,%xmm8 |
| 937 pxor %xmm0,%xmm8 | 937 pxor %xmm0,%xmm8 |
| 938 .byte 102,69,15,58,68,238,17 | 938 .byte 102,69,15,58,68,238,17 |
| 939 .byte 102,68,15,58,68,231,0 | 939 .byte 102,68,15,58,68,231,0 |
| 940 xorps %xmm11,%xmm3 | 940 xorps %xmm11,%xmm3 |
| 941 xorps %xmm13,%xmm5 | 941 xorps %xmm13,%xmm5 |
| 942 | 942 |
| 943 leaq 64(%rdx),%rdx | 943 leaq 64(%rdx),%rdx |
| 944 subq $64,%rcx | 944 subq $0x40,%rcx |
| 945 jc .Ltail4x | 945 jc .Ltail4x |
| 946 | 946 |
| 947 jmp .Lmod4_loop | 947 jmp .Lmod4_loop |
| 948 .align 32 | 948 .align 32 |
| 949 .Lmod4_loop: | 949 .Lmod4_loop: |
| 950 .byte 102,65,15,58,68,199,0 | 950 .byte 102,65,15,58,68,199,0 |
| 951 xorps %xmm12,%xmm4 | 951 xorps %xmm12,%xmm4 |
| 952 movdqu 48(%rdx),%xmm11 | 952 movdqu 48(%rdx),%xmm11 |
| 953 .byte 102,69,15,56,0,218 | 953 .byte 102,69,15,56,0,218 |
| 954 .byte 102,65,15,58,68,207,17 | 954 .byte 102,65,15,58,68,207,17 |
| (...skipping 62 matching lines...) |
| 1017 movdqa %xmm0,%xmm1 | 1017 movdqa %xmm0,%xmm1 |
| 1018 .byte 102,69,15,58,68,238,17 | 1018 .byte 102,69,15,58,68,238,17 |
| 1019 xorps %xmm11,%xmm3 | 1019 xorps %xmm11,%xmm3 |
| 1020 pshufd $78,%xmm0,%xmm8 | 1020 pshufd $78,%xmm0,%xmm8 |
| 1021 pxor %xmm0,%xmm8 | 1021 pxor %xmm0,%xmm8 |
| 1022 | 1022 |
| 1023 .byte 102,68,15,58,68,231,0 | 1023 .byte 102,68,15,58,68,231,0 |
| 1024 xorps %xmm13,%xmm5 | 1024 xorps %xmm13,%xmm5 |
| 1025 | 1025 |
| 1026 leaq 64(%rdx),%rdx | 1026 leaq 64(%rdx),%rdx |
| 1027 subq $64,%rcx | 1027 subq $0x40,%rcx |
| 1028 jnc .Lmod4_loop | 1028 jnc .Lmod4_loop |
| 1029 | 1029 |
| 1030 .Ltail4x: | 1030 .Ltail4x: |
| 1031 .byte 102,65,15,58,68,199,0 | 1031 .byte 102,65,15,58,68,199,0 |
| 1032 .byte 102,65,15,58,68,207,17 | 1032 .byte 102,65,15,58,68,207,17 |
| 1033 .byte 102,68,15,58,68,199,16 | 1033 .byte 102,68,15,58,68,199,16 |
| 1034 xorps %xmm12,%xmm4 | 1034 xorps %xmm12,%xmm4 |
| 1035 xorps %xmm3,%xmm0 | 1035 xorps %xmm3,%xmm0 |
| 1036 xorps %xmm5,%xmm1 | 1036 xorps %xmm5,%xmm1 |
| 1037 pxor %xmm0,%xmm1 | 1037 pxor %xmm0,%xmm1 |
| (...skipping 23 matching lines...) |
| 1061 | 1061 |
| 1062 | 1062 |
| 1063 movdqa %xmm0,%xmm4 | 1063 movdqa %xmm0,%xmm4 |
| 1064 psrlq $1,%xmm0 | 1064 psrlq $1,%xmm0 |
| 1065 pxor %xmm4,%xmm1 | 1065 pxor %xmm4,%xmm1 |
| 1066 pxor %xmm0,%xmm4 | 1066 pxor %xmm0,%xmm4 |
| 1067 psrlq $5,%xmm0 | 1067 psrlq $5,%xmm0 |
| 1068 pxor %xmm4,%xmm0 | 1068 pxor %xmm4,%xmm0 |
| 1069 psrlq $1,%xmm0 | 1069 psrlq $1,%xmm0 |
| 1070 pxor %xmm1,%xmm0 | 1070 pxor %xmm1,%xmm0 |
| 1071 addq $64,%rcx | 1071 addq $0x40,%rcx |
| 1072 jz .Ldone | 1072 jz .Ldone |
| 1073 movdqu 32(%rsi),%xmm7 | 1073 movdqu 32(%rsi),%xmm7 |
| 1074 subq $16,%rcx | 1074 subq $0x10,%rcx |
| 1075 jz .Lodd_tail | 1075 jz .Lodd_tail |
| 1076 .Lskip4x: | 1076 .Lskip4x: |
| 1077 | 1077 |
| 1078 | 1078 |
| 1079 | 1079 |
| 1080 | 1080 |
| 1081 | 1081 |
| 1082 movdqu (%rdx),%xmm8 | 1082 movdqu (%rdx),%xmm8 |
| 1083 movdqu 16(%rdx),%xmm3 | 1083 movdqu 16(%rdx),%xmm3 |
| 1084 .byte 102,69,15,56,0,194 | 1084 .byte 102,69,15,56,0,194 |
| 1085 .byte 102,65,15,56,0,218 | 1085 .byte 102,65,15,56,0,218 |
| 1086 pxor %xmm8,%xmm0 | 1086 pxor %xmm8,%xmm0 |
| 1087 | 1087 |
| 1088 movdqa %xmm3,%xmm5 | 1088 movdqa %xmm3,%xmm5 |
| 1089 pshufd $78,%xmm3,%xmm4 | 1089 pshufd $78,%xmm3,%xmm4 |
| 1090 pxor %xmm3,%xmm4 | 1090 pxor %xmm3,%xmm4 |
| 1091 .byte 102,15,58,68,218,0 | 1091 .byte 102,15,58,68,218,0 |
| 1092 .byte 102,15,58,68,234,17 | 1092 .byte 102,15,58,68,234,17 |
| 1093 .byte 102,15,58,68,231,0 | 1093 .byte 102,15,58,68,231,0 |
| 1094 | 1094 |
| 1095 leaq 32(%rdx),%rdx | 1095 leaq 32(%rdx),%rdx |
| 1096 nop | 1096 nop |
| 1097 subq $32,%rcx | 1097 subq $0x20,%rcx |
| 1098 jbe .Leven_tail | 1098 jbe .Leven_tail |
| 1099 nop | 1099 nop |
| 1100 jmp .Lmod_loop | 1100 jmp .Lmod_loop |
| 1101 | 1101 |
| 1102 .align 32 | 1102 .align 32 |
| 1103 .Lmod_loop: | 1103 .Lmod_loop: |
| 1104 movdqa %xmm0,%xmm1 | 1104 movdqa %xmm0,%xmm1 |
| 1105 movdqa %xmm4,%xmm8 | 1105 movdqa %xmm4,%xmm8 |
| 1106 pshufd $78,%xmm0,%xmm4 | 1106 pshufd $78,%xmm0,%xmm4 |
| 1107 pxor %xmm0,%xmm4 | 1107 pxor %xmm0,%xmm4 |
| (...skipping 42 matching lines...) |
| 1150 .byte 102,15,58,68,234,17 | 1150 .byte 102,15,58,68,234,17 |
| 1151 pxor %xmm9,%xmm1 | 1151 pxor %xmm9,%xmm1 |
| 1152 pxor %xmm0,%xmm9 | 1152 pxor %xmm0,%xmm9 |
| 1153 psrlq $5,%xmm0 | 1153 psrlq $5,%xmm0 |
| 1154 pxor %xmm9,%xmm0 | 1154 pxor %xmm9,%xmm0 |
| 1155 leaq 32(%rdx),%rdx | 1155 leaq 32(%rdx),%rdx |
| 1156 psrlq $1,%xmm0 | 1156 psrlq $1,%xmm0 |
| 1157 .byte 102,15,58,68,231,0 | 1157 .byte 102,15,58,68,231,0 |
| 1158 pxor %xmm1,%xmm0 | 1158 pxor %xmm1,%xmm0 |
| 1159 | 1159 |
| 1160 subq $32,%rcx | 1160 subq $0x20,%rcx |
| 1161 ja .Lmod_loop | 1161 ja .Lmod_loop |
| 1162 | 1162 |
| 1163 .Leven_tail: | 1163 .Leven_tail: |
| 1164 movdqa %xmm0,%xmm1 | 1164 movdqa %xmm0,%xmm1 |
| 1165 movdqa %xmm4,%xmm8 | 1165 movdqa %xmm4,%xmm8 |
| 1166 pshufd $78,%xmm0,%xmm4 | 1166 pshufd $78,%xmm0,%xmm4 |
| 1167 pxor %xmm0,%xmm4 | 1167 pxor %xmm0,%xmm4 |
| 1168 | 1168 |
| 1169 .byte 102,15,58,68,198,0 | 1169 .byte 102,15,58,68,198,0 |
| 1170 .byte 102,15,58,68,206,17 | 1170 .byte 102,15,58,68,206,17 |
| (...skipping 149 matching lines...) |
| 1320 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | 1320 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
| 1321 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | 1321 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
| 1322 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | 1322 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
| 1323 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | 1323 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
| 1324 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | 1324 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
| 1325 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | 1325 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
| 1326 | 1326 |
| 1327 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 1327 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 1328 .align 64 | 1328 .align 64 |
| 1329 #endif | 1329 #endif |