| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 | 4 |
| 5 .globl _gcm_gmult_4bit | 5 .globl _gcm_gmult_4bit |
| 6 .private_extern _gcm_gmult_4bit | 6 .private_extern _gcm_gmult_4bit |
| 7 | 7 |
| 8 .p2align 4 | 8 .p2align 4 |
| 9 _gcm_gmult_4bit: | 9 _gcm_gmult_4bit: |
| 10 pushq %rbx | 10 pushq %rbx |
| 11 pushq %rbp | 11 pushq %rbp |
| 12 pushq %r12 | 12 pushq %r12 |
| 13 L$gmult_prologue: | 13 L$gmult_prologue: |
| 14 | 14 |
| 15 movzbq 15(%rdi),%r8 | 15 movzbq 15(%rdi),%r8 |
| 16 leaq L$rem_4bit(%rip),%r11 | 16 leaq L$rem_4bit(%rip),%r11 |
| 17 xorq %rax,%rax | 17 xorq %rax,%rax |
| 18 xorq %rbx,%rbx | 18 xorq %rbx,%rbx |
| 19 movb %r8b,%al | 19 movb %r8b,%al |
| 20 movb %r8b,%bl | 20 movb %r8b,%bl |
| 21 shlb $4,%al | 21 shlb $4,%al |
| 22 movq $14,%rcx | 22 movq $14,%rcx |
| 23 movq 8(%rsi,%rax,1),%r8 | 23 movq 8(%rsi,%rax,1),%r8 |
| 24 movq (%rsi,%rax,1),%r9 | 24 movq (%rsi,%rax,1),%r9 |
| 25         andb    $240,%bl | 25         andb    $0xf0,%bl |
| 26 movq %r8,%rdx | 26 movq %r8,%rdx |
| 27 jmp L$oop1 | 27 jmp L$oop1 |
| 28 | 28 |
| 29 .p2align 4 | 29 .p2align 4 |
| 30 L$oop1: | 30 L$oop1: |
| 31 shrq $4,%r8 | 31 shrq $4,%r8 |
| 32         andq    $15,%rdx | 32         andq    $0xf,%rdx |
| 33 movq %r9,%r10 | 33 movq %r9,%r10 |
| 34 movb (%rdi,%rcx,1),%al | 34 movb (%rdi,%rcx,1),%al |
| 35 shrq $4,%r9 | 35 shrq $4,%r9 |
| 36 xorq 8(%rsi,%rbx,1),%r8 | 36 xorq 8(%rsi,%rbx,1),%r8 |
| 37 shlq $60,%r10 | 37 shlq $60,%r10 |
| 38 xorq (%rsi,%rbx,1),%r9 | 38 xorq (%rsi,%rbx,1),%r9 |
| 39 movb %al,%bl | 39 movb %al,%bl |
| 40 xorq (%r11,%rdx,8),%r9 | 40 xorq (%r11,%rdx,8),%r9 |
| 41 movq %r8,%rdx | 41 movq %r8,%rdx |
| 42 shlb $4,%al | 42 shlb $4,%al |
| 43 xorq %r10,%r8 | 43 xorq %r10,%r8 |
| 44 decq %rcx | 44 decq %rcx |
| 45 js L$break1 | 45 js L$break1 |
| 46 | 46 |
| 47 shrq $4,%r8 | 47 shrq $4,%r8 |
| 48         andq    $15,%rdx | 48         andq    $0xf,%rdx |
| 49 movq %r9,%r10 | 49 movq %r9,%r10 |
| 50 shrq $4,%r9 | 50 shrq $4,%r9 |
| 51 xorq 8(%rsi,%rax,1),%r8 | 51 xorq 8(%rsi,%rax,1),%r8 |
| 52 shlq $60,%r10 | 52 shlq $60,%r10 |
| 53 xorq (%rsi,%rax,1),%r9 | 53 xorq (%rsi,%rax,1),%r9 |
| 54         andb    $240,%bl | 54         andb    $0xf0,%bl |
| 55 xorq (%r11,%rdx,8),%r9 | 55 xorq (%r11,%rdx,8),%r9 |
| 56 movq %r8,%rdx | 56 movq %r8,%rdx |
| 57 xorq %r10,%r8 | 57 xorq %r10,%r8 |
| 58 jmp L$oop1 | 58 jmp L$oop1 |
| 59 | 59 |
| 60 .p2align 4 | 60 .p2align 4 |
| 61 L$break1: | 61 L$break1: |
| 62 shrq $4,%r8 | 62 shrq $4,%r8 |
| 63         andq    $15,%rdx | 63         andq    $0xf,%rdx |
| 64 movq %r9,%r10 | 64 movq %r9,%r10 |
| 65 shrq $4,%r9 | 65 shrq $4,%r9 |
| 66 xorq 8(%rsi,%rax,1),%r8 | 66 xorq 8(%rsi,%rax,1),%r8 |
| 67 shlq $60,%r10 | 67 shlq $60,%r10 |
| 68 xorq (%rsi,%rax,1),%r9 | 68 xorq (%rsi,%rax,1),%r9 |
| 69         andb    $240,%bl | 69         andb    $0xf0,%bl |
| 70 xorq (%r11,%rdx,8),%r9 | 70 xorq (%r11,%rdx,8),%r9 |
| 71 movq %r8,%rdx | 71 movq %r8,%rdx |
| 72 xorq %r10,%r8 | 72 xorq %r10,%r8 |
| 73 | 73 |
| 74 shrq $4,%r8 | 74 shrq $4,%r8 |
| 75         andq    $15,%rdx | 75         andq    $0xf,%rdx |
| 76 movq %r9,%r10 | 76 movq %r9,%r10 |
| 77 shrq $4,%r9 | 77 shrq $4,%r9 |
| 78 xorq 8(%rsi,%rbx,1),%r8 | 78 xorq 8(%rsi,%rbx,1),%r8 |
| 79 shlq $60,%r10 | 79 shlq $60,%r10 |
| 80 xorq (%rsi,%rbx,1),%r9 | 80 xorq (%rsi,%rbx,1),%r9 |
| 81 xorq %r10,%r8 | 81 xorq %r10,%r8 |
| 82 xorq (%r11,%rdx,8),%r9 | 82 xorq (%r11,%rdx,8),%r9 |
| 83 | 83 |
| 84 bswapq %r8 | 84 bswapq %r8 |
| 85 bswapq %r9 | 85 bswapq %r9 |
| (...skipping 787 matching lines...) |
| 873 .p2align 5 | 873 .p2align 5 |
| 874 _gcm_ghash_clmul: | 874 _gcm_ghash_clmul: |
| 875 L$_ghash_clmul: | 875 L$_ghash_clmul: |
| 876 movdqa L$bswap_mask(%rip),%xmm10 | 876 movdqa L$bswap_mask(%rip),%xmm10 |
| 877 | 877 |
| 878 movdqu (%rdi),%xmm0 | 878 movdqu (%rdi),%xmm0 |
| 879 movdqu (%rsi),%xmm2 | 879 movdqu (%rsi),%xmm2 |
| 880 movdqu 32(%rsi),%xmm7 | 880 movdqu 32(%rsi),%xmm7 |
| 881 .byte 102,65,15,56,0,194 | 881 .byte 102,65,15,56,0,194 |
| 882 | 882 |
| 883         subq    $16,%rcx | 883         subq    $0x10,%rcx |
| 884 jz L$odd_tail | 884 jz L$odd_tail |
| 885 | 885 |
| 886 movdqu 16(%rsi),%xmm6 | 886 movdqu 16(%rsi),%xmm6 |
| 887 movl _OPENSSL_ia32cap_P+4(%rip),%eax | 887 movl _OPENSSL_ia32cap_P+4(%rip),%eax |
| 888         cmpq    $48,%rcx | 888         cmpq    $0x30,%rcx |
| 889 jb L$skip4x | 889 jb L$skip4x |
| 890 | 890 |
| 891 andl $71303168,%eax | 891 andl $71303168,%eax |
| 892 cmpl $4194304,%eax | 892 cmpl $4194304,%eax |
| 893 je L$skip4x | 893 je L$skip4x |
| 894 | 894 |
| 895         subq    $48,%rcx | 895         subq    $0x30,%rcx |
| 896         movq    $11547335547999543296,%rax | 896         movq    $0xA040608020C0E000,%rax |
| 897 movdqu 48(%rsi),%xmm14 | 897 movdqu 48(%rsi),%xmm14 |
| 898 movdqu 64(%rsi),%xmm15 | 898 movdqu 64(%rsi),%xmm15 |
| 899 | 899 |
| 900 | 900 |
| 901 | 901 |
| 902 | 902 |
| 903 movdqu 48(%rdx),%xmm3 | 903 movdqu 48(%rdx),%xmm3 |
| 904 movdqu 32(%rdx),%xmm11 | 904 movdqu 32(%rdx),%xmm11 |
| 905 .byte 102,65,15,56,0,218 | 905 .byte 102,65,15,56,0,218 |
| 906 .byte 102,69,15,56,0,218 | 906 .byte 102,69,15,56,0,218 |
| (...skipping 26 matching lines...) |
| 933 .byte 102,69,15,58,68,222,0 | 933 .byte 102,69,15,58,68,222,0 |
| 934 movdqa %xmm0,%xmm1 | 934 movdqa %xmm0,%xmm1 |
| 935 pshufd $78,%xmm0,%xmm8 | 935 pshufd $78,%xmm0,%xmm8 |
| 936 pxor %xmm0,%xmm8 | 936 pxor %xmm0,%xmm8 |
| 937 .byte 102,69,15,58,68,238,17 | 937 .byte 102,69,15,58,68,238,17 |
| 938 .byte 102,68,15,58,68,231,0 | 938 .byte 102,68,15,58,68,231,0 |
| 939 xorps %xmm11,%xmm3 | 939 xorps %xmm11,%xmm3 |
| 940 xorps %xmm13,%xmm5 | 940 xorps %xmm13,%xmm5 |
| 941 | 941 |
| 942 leaq 64(%rdx),%rdx | 942 leaq 64(%rdx),%rdx |
| 943         subq    $64,%rcx | 943         subq    $0x40,%rcx |
| 944 jc L$tail4x | 944 jc L$tail4x |
| 945 | 945 |
| 946 jmp L$mod4_loop | 946 jmp L$mod4_loop |
| 947 .p2align 5 | 947 .p2align 5 |
| 948 L$mod4_loop: | 948 L$mod4_loop: |
| 949 .byte 102,65,15,58,68,199,0 | 949 .byte 102,65,15,58,68,199,0 |
| 950 xorps %xmm12,%xmm4 | 950 xorps %xmm12,%xmm4 |
| 951 movdqu 48(%rdx),%xmm11 | 951 movdqu 48(%rdx),%xmm11 |
| 952 .byte 102,69,15,56,0,218 | 952 .byte 102,69,15,56,0,218 |
| 953 .byte 102,65,15,58,68,207,17 | 953 .byte 102,65,15,58,68,207,17 |
| (...skipping 62 matching lines...) |
| 1016 movdqa %xmm0,%xmm1 | 1016 movdqa %xmm0,%xmm1 |
| 1017 .byte 102,69,15,58,68,238,17 | 1017 .byte 102,69,15,58,68,238,17 |
| 1018 xorps %xmm11,%xmm3 | 1018 xorps %xmm11,%xmm3 |
| 1019 pshufd $78,%xmm0,%xmm8 | 1019 pshufd $78,%xmm0,%xmm8 |
| 1020 pxor %xmm0,%xmm8 | 1020 pxor %xmm0,%xmm8 |
| 1021 | 1021 |
| 1022 .byte 102,68,15,58,68,231,0 | 1022 .byte 102,68,15,58,68,231,0 |
| 1023 xorps %xmm13,%xmm5 | 1023 xorps %xmm13,%xmm5 |
| 1024 | 1024 |
| 1025 leaq 64(%rdx),%rdx | 1025 leaq 64(%rdx),%rdx |
| 1026         subq    $64,%rcx | 1026         subq    $0x40,%rcx |
| 1027 jnc L$mod4_loop | 1027 jnc L$mod4_loop |
| 1028 | 1028 |
| 1029 L$tail4x: | 1029 L$tail4x: |
| 1030 .byte 102,65,15,58,68,199,0 | 1030 .byte 102,65,15,58,68,199,0 |
| 1031 .byte 102,65,15,58,68,207,17 | 1031 .byte 102,65,15,58,68,207,17 |
| 1032 .byte 102,68,15,58,68,199,16 | 1032 .byte 102,68,15,58,68,199,16 |
| 1033 xorps %xmm12,%xmm4 | 1033 xorps %xmm12,%xmm4 |
| 1034 xorps %xmm3,%xmm0 | 1034 xorps %xmm3,%xmm0 |
| 1035 xorps %xmm5,%xmm1 | 1035 xorps %xmm5,%xmm1 |
| 1036 pxor %xmm0,%xmm1 | 1036 pxor %xmm0,%xmm1 |
| (...skipping 23 matching lines...) |
| 1060 | 1060 |
| 1061 | 1061 |
| 1062 movdqa %xmm0,%xmm4 | 1062 movdqa %xmm0,%xmm4 |
| 1063 psrlq $1,%xmm0 | 1063 psrlq $1,%xmm0 |
| 1064 pxor %xmm4,%xmm1 | 1064 pxor %xmm4,%xmm1 |
| 1065 pxor %xmm0,%xmm4 | 1065 pxor %xmm0,%xmm4 |
| 1066 psrlq $5,%xmm0 | 1066 psrlq $5,%xmm0 |
| 1067 pxor %xmm4,%xmm0 | 1067 pxor %xmm4,%xmm0 |
| 1068 psrlq $1,%xmm0 | 1068 psrlq $1,%xmm0 |
| 1069 pxor %xmm1,%xmm0 | 1069 pxor %xmm1,%xmm0 |
| 1070         addq    $64,%rcx | 1070         addq    $0x40,%rcx |
| 1071 jz L$done | 1071 jz L$done |
| 1072 movdqu 32(%rsi),%xmm7 | 1072 movdqu 32(%rsi),%xmm7 |
| 1073         subq    $16,%rcx | 1073         subq    $0x10,%rcx |
| 1074 jz L$odd_tail | 1074 jz L$odd_tail |
| 1075 L$skip4x: | 1075 L$skip4x: |
| 1076 | 1076 |
| 1077 | 1077 |
| 1078 | 1078 |
| 1079 | 1079 |
| 1080 | 1080 |
| 1081 movdqu (%rdx),%xmm8 | 1081 movdqu (%rdx),%xmm8 |
| 1082 movdqu 16(%rdx),%xmm3 | 1082 movdqu 16(%rdx),%xmm3 |
| 1083 .byte 102,69,15,56,0,194 | 1083 .byte 102,69,15,56,0,194 |
| 1084 .byte 102,65,15,56,0,218 | 1084 .byte 102,65,15,56,0,218 |
| 1085 pxor %xmm8,%xmm0 | 1085 pxor %xmm8,%xmm0 |
| 1086 | 1086 |
| 1087 movdqa %xmm3,%xmm5 | 1087 movdqa %xmm3,%xmm5 |
| 1088 pshufd $78,%xmm3,%xmm4 | 1088 pshufd $78,%xmm3,%xmm4 |
| 1089 pxor %xmm3,%xmm4 | 1089 pxor %xmm3,%xmm4 |
| 1090 .byte 102,15,58,68,218,0 | 1090 .byte 102,15,58,68,218,0 |
| 1091 .byte 102,15,58,68,234,17 | 1091 .byte 102,15,58,68,234,17 |
| 1092 .byte 102,15,58,68,231,0 | 1092 .byte 102,15,58,68,231,0 |
| 1093 | 1093 |
| 1094 leaq 32(%rdx),%rdx | 1094 leaq 32(%rdx),%rdx |
| 1095 nop | 1095 nop |
| 1096         subq    $32,%rcx | 1096         subq    $0x20,%rcx |
| 1097 jbe L$even_tail | 1097 jbe L$even_tail |
| 1098 nop | 1098 nop |
| 1099 jmp L$mod_loop | 1099 jmp L$mod_loop |
| 1100 | 1100 |
| 1101 .p2align 5 | 1101 .p2align 5 |
| 1102 L$mod_loop: | 1102 L$mod_loop: |
| 1103 movdqa %xmm0,%xmm1 | 1103 movdqa %xmm0,%xmm1 |
| 1104 movdqa %xmm4,%xmm8 | 1104 movdqa %xmm4,%xmm8 |
| 1105 pshufd $78,%xmm0,%xmm4 | 1105 pshufd $78,%xmm0,%xmm4 |
| 1106 pxor %xmm0,%xmm4 | 1106 pxor %xmm0,%xmm4 |
| (...skipping 42 matching lines...) |
| 1149 .byte 102,15,58,68,234,17 | 1149 .byte 102,15,58,68,234,17 |
| 1150 pxor %xmm9,%xmm1 | 1150 pxor %xmm9,%xmm1 |
| 1151 pxor %xmm0,%xmm9 | 1151 pxor %xmm0,%xmm9 |
| 1152 psrlq $5,%xmm0 | 1152 psrlq $5,%xmm0 |
| 1153 pxor %xmm9,%xmm0 | 1153 pxor %xmm9,%xmm0 |
| 1154 leaq 32(%rdx),%rdx | 1154 leaq 32(%rdx),%rdx |
| 1155 psrlq $1,%xmm0 | 1155 psrlq $1,%xmm0 |
| 1156 .byte 102,15,58,68,231,0 | 1156 .byte 102,15,58,68,231,0 |
| 1157 pxor %xmm1,%xmm0 | 1157 pxor %xmm1,%xmm0 |
| 1158 | 1158 |
| 1159         subq    $32,%rcx | 1159         subq    $0x20,%rcx |
| 1160 ja L$mod_loop | 1160 ja L$mod_loop |
| 1161 | 1161 |
| 1162 L$even_tail: | 1162 L$even_tail: |
| 1163 movdqa %xmm0,%xmm1 | 1163 movdqa %xmm0,%xmm1 |
| 1164 movdqa %xmm4,%xmm8 | 1164 movdqa %xmm4,%xmm8 |
| 1165 pshufd $78,%xmm0,%xmm4 | 1165 pshufd $78,%xmm0,%xmm4 |
| 1166 pxor %xmm0,%xmm4 | 1166 pxor %xmm0,%xmm4 |
| 1167 | 1167 |
| 1168 .byte 102,15,58,68,198,0 | 1168 .byte 102,15,58,68,198,0 |
| 1169 .byte 102,15,58,68,206,17 | 1169 .byte 102,15,58,68,206,17 |
| (...skipping 149 matching lines...) |
| 1319 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | 1319 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
| 1320 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | 1320 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
| 1321 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | 1321 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
| 1322 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | 1322 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
| 1323 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | 1323 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
| 1324 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | 1324 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
| 1325 | 1325 |
| 1326 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 1326 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 1327 .p2align 6 | 1327 .p2align 6 |
| 1328 #endif | 1328 #endif |