OLD | NEW |
---|---|
1 ; LICENSE: | 1 ; LICENSE: |
2 ; This submission to NSS is to be made available under the terms of the | 2 ; This submission to NSS is to be made available under the terms of the |
3 ; Mozilla Public License, v. 2.0. You can obtain one at http: | 3 ; Mozilla Public License, v. 2.0. You can obtain one at http: |
4 ; //mozilla.org/MPL/2.0/. | 4 ; //mozilla.org/MPL/2.0/. |
5 ;############################################################################### | 5 ;############################################################################### |
6 ; Copyright(c) 2014, Intel Corp. | 6 ; Copyright(c) 2014, Intel Corp. |
7 ; Developers and authors: | 7 ; Developers and authors: |
8 ; Shay Gueron and Vlad Krasnov | 8 ; Shay Gueron and Vlad Krasnov |
9 ; Intel Corporation, Israel Development Centre, Haifa, Israel | 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel |
10 ; Please send feedback directly to crypto.feedback.alias@intel.com | 10 ; Please send feedback directly to crypto.feedback.alias@intel.com |
(...skipping 821 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
832 je @f | 832 je @f |
833 vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] | 833 vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
834 vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] | 834 vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
835 vmovdqu TMP2, XMMWORD PTR[14*16 + KS] | 835 vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
836 @@: | 836 @@: |
837 vaesenclast TMP1, TMP1, TMP2 | 837 vaesenclast TMP1, TMP1, TMP2 |
838 ; zero a temp location | 838 ; zero a temp location |
839 vpxor TMP2, TMP2, TMP2 | 839 vpxor TMP2, TMP2, TMP2 |
840 vmovdqa XMMWORD PTR[esp], TMP2 | 840 vmovdqa XMMWORD PTR[esp], TMP2 |
841 ; copy as many bytes as needed | 841 ; copy as many bytes as needed |
842 mov edi, edx | |
wtc
2014/04/30 02:00:50
The code below uses the 8-bit dl register (which is the low byte of edx), so edx is saved in edi first.
| |
842 xor KS, KS | 843 xor KS, KS |
843 @@: | 844 @@: |
844 cmp len, KS | 845 cmp len, KS |
845 je @f | 846 je @f |
846 mov di, [PT + KS] | 847 mov dl, BYTE PTR[PT + KS] |
847 mov [esp + KS], di | 848 mov BYTE PTR[esp + KS], dl |
wtc
2014/04/30 02:00:50
Here we want to copy one byte at a time. Because of the 16-bit di register, the old code copied two bytes per iteration.
agl
2014/04/30 17:38:11
I think this can be fixed with just:
mov di, BYTE
wtc
2014/04/30 21:27:21
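Thanks for the suggestion. MASM doesn't allow a 16-bit register such as di to be paired with a BYTE PTR memory operand (the operand sizes must match).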
| |
848 inc KS | 849 inc KS |
849 jmp @b | 850 jmp @b |
850 @@: | 851 @@: |
851 vpxor TMP1, TMP1, XMMWORD PTR[esp] | 852 vpxor TMP1, TMP1, XMMWORD PTR[esp] |
852 vmovdqa XMMWORD PTR[esp], TMP1 | 853 vmovdqa XMMWORD PTR[esp], TMP1 |
853 xor KS, KS | 854 xor KS, KS |
854 @@: | 855 @@: |
855 cmp len, KS | 856 cmp len, KS |
856 je @f | 857 je @f |
857 mov di, [esp + KS] | 858 mov dl, BYTE PTR[esp + KS] |
858 mov [CT + KS], di | 859 mov BYTE PTR[CT + KS], dl |
859 inc KS | 860 inc KS |
860 jmp @b | 861 jmp @b |
861 @@: | 862 @@: |
862 cmp KS, 16 | 863 cmp KS, 16 |
863 je @f | 864 je @f |
864 mov BYTE PTR[esp + KS], 0 | 865 mov BYTE PTR[esp + KS], 0 |
865 inc KS | 866 inc KS |
866 jmp @b | 867 jmp @b |
867 @@: | 868 @@: |
869 mov edx, edi | |
wtc
2014/04/30 02:00:50
Here we restore the value of edx because we will use it below.
| |
868 vmovdqa TMP1, XMMWORD PTR[esp] | 870 vmovdqa TMP1, XMMWORD PTR[esp] |
869 | 871 |
870 vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] | 872 vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
871 vpxor TMP1, TMP1, T | 873 vpxor TMP1, TMP1, T |
872 | 874 |
873 vmovdqu TMP0, XMMWORD PTR[Htbl] | 875 vmovdqu TMP0, XMMWORD PTR[Htbl] |
874 GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 | 876 GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
875 vmovdqu T, TMP1 | 877 vmovdqu T, TMP1 |
876 | 878 |
877 LEncDataEnd: | 879 LEncDataEnd: |
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1142 vmovdqu TMP2, XMMWORD PTR[12*16 + KS] | 1144 vmovdqu TMP2, XMMWORD PTR[12*16 + KS] |
1143 cmp NR, 12 | 1145 cmp NR, 12 |
1144 je @f | 1146 je @f |
1145 vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] | 1147 vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] |
1146 vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] | 1148 vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] |
1147 vmovdqu TMP2, XMMWORD PTR[14*16 + KS] | 1149 vmovdqu TMP2, XMMWORD PTR[14*16 + KS] |
1148 @@: | 1150 @@: |
1149 vaesenclast xmm7, TMP1, TMP2 | 1151 vaesenclast xmm7, TMP1, TMP2 |
1150 | 1152 |
1151 ; copy as many bytes as needed | 1153 ; copy as many bytes as needed |
1154 mov edi, edx | |
1152 xor KS, KS | 1155 xor KS, KS |
1153 @@: | 1156 @@: |
1154 cmp len, KS | 1157 cmp len, KS |
1155 je @f | 1158 je @f |
1156 mov di, [CT + KS] | 1159 mov dl, BYTE PTR[CT + KS] |
1157 mov [esp + KS], di | 1160 mov BYTE PTR[esp + KS], dl |
1158 inc KS | 1161 inc KS |
1159 jmp @b | 1162 jmp @b |
1160 @@: | 1163 @@: |
1161 cmp KS, 16 | 1164 cmp KS, 16 |
1162 je @f | 1165 je @f |
1163 mov BYTE PTR[esp + KS], 0 | 1166 mov BYTE PTR[esp + KS], 0 |
1164 inc KS | 1167 inc KS |
1165 jmp @b | 1168 jmp @b |
1166 @@: | 1169 @@: |
1167 | 1170 |
1171 mov edx, edi | |
wtc
2014/04/30 02:00:50
Here we restore the value of edx because we will use it below.
| |
1168 vmovdqa TMP1, XMMWORD PTR[esp] | 1172 vmovdqa TMP1, XMMWORD PTR[esp] |
1169 vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] | 1173 vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] |
1170 vpxor TMP1, TMP1, T | 1174 vpxor TMP1, TMP1, T |
1171 | 1175 |
1172 vmovdqu TMP0, XMMWORD PTR[Htbl] | 1176 vmovdqu TMP0, XMMWORD PTR[Htbl] |
1173 GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 | 1177 GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 |
1174 vmovdqu T, TMP1 | 1178 vmovdqu T, TMP1 |
1175 | 1179 |
1176 | 1180 |
1177 vpxor xmm7, xmm7, XMMWORD PTR[esp] | 1181 vpxor xmm7, xmm7, XMMWORD PTR[esp] |
1178 vmovdqa XMMWORD PTR[esp], xmm7 | 1182 vmovdqa XMMWORD PTR[esp], xmm7 |
1183 mov edi, edx | |
1179 xor KS, KS | 1184 xor KS, KS |
1180 @@: | 1185 @@: |
1181 cmp len, KS | 1186 cmp len, KS |
1182 je @f | 1187 je @f |
1183 mov di, [esp + KS] | 1188 mov dl, BYTE PTR[esp + KS] |
1184 mov [PT + KS], di | 1189 mov BYTE PTR[PT + KS], dl |
1185 inc KS | 1190 inc KS |
1186 jmp @b | 1191 jmp @b |
1187 @@: | 1192 @@: |
1188 cmp KS, 16 | 1193 mov edx, edi |
1189 je @f | |
1190 mov BYTE PTR[PT + KS], 0 | |
1191 inc KS | |
1192 jmp @b | |
wtc
2014/04/30 02:00:50
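This block of code seems to be a copy and paste error: it writes zero bytes to PT from offset len up to 16, like the earlier loop that pads the stack buffer.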
| |
1193 @@: | |
1194 | 1194 |
1195 LDecDataEnd: | 1195 LDecDataEnd: |
1196 | 1196 |
1197 bswap aluCTR | 1197 bswap aluCTR |
1198 mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR | 1198 mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR |
1199 | 1199 |
1200 mov esp, ebp | 1200 mov esp, ebp |
1201 pop edi | 1201 pop edi |
1202 pop esi | 1202 pop esi |
1203 pop ebx | 1203 pop ebx |
1204 pop ebp | 1204 pop ebp |
1205 | 1205 |
1206 vzeroupper | 1206 vzeroupper |
1207 | 1207 |
1208 ret | 1208 ret |
1209 intel_aes_gcmDEC ENDP | 1209 intel_aes_gcmDEC ENDP |
1210 | 1210 |
1211 | 1211 |
1212 END | 1212 END |
OLD | NEW |