Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(367)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/sha/sha512-x86_64.asm

Issue 2219933002: Land BoringSSL roll on master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 default rel 1 default rel
2 %define XMMWORD 2 %define XMMWORD
3 %define YMMWORD 3 %define YMMWORD
4 %define ZMMWORD 4 %define ZMMWORD
5 section .text code align=64 5 section .text code align=64
6 6
7 7
8 EXTERN OPENSSL_ia32cap_P 8 EXTERN OPENSSL_ia32cap_P
9 global sha512_block_data_order 9 global sha512_block_data_order
10 10
11 ALIGN 16 11 ALIGN 16
12 sha512_block_data_order: 12 sha512_block_data_order:
13 mov QWORD[8+rsp],rdi ;WIN64 prologue 13 mov QWORD[8+rsp],rdi ;WIN64 prologue
14 mov QWORD[16+rsp],rsi 14 mov QWORD[16+rsp],rsi
15 mov rax,rsp 15 mov rax,rsp
16 $L$SEH_begin_sha512_block_data_order: 16 $L$SEH_begin_sha512_block_data_order:
17 mov rdi,rcx 17 mov rdi,rcx
18 mov rsi,rdx 18 mov rsi,rdx
19 mov rdx,r8 19 mov rdx,r8
20 20
21 21
22 lea r11,[OPENSSL_ia32cap_P]
23 mov r9d,DWORD[r11]
24 mov r10d,DWORD[4+r11]
25 mov r11d,DWORD[8+r11]
26 test r10d,2048
27 jnz NEAR $L$xop_shortcut
28 and r9d,1073741824
29 and r10d,268435968
30 or r10d,r9d
31 cmp r10d,1342177792
32 je NEAR $L$avx_shortcut
22 push rbx 33 push rbx
23 push rbp 34 push rbp
24 push r12 35 push r12
25 push r13 36 push r13
26 push r14 37 push r14
27 push r15 38 push r15
28 mov r11,rsp 39 mov r11,rsp
29 shl rdx,4 40 shl rdx,4
30 sub rsp,16*8+4*8 41 sub rsp,16*8+4*8
31 lea rdx,[rdx*8+rsi] 42 lea rdx,[rdx*8+rsi]
(...skipping 1762 matching lines...) Expand 10 before | Expand all | Expand 10 after
1794 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 1805 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1795 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 1806 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1796 1807
1797 DQ 0x0001020304050607,0x08090a0b0c0d0e0f 1808 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
1798 DQ 0x0001020304050607,0x08090a0b0c0d0e0f 1809 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
; NUL-terminated ASCII identification string. The bytes decode to
; "SHA512 block transform for x86_64, CRYPTOGAMS by" followed by an
; e-mail address in angle brackets.
1799 DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 1810 DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
1800 DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 1811 DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
1801 DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 1812 DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
1802 DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 1813 DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
1803 DB 111,114,103,62,0 1814 DB 111,114,103,62,0
1815
; sha512_block_data_order_xop -- SHA-512 block routine whose round loop mixes
; AVX instructions with instructions emitted as raw bytes (DB 143,...).
; Entered either at this label with Win64 register arguments (rcx, rdx, r8),
; or at $L$xop_shortcut by the jump from sha512_block_data_order, which has
; already performed the same register save and argument shuffle.
; After the shuffle: rdi = pointer to eight 64-bit state words,
;                    rsi = input pointer,
;                    rdx = number of 128-byte input blocks.
1816 ALIGN 64
1817 sha512_block_data_order_xop:
; rdi/rsi are callee-saved on Win64; park them in the caller-provided slots
; above the return address. They are reloaded at $L$epilogue_xop.
1818 mov QWORD[8+rsp],rdi ;WIN64 prologue
1819 mov QWORD[16+rsp],rsi
; NOTE(review): rax = entry rsp is overwritten below before any visible read;
; presumably it is consumed by the SEH unwind handler -- confirm.
1820 mov rax,rsp
1821 $L$SEH_begin_sha512_block_data_order_xop:
; Move the Win64 argument registers into rdi/rsi/rdx, which the body uses.
1822 mov rdi,rcx
1823 mov rsi,rdx
1824 mov rdx,r8
1825
1826
1827 $L$xop_shortcut:
; Save the callee-saved general-purpose registers used as scratch below.
1828 push rbx
1829 push rbp
1830 push r12
1831 push r13
1832 push r14
1833 push r15
; r11 = rsp after the six pushes; stored in the frame so the epilogue can
; find the pushed registers even though rsp is realigned below.
1834 mov r11,rsp
; rdx = rsi + rdx*128 (shl 4, then *8 in the lea): end-of-input pointer.
1835 shl rdx,4
1836 sub rsp,256
1837 lea rdx,[rdx*8+rsi]
; Frame is 64-byte aligned. Layout relative to the new rsp:
;   0..127   sixteen qword slots read by the scalar rounds
;   128..159 saved rdi, rsi, end pointer, r11
;   160..255 saved xmm6-xmm11 (callee-saved on Win64)
1838 and rsp,-64
1839 mov QWORD[((128+0))+rsp],rdi
1840 mov QWORD[((128+8))+rsp],rsi
1841 mov QWORD[((128+16))+rsp],rdx
1842 mov QWORD[((128+24))+rsp],r11
1843 movaps XMMWORD[(128+32)+rsp],xmm6
1844 movaps XMMWORD[(128+48)+rsp],xmm7
1845 movaps XMMWORD[(128+64)+rsp],xmm8
1846 movaps XMMWORD[(128+80)+rsp],xmm9
1847 movaps XMMWORD[(128+96)+rsp],xmm10
1848 movaps XMMWORD[(128+112)+rsp],xmm11
1849 $L$prologue_xop:
1850
1851 vzeroupper
; Load the eight state words into the working registers
; rax, rbx, rcx, rdx, r8, r9, r10, r11 (called a..h in the round comments).
1852 mov rax,QWORD[rdi]
1853 mov rbx,QWORD[8+rdi]
1854 mov rcx,QWORD[16+rdi]
1855 mov rdx,QWORD[24+rdi]
1856 mov r8,QWORD[32+rdi]
1857 mov r9,QWORD[40+rdi]
1858 mov r10,QWORD[48+rdi]
1859 mov r11,QWORD[56+rdi]
; The block loop is entered unconditionally: the end-pointer test happens only
; after a block has been processed, so a block count of zero is not handled
; here (callers presumably never pass 0 -- confirm).
1860 jmp NEAR $L$loop_xop
; $L$loop_xop -- per-block setup.
; Loads 128 input bytes from [rsi] into xmm0-xmm7, shuffles every vector with
; the mask at K512+1280, adds the first eight table vectors and parks the sums
; in the sixteen qword slots at [rsp]..[120+rsp] for the scalar rounds.
; K512 is defined outside the visible lines; the mask is presumably a
; byte-order reversal -- confirm against the table.
1861 ALIGN 16
1862 $L$loop_xop:
1863 vmovdqa xmm11,XMMWORD[((K512+1280))]
1864 vmovdqu xmm0,XMMWORD[rsi]
; rbp is biased by 128 so the eight table vectors used below are reached with
; displacements -128..96, i.e. a 32-byte stride between 16-byte reads.
1865 lea rbp,[((K512+128))]
1866 vmovdqu xmm1,XMMWORD[16+rsi]
1867 vmovdqu xmm2,XMMWORD[32+rsi]
1868 vpshufb xmm0,xmm0,xmm11
1869 vmovdqu xmm3,XMMWORD[48+rsi]
1870 vpshufb xmm1,xmm1,xmm11
1871 vmovdqu xmm4,XMMWORD[64+rsi]
1872 vpshufb xmm2,xmm2,xmm11
1873 vmovdqu xmm5,XMMWORD[80+rsi]
1874 vpshufb xmm3,xmm3,xmm11
1875 vmovdqu xmm6,XMMWORD[96+rsi]
1876 vpshufb xmm4,xmm4,xmm11
1877 vmovdqu xmm7,XMMWORD[112+rsi]
1878 vpshufb xmm5,xmm5,xmm11
1879 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
1880 vpshufb xmm6,xmm6,xmm11
1881 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
1882 vpshufb xmm7,xmm7,xmm11
1883 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
; The last shuffle is done, so xmm11 (the mask) is reused as a temporary.
1884 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]
1885 vmovdqa XMMWORD[rsp],xmm8
1886 vpaddq xmm8,xmm4,XMMWORD[rbp]
1887 vmovdqa XMMWORD[16+rsp],xmm9
1888 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
1889 vmovdqa XMMWORD[32+rsp],xmm10
1890 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
1891 vmovdqa XMMWORD[48+rsp],xmm11
1892 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
1893 vmovdqa XMMWORD[64+rsp],xmm8
; Seed the scalar temporaries expected by the first round:
; r14 = a, rdi = b ^ c, r13 = e.
1894 mov r14,rax
1895 vmovdqa XMMWORD[80+rsp],xmm9
1896 mov rdi,rbx
1897 vmovdqa XMMWORD[96+rsp],xmm10
1898 xor rdi,rcx
1899 vmovdqa XMMWORD[112+rsp],xmm11
1900 mov r13,r8
1901 jmp NEAR $L$xop_00_47
1902
; $L$xop_00_47 -- one pass performs sixteen scalar rounds interleaved with the
; vector update of xmm0-xmm7 (two qwords each) for the following pass.
; Scalar pattern per round (a..h rotate through the registers every round):
;   r13: ror 23 / xor e / ror 4 / xor e / ror 14  =>  e>>>14 ^ e>>>18 ^ e>>>41
;   r14: ror 5  / xor a / ror 6 / xor a / ror 28  =>  a>>>28 ^ a>>>34 ^ a>>>39
;   r12: ((f ^ g) & e) ^ g
;   rdi and r15 alternate as ((a ^ b) & (b ^ c)) ^ b; the a ^ b term of one
;   round is reused as b ^ c of the next.
;   h += stack slot + r12 + r13; d += h; h += rdi/r15; r14 += h is completed
;   by the "mov <reg>,r14" at the start of the next round.
; The DB 143,... lines are instructions emitted as raw bytes; presumably XOP
; rotates (vprotq) written this way for assemblers without XOP support --
; confirm against the generator script.
1903 ALIGN 16
1904 $L$xop_00_47:
; Step to the next eight table vectors (8 x 32-byte stride).
1905 add rbp,256
; Rounds on [rsp] and [8+rsp]; xmm0 is refreshed and xmm0 + table vector at
; -128+rbp is stored back to [rsp].
1906 vpalignr xmm8,xmm1,xmm0,8
1907 ror r13,23
1908 mov rax,r14
1909 vpalignr xmm11,xmm5,xmm4,8
1910 mov r12,r9
1911 ror r14,5
1912 DB 143,72,120,195,200,56
1913 xor r13,r8
1914 xor r12,r10
1915 vpsrlq xmm8,xmm8,7
1916 ror r13,4
1917 xor r14,rax
1918 vpaddq xmm0,xmm0,xmm11
1919 and r12,r8
1920 xor r13,r8
1921 add r11,QWORD[rsp]
1922 mov r15,rax
1923 DB 143,72,120,195,209,7
1924 xor r12,r10
1925 ror r14,6
1926 vpxor xmm8,xmm8,xmm9
1927 xor r15,rbx
1928 add r11,r12
1929 ror r13,14
1930 and rdi,r15
1931 DB 143,104,120,195,223,3
1932 xor r14,rax
1933 add r11,r13
1934 vpxor xmm8,xmm8,xmm10
1935 xor rdi,rbx
1936 ror r14,28
1937 vpsrlq xmm10,xmm7,6
1938 add rdx,r11
1939 add r11,rdi
1940 vpaddq xmm0,xmm0,xmm8
1941 mov r13,rdx
1942 add r14,r11
1943 DB 143,72,120,195,203,42
1944 ror r13,23
1945 mov r11,r14
1946 vpxor xmm11,xmm11,xmm10
1947 mov r12,r8
1948 ror r14,5
1949 xor r13,rdx
1950 xor r12,r9
1951 vpxor xmm11,xmm11,xmm9
1952 ror r13,4
1953 xor r14,r11
1954 and r12,rdx
1955 xor r13,rdx
1956 vpaddq xmm0,xmm0,xmm11
1957 add r10,QWORD[8+rsp]
1958 mov rdi,r11
1959 xor r12,r9
1960 ror r14,6
1961 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
1962 xor rdi,rax
1963 add r10,r12
1964 ror r13,14
1965 and r15,rdi
1966 xor r14,r11
1967 add r10,r13
1968 xor r15,rax
1969 ror r14,28
1970 add rcx,r10
1971 add r10,r15
1972 mov r13,rcx
1973 add r14,r10
1974 vmovdqa XMMWORD[rsp],xmm10
; Rounds on [16+rsp] and [24+rsp]; xmm1 refreshed, stored to [16+rsp].
1975 vpalignr xmm8,xmm2,xmm1,8
1976 ror r13,23
1977 mov r10,r14
1978 vpalignr xmm11,xmm6,xmm5,8
1979 mov r12,rdx
1980 ror r14,5
1981 DB 143,72,120,195,200,56
1982 xor r13,rcx
1983 xor r12,r8
1984 vpsrlq xmm8,xmm8,7
1985 ror r13,4
1986 xor r14,r10
1987 vpaddq xmm1,xmm1,xmm11
1988 and r12,rcx
1989 xor r13,rcx
1990 add r9,QWORD[16+rsp]
1991 mov r15,r10
1992 DB 143,72,120,195,209,7
1993 xor r12,r8
1994 ror r14,6
1995 vpxor xmm8,xmm8,xmm9
1996 xor r15,r11
1997 add r9,r12
1998 ror r13,14
1999 and rdi,r15
2000 DB 143,104,120,195,216,3
2001 xor r14,r10
2002 add r9,r13
2003 vpxor xmm8,xmm8,xmm10
2004 xor rdi,r11
2005 ror r14,28
2006 vpsrlq xmm10,xmm0,6
2007 add rbx,r9
2008 add r9,rdi
2009 vpaddq xmm1,xmm1,xmm8
2010 mov r13,rbx
2011 add r14,r9
2012 DB 143,72,120,195,203,42
2013 ror r13,23
2014 mov r9,r14
2015 vpxor xmm11,xmm11,xmm10
2016 mov r12,rcx
2017 ror r14,5
2018 xor r13,rbx
2019 xor r12,rdx
2020 vpxor xmm11,xmm11,xmm9
2021 ror r13,4
2022 xor r14,r9
2023 and r12,rbx
2024 xor r13,rbx
2025 vpaddq xmm1,xmm1,xmm11
2026 add r8,QWORD[24+rsp]
2027 mov rdi,r9
2028 xor r12,rdx
2029 ror r14,6
2030 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
2031 xor rdi,r10
2032 add r8,r12
2033 ror r13,14
2034 and r15,rdi
2035 xor r14,r9
2036 add r8,r13
2037 xor r15,r10
2038 ror r14,28
2039 add rax,r8
2040 add r8,r15
2041 mov r13,rax
2042 add r14,r8
2043 vmovdqa XMMWORD[16+rsp],xmm10
; Rounds on [32+rsp] and [40+rsp]; xmm2 refreshed, stored to [32+rsp].
2044 vpalignr xmm8,xmm3,xmm2,8
2045 ror r13,23
2046 mov r8,r14
2047 vpalignr xmm11,xmm7,xmm6,8
2048 mov r12,rbx
2049 ror r14,5
2050 DB 143,72,120,195,200,56
2051 xor r13,rax
2052 xor r12,rcx
2053 vpsrlq xmm8,xmm8,7
2054 ror r13,4
2055 xor r14,r8
2056 vpaddq xmm2,xmm2,xmm11
2057 and r12,rax
2058 xor r13,rax
2059 add rdx,QWORD[32+rsp]
2060 mov r15,r8
2061 DB 143,72,120,195,209,7
2062 xor r12,rcx
2063 ror r14,6
2064 vpxor xmm8,xmm8,xmm9
2065 xor r15,r9
2066 add rdx,r12
2067 ror r13,14
2068 and rdi,r15
2069 DB 143,104,120,195,217,3
2070 xor r14,r8
2071 add rdx,r13
2072 vpxor xmm8,xmm8,xmm10
2073 xor rdi,r9
2074 ror r14,28
2075 vpsrlq xmm10,xmm1,6
2076 add r11,rdx
2077 add rdx,rdi
2078 vpaddq xmm2,xmm2,xmm8
2079 mov r13,r11
2080 add r14,rdx
2081 DB 143,72,120,195,203,42
2082 ror r13,23
2083 mov rdx,r14
2084 vpxor xmm11,xmm11,xmm10
2085 mov r12,rax
2086 ror r14,5
2087 xor r13,r11
2088 xor r12,rbx
2089 vpxor xmm11,xmm11,xmm9
2090 ror r13,4
2091 xor r14,rdx
2092 and r12,r11
2093 xor r13,r11
2094 vpaddq xmm2,xmm2,xmm11
2095 add rcx,QWORD[40+rsp]
2096 mov rdi,rdx
2097 xor r12,rbx
2098 ror r14,6
2099 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
2100 xor rdi,r8
2101 add rcx,r12
2102 ror r13,14
2103 and r15,rdi
2104 xor r14,rdx
2105 add rcx,r13
2106 xor r15,r8
2107 ror r14,28
2108 add r10,rcx
2109 add rcx,r15
2110 mov r13,r10
2111 add r14,rcx
2112 vmovdqa XMMWORD[32+rsp],xmm10
; Rounds on [48+rsp] and [56+rsp]; xmm3 refreshed, stored to [48+rsp].
2113 vpalignr xmm8,xmm4,xmm3,8
2114 ror r13,23
2115 mov rcx,r14
2116 vpalignr xmm11,xmm0,xmm7,8
2117 mov r12,r11
2118 ror r14,5
2119 DB 143,72,120,195,200,56
2120 xor r13,r10
2121 xor r12,rax
2122 vpsrlq xmm8,xmm8,7
2123 ror r13,4
2124 xor r14,rcx
2125 vpaddq xmm3,xmm3,xmm11
2126 and r12,r10
2127 xor r13,r10
2128 add rbx,QWORD[48+rsp]
2129 mov r15,rcx
2130 DB 143,72,120,195,209,7
2131 xor r12,rax
2132 ror r14,6
2133 vpxor xmm8,xmm8,xmm9
2134 xor r15,rdx
2135 add rbx,r12
2136 ror r13,14
2137 and rdi,r15
2138 DB 143,104,120,195,218,3
2139 xor r14,rcx
2140 add rbx,r13
2141 vpxor xmm8,xmm8,xmm10
2142 xor rdi,rdx
2143 ror r14,28
2144 vpsrlq xmm10,xmm2,6
2145 add r9,rbx
2146 add rbx,rdi
2147 vpaddq xmm3,xmm3,xmm8
2148 mov r13,r9
2149 add r14,rbx
2150 DB 143,72,120,195,203,42
2151 ror r13,23
2152 mov rbx,r14
2153 vpxor xmm11,xmm11,xmm10
2154 mov r12,r10
2155 ror r14,5
2156 xor r13,r9
2157 xor r12,r11
2158 vpxor xmm11,xmm11,xmm9
2159 ror r13,4
2160 xor r14,rbx
2161 and r12,r9
2162 xor r13,r9
2163 vpaddq xmm3,xmm3,xmm11
2164 add rax,QWORD[56+rsp]
2165 mov rdi,rbx
2166 xor r12,r11
2167 ror r14,6
2168 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
2169 xor rdi,rcx
2170 add rax,r12
2171 ror r13,14
2172 and r15,rdi
2173 xor r14,rbx
2174 add rax,r13
2175 xor r15,rcx
2176 ror r14,28
2177 add r8,rax
2178 add rax,r15
2179 mov r13,r8
2180 add r14,rax
2181 vmovdqa XMMWORD[48+rsp],xmm10
; Rounds on [64+rsp] and [72+rsp]; xmm4 refreshed, stored to [64+rsp].
2182 vpalignr xmm8,xmm5,xmm4,8
2183 ror r13,23
2184 mov rax,r14
2185 vpalignr xmm11,xmm1,xmm0,8
2186 mov r12,r9
2187 ror r14,5
2188 DB 143,72,120,195,200,56
2189 xor r13,r8
2190 xor r12,r10
2191 vpsrlq xmm8,xmm8,7
2192 ror r13,4
2193 xor r14,rax
2194 vpaddq xmm4,xmm4,xmm11
2195 and r12,r8
2196 xor r13,r8
2197 add r11,QWORD[64+rsp]
2198 mov r15,rax
2199 DB 143,72,120,195,209,7
2200 xor r12,r10
2201 ror r14,6
2202 vpxor xmm8,xmm8,xmm9
2203 xor r15,rbx
2204 add r11,r12
2205 ror r13,14
2206 and rdi,r15
2207 DB 143,104,120,195,219,3
2208 xor r14,rax
2209 add r11,r13
2210 vpxor xmm8,xmm8,xmm10
2211 xor rdi,rbx
2212 ror r14,28
2213 vpsrlq xmm10,xmm3,6
2214 add rdx,r11
2215 add r11,rdi
2216 vpaddq xmm4,xmm4,xmm8
2217 mov r13,rdx
2218 add r14,r11
2219 DB 143,72,120,195,203,42
2220 ror r13,23
2221 mov r11,r14
2222 vpxor xmm11,xmm11,xmm10
2223 mov r12,r8
2224 ror r14,5
2225 xor r13,rdx
2226 xor r12,r9
2227 vpxor xmm11,xmm11,xmm9
2228 ror r13,4
2229 xor r14,r11
2230 and r12,rdx
2231 xor r13,rdx
2232 vpaddq xmm4,xmm4,xmm11
2233 add r10,QWORD[72+rsp]
2234 mov rdi,r11
2235 xor r12,r9
2236 ror r14,6
2237 vpaddq xmm10,xmm4,XMMWORD[rbp]
2238 xor rdi,rax
2239 add r10,r12
2240 ror r13,14
2241 and r15,rdi
2242 xor r14,r11
2243 add r10,r13
2244 xor r15,rax
2245 ror r14,28
2246 add rcx,r10
2247 add r10,r15
2248 mov r13,rcx
2249 add r14,r10
2250 vmovdqa XMMWORD[64+rsp],xmm10
; Rounds on [80+rsp] and [88+rsp]; xmm5 refreshed, stored to [80+rsp].
2251 vpalignr xmm8,xmm6,xmm5,8
2252 ror r13,23
2253 mov r10,r14
2254 vpalignr xmm11,xmm2,xmm1,8
2255 mov r12,rdx
2256 ror r14,5
2257 DB 143,72,120,195,200,56
2258 xor r13,rcx
2259 xor r12,r8
2260 vpsrlq xmm8,xmm8,7
2261 ror r13,4
2262 xor r14,r10
2263 vpaddq xmm5,xmm5,xmm11
2264 and r12,rcx
2265 xor r13,rcx
2266 add r9,QWORD[80+rsp]
2267 mov r15,r10
2268 DB 143,72,120,195,209,7
2269 xor r12,r8
2270 ror r14,6
2271 vpxor xmm8,xmm8,xmm9
2272 xor r15,r11
2273 add r9,r12
2274 ror r13,14
2275 and rdi,r15
2276 DB 143,104,120,195,220,3
2277 xor r14,r10
2278 add r9,r13
2279 vpxor xmm8,xmm8,xmm10
2280 xor rdi,r11
2281 ror r14,28
2282 vpsrlq xmm10,xmm4,6
2283 add rbx,r9
2284 add r9,rdi
2285 vpaddq xmm5,xmm5,xmm8
2286 mov r13,rbx
2287 add r14,r9
2288 DB 143,72,120,195,203,42
2289 ror r13,23
2290 mov r9,r14
2291 vpxor xmm11,xmm11,xmm10
2292 mov r12,rcx
2293 ror r14,5
2294 xor r13,rbx
2295 xor r12,rdx
2296 vpxor xmm11,xmm11,xmm9
2297 ror r13,4
2298 xor r14,r9
2299 and r12,rbx
2300 xor r13,rbx
2301 vpaddq xmm5,xmm5,xmm11
2302 add r8,QWORD[88+rsp]
2303 mov rdi,r9
2304 xor r12,rdx
2305 ror r14,6
2306 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
2307 xor rdi,r10
2308 add r8,r12
2309 ror r13,14
2310 and r15,rdi
2311 xor r14,r9
2312 add r8,r13
2313 xor r15,r10
2314 ror r14,28
2315 add rax,r8
2316 add r8,r15
2317 mov r13,rax
2318 add r14,r8
2319 vmovdqa XMMWORD[80+rsp],xmm10
; Rounds on [96+rsp] and [104+rsp]; xmm6 refreshed, stored to [96+rsp].
2320 vpalignr xmm8,xmm7,xmm6,8
2321 ror r13,23
2322 mov r8,r14
2323 vpalignr xmm11,xmm3,xmm2,8
2324 mov r12,rbx
2325 ror r14,5
2326 DB 143,72,120,195,200,56
2327 xor r13,rax
2328 xor r12,rcx
2329 vpsrlq xmm8,xmm8,7
2330 ror r13,4
2331 xor r14,r8
2332 vpaddq xmm6,xmm6,xmm11
2333 and r12,rax
2334 xor r13,rax
2335 add rdx,QWORD[96+rsp]
2336 mov r15,r8
2337 DB 143,72,120,195,209,7
2338 xor r12,rcx
2339 ror r14,6
2340 vpxor xmm8,xmm8,xmm9
2341 xor r15,r9
2342 add rdx,r12
2343 ror r13,14
2344 and rdi,r15
2345 DB 143,104,120,195,221,3
2346 xor r14,r8
2347 add rdx,r13
2348 vpxor xmm8,xmm8,xmm10
2349 xor rdi,r9
2350 ror r14,28
2351 vpsrlq xmm10,xmm5,6
2352 add r11,rdx
2353 add rdx,rdi
2354 vpaddq xmm6,xmm6,xmm8
2355 mov r13,r11
2356 add r14,rdx
2357 DB 143,72,120,195,203,42
2358 ror r13,23
2359 mov rdx,r14
2360 vpxor xmm11,xmm11,xmm10
2361 mov r12,rax
2362 ror r14,5
2363 xor r13,r11
2364 xor r12,rbx
2365 vpxor xmm11,xmm11,xmm9
2366 ror r13,4
2367 xor r14,rdx
2368 and r12,r11
2369 xor r13,r11
2370 vpaddq xmm6,xmm6,xmm11
2371 add rcx,QWORD[104+rsp]
2372 mov rdi,rdx
2373 xor r12,rbx
2374 ror r14,6
2375 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
2376 xor rdi,r8
2377 add rcx,r12
2378 ror r13,14
2379 and r15,rdi
2380 xor r14,rdx
2381 add rcx,r13
2382 xor r15,r8
2383 ror r14,28
2384 add r10,rcx
2385 add rcx,r15
2386 mov r13,r10
2387 add r14,rcx
2388 vmovdqa XMMWORD[96+rsp],xmm10
; Rounds on [112+rsp] and [120+rsp]; xmm7 refreshed, stored to [112+rsp].
2389 vpalignr xmm8,xmm0,xmm7,8
2390 ror r13,23
2391 mov rcx,r14
2392 vpalignr xmm11,xmm4,xmm3,8
2393 mov r12,r11
2394 ror r14,5
2395 DB 143,72,120,195,200,56
2396 xor r13,r10
2397 xor r12,rax
2398 vpsrlq xmm8,xmm8,7
2399 ror r13,4
2400 xor r14,rcx
2401 vpaddq xmm7,xmm7,xmm11
2402 and r12,r10
2403 xor r13,r10
2404 add rbx,QWORD[112+rsp]
2405 mov r15,rcx
2406 DB 143,72,120,195,209,7
2407 xor r12,rax
2408 ror r14,6
2409 vpxor xmm8,xmm8,xmm9
2410 xor r15,rdx
2411 add rbx,r12
2412 ror r13,14
2413 and rdi,r15
2414 DB 143,104,120,195,222,3
2415 xor r14,rcx
2416 add rbx,r13
2417 vpxor xmm8,xmm8,xmm10
2418 xor rdi,rdx
2419 ror r14,28
2420 vpsrlq xmm10,xmm6,6
2421 add r9,rbx
2422 add rbx,rdi
2423 vpaddq xmm7,xmm7,xmm8
2424 mov r13,r9
2425 add r14,rbx
2426 DB 143,72,120,195,203,42
2427 ror r13,23
2428 mov rbx,r14
2429 vpxor xmm11,xmm11,xmm10
2430 mov r12,r10
2431 ror r14,5
2432 xor r13,r9
2433 xor r12,r11
2434 vpxor xmm11,xmm11,xmm9
2435 ror r13,4
2436 xor r14,rbx
2437 and r12,r9
2438 xor r13,r9
2439 vpaddq xmm7,xmm7,xmm11
2440 add rax,QWORD[120+rsp]
2441 mov rdi,rbx
2442 xor r12,r11
2443 ror r14,6
2444 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
2445 xor rdi,rcx
2446 add rax,r12
2447 ror r13,14
2448 and r15,rdi
2449 xor r14,rbx
2450 add rax,r13
2451 xor r15,rcx
2452 ror r14,28
2453 add r8,rax
2454 add rax,r15
2455 mov r13,r8
2456 add r14,rax
2457 vmovdqa XMMWORD[112+rsp],xmm10
; Repeat while the byte at 135+rbp is non-zero.
; NOTE(review): the exit condition depends on the contents of the K512 table,
; which lies outside the visible lines -- confirm the intended pass count.
2458 cmp BYTE[135+rbp],0
2459 jne NEAR $L$xop_00_47
; Fall-through after the $L$xop_00_47 loop: sixteen more scalar rounds that
; consume the slots [rsp]..[120+rsp] in order, with no further vector work.
; Per round, as in the loop:
;   r13 => e>>>14 ^ e>>>18 ^ e>>>41,  r14 => a>>>28 ^ a>>>34 ^ a>>>39,
;   r12 = ((f ^ g) & e) ^ g,  rdi/r15 alternate as ((a ^ b) & (b ^ c)) ^ b.
; On entry r13 and r14 hold the values left by the last round of the loop.
2460 ror r13,23
2461 mov rax,r14
2462 mov r12,r9
2463 ror r14,5
2464 xor r13,r8
2465 xor r12,r10
2466 ror r13,4
2467 xor r14,rax
2468 and r12,r8
2469 xor r13,r8
2470 add r11,QWORD[rsp]
2471 mov r15,rax
2472 xor r12,r10
2473 ror r14,6
2474 xor r15,rbx
2475 add r11,r12
2476 ror r13,14
2477 and rdi,r15
2478 xor r14,rax
2479 add r11,r13
2480 xor rdi,rbx
2481 ror r14,28
2482 add rdx,r11
2483 add r11,rdi
2484 mov r13,rdx
2485 add r14,r11
2486 ror r13,23
2487 mov r11,r14
2488 mov r12,r8
2489 ror r14,5
2490 xor r13,rdx
2491 xor r12,r9
2492 ror r13,4
2493 xor r14,r11
2494 and r12,rdx
2495 xor r13,rdx
2496 add r10,QWORD[8+rsp]
2497 mov rdi,r11
2498 xor r12,r9
2499 ror r14,6
2500 xor rdi,rax
2501 add r10,r12
2502 ror r13,14
2503 and r15,rdi
2504 xor r14,r11
2505 add r10,r13
2506 xor r15,rax
2507 ror r14,28
2508 add rcx,r10
2509 add r10,r15
2510 mov r13,rcx
2511 add r14,r10
2512 ror r13,23
2513 mov r10,r14
2514 mov r12,rdx
2515 ror r14,5
2516 xor r13,rcx
2517 xor r12,r8
2518 ror r13,4
2519 xor r14,r10
2520 and r12,rcx
2521 xor r13,rcx
2522 add r9,QWORD[16+rsp]
2523 mov r15,r10
2524 xor r12,r8
2525 ror r14,6
2526 xor r15,r11
2527 add r9,r12
2528 ror r13,14
2529 and rdi,r15
2530 xor r14,r10
2531 add r9,r13
2532 xor rdi,r11
2533 ror r14,28
2534 add rbx,r9
2535 add r9,rdi
2536 mov r13,rbx
2537 add r14,r9
2538 ror r13,23
2539 mov r9,r14
2540 mov r12,rcx
2541 ror r14,5
2542 xor r13,rbx
2543 xor r12,rdx
2544 ror r13,4
2545 xor r14,r9
2546 and r12,rbx
2547 xor r13,rbx
2548 add r8,QWORD[24+rsp]
2549 mov rdi,r9
2550 xor r12,rdx
2551 ror r14,6
2552 xor rdi,r10
2553 add r8,r12
2554 ror r13,14
2555 and r15,rdi
2556 xor r14,r9
2557 add r8,r13
2558 xor r15,r10
2559 ror r14,28
2560 add rax,r8
2561 add r8,r15
2562 mov r13,rax
2563 add r14,r8
2564 ror r13,23
2565 mov r8,r14
2566 mov r12,rbx
2567 ror r14,5
2568 xor r13,rax
2569 xor r12,rcx
2570 ror r13,4
2571 xor r14,r8
2572 and r12,rax
2573 xor r13,rax
2574 add rdx,QWORD[32+rsp]
2575 mov r15,r8
2576 xor r12,rcx
2577 ror r14,6
2578 xor r15,r9
2579 add rdx,r12
2580 ror r13,14
2581 and rdi,r15
2582 xor r14,r8
2583 add rdx,r13
2584 xor rdi,r9
2585 ror r14,28
2586 add r11,rdx
2587 add rdx,rdi
2588 mov r13,r11
2589 add r14,rdx
2590 ror r13,23
2591 mov rdx,r14
2592 mov r12,rax
2593 ror r14,5
2594 xor r13,r11
2595 xor r12,rbx
2596 ror r13,4
2597 xor r14,rdx
2598 and r12,r11
2599 xor r13,r11
2600 add rcx,QWORD[40+rsp]
2601 mov rdi,rdx
2602 xor r12,rbx
2603 ror r14,6
2604 xor rdi,r8
2605 add rcx,r12
2606 ror r13,14
2607 and r15,rdi
2608 xor r14,rdx
2609 add rcx,r13
2610 xor r15,r8
2611 ror r14,28
2612 add r10,rcx
2613 add rcx,r15
2614 mov r13,r10
2615 add r14,rcx
2616 ror r13,23
2617 mov rcx,r14
2618 mov r12,r11
2619 ror r14,5
2620 xor r13,r10
2621 xor r12,rax
2622 ror r13,4
2623 xor r14,rcx
2624 and r12,r10
2625 xor r13,r10
2626 add rbx,QWORD[48+rsp]
2627 mov r15,rcx
2628 xor r12,rax
2629 ror r14,6
2630 xor r15,rdx
2631 add rbx,r12
2632 ror r13,14
2633 and rdi,r15
2634 xor r14,rcx
2635 add rbx,r13
2636 xor rdi,rdx
2637 ror r14,28
2638 add r9,rbx
2639 add rbx,rdi
2640 mov r13,r9
2641 add r14,rbx
2642 ror r13,23
2643 mov rbx,r14
2644 mov r12,r10
2645 ror r14,5
2646 xor r13,r9
2647 xor r12,r11
2648 ror r13,4
2649 xor r14,rbx
2650 and r12,r9
2651 xor r13,r9
2652 add rax,QWORD[56+rsp]
2653 mov rdi,rbx
2654 xor r12,r11
2655 ror r14,6
2656 xor rdi,rcx
2657 add rax,r12
2658 ror r13,14
2659 and r15,rdi
2660 xor r14,rbx
2661 add rax,r13
2662 xor r15,rcx
2663 ror r14,28
2664 add r8,rax
2665 add rax,r15
2666 mov r13,r8
2667 add r14,rax
; Second half: slots [64+rsp]..[120+rsp]; the register roles are back in
; their starting positions (a = rax ... h = r11).
2668 ror r13,23
2669 mov rax,r14
2670 mov r12,r9
2671 ror r14,5
2672 xor r13,r8
2673 xor r12,r10
2674 ror r13,4
2675 xor r14,rax
2676 and r12,r8
2677 xor r13,r8
2678 add r11,QWORD[64+rsp]
2679 mov r15,rax
2680 xor r12,r10
2681 ror r14,6
2682 xor r15,rbx
2683 add r11,r12
2684 ror r13,14
2685 and rdi,r15
2686 xor r14,rax
2687 add r11,r13
2688 xor rdi,rbx
2689 ror r14,28
2690 add rdx,r11
2691 add r11,rdi
2692 mov r13,rdx
2693 add r14,r11
2694 ror r13,23
2695 mov r11,r14
2696 mov r12,r8
2697 ror r14,5
2698 xor r13,rdx
2699 xor r12,r9
2700 ror r13,4
2701 xor r14,r11
2702 and r12,rdx
2703 xor r13,rdx
2704 add r10,QWORD[72+rsp]
2705 mov rdi,r11
2706 xor r12,r9
2707 ror r14,6
2708 xor rdi,rax
2709 add r10,r12
2710 ror r13,14
2711 and r15,rdi
2712 xor r14,r11
2713 add r10,r13
2714 xor r15,rax
2715 ror r14,28
2716 add rcx,r10
2717 add r10,r15
2718 mov r13,rcx
2719 add r14,r10
2720 ror r13,23
2721 mov r10,r14
2722 mov r12,rdx
2723 ror r14,5
2724 xor r13,rcx
2725 xor r12,r8
2726 ror r13,4
2727 xor r14,r10
2728 and r12,rcx
2729 xor r13,rcx
2730 add r9,QWORD[80+rsp]
2731 mov r15,r10
2732 xor r12,r8
2733 ror r14,6
2734 xor r15,r11
2735 add r9,r12
2736 ror r13,14
2737 and rdi,r15
2738 xor r14,r10
2739 add r9,r13
2740 xor rdi,r11
2741 ror r14,28
2742 add rbx,r9
2743 add r9,rdi
2744 mov r13,rbx
2745 add r14,r9
2746 ror r13,23
2747 mov r9,r14
2748 mov r12,rcx
2749 ror r14,5
2750 xor r13,rbx
2751 xor r12,rdx
2752 ror r13,4
2753 xor r14,r9
2754 and r12,rbx
2755 xor r13,rbx
2756 add r8,QWORD[88+rsp]
2757 mov rdi,r9
2758 xor r12,rdx
2759 ror r14,6
2760 xor rdi,r10
2761 add r8,r12
2762 ror r13,14
2763 and r15,rdi
2764 xor r14,r9
2765 add r8,r13
2766 xor r15,r10
2767 ror r14,28
2768 add rax,r8
2769 add r8,r15
2770 mov r13,rax
2771 add r14,r8
2772 ror r13,23
2773 mov r8,r14
2774 mov r12,rbx
2775 ror r14,5
2776 xor r13,rax
2777 xor r12,rcx
2778 ror r13,4
2779 xor r14,r8
2780 and r12,rax
2781 xor r13,rax
2782 add rdx,QWORD[96+rsp]
2783 mov r15,r8
2784 xor r12,rcx
2785 ror r14,6
2786 xor r15,r9
2787 add rdx,r12
2788 ror r13,14
2789 and rdi,r15
2790 xor r14,r8
2791 add rdx,r13
2792 xor rdi,r9
2793 ror r14,28
2794 add r11,rdx
2795 add rdx,rdi
2796 mov r13,r11
2797 add r14,rdx
2798 ror r13,23
2799 mov rdx,r14
2800 mov r12,rax
2801 ror r14,5
2802 xor r13,r11
2803 xor r12,rbx
2804 ror r13,4
2805 xor r14,rdx
2806 and r12,r11
2807 xor r13,r11
2808 add rcx,QWORD[104+rsp]
2809 mov rdi,rdx
2810 xor r12,rbx
2811 ror r14,6
2812 xor rdi,r8
2813 add rcx,r12
2814 ror r13,14
2815 and r15,rdi
2816 xor r14,rdx
2817 add rcx,r13
2818 xor r15,r8
2819 ror r14,28
2820 add r10,rcx
2821 add rcx,r15
2822 mov r13,r10
2823 add r14,rcx
2824 ror r13,23
2825 mov rcx,r14
2826 mov r12,r11
2827 ror r14,5
2828 xor r13,r10
2829 xor r12,rax
2830 ror r13,4
2831 xor r14,rcx
2832 and r12,r10
2833 xor r13,r10
2834 add rbx,QWORD[112+rsp]
2835 mov r15,rcx
2836 xor r12,rax
2837 ror r14,6
2838 xor r15,rdx
2839 add rbx,r12
2840 ror r13,14
2841 and rdi,r15
2842 xor r14,rcx
2843 add rbx,r13
2844 xor rdi,rdx
2845 ror r14,28
2846 add r9,rbx
2847 add rbx,rdi
2848 mov r13,r9
2849 add r14,rbx
2850 ror r13,23
2851 mov rbx,r14
2852 mov r12,r10
2853 ror r14,5
2854 xor r13,r9
2855 xor r12,r11
2856 ror r13,4
2857 xor r14,rbx
2858 and r12,r9
2859 xor r13,r9
2860 add rax,QWORD[120+rsp]
2861 mov rdi,rbx
2862 xor r12,r11
2863 ror r14,6
2864 xor rdi,rcx
2865 add rax,r12
2866 ror r13,14
2867 and r15,rdi
2868 xor r14,rbx
2869 add rax,r13
2870 xor r15,rcx
2871 ror r14,28
2872 add r8,rax
2873 add rax,r15
2874 mov r13,r8
2875 add r14,rax
; End of one 128-byte block: fold the working registers into the state,
; advance the input pointer, and loop while input remains.
; rdi was used as scratch by the rounds, so the state pointer is reloaded from
; the frame; rax receives the value left in r14 by the last round.
2876 mov rdi,QWORD[((128+0))+rsp]
2877 mov rax,r14
2878
2879 add rax,QWORD[rdi]
2880 lea rsi,[128+rsi]
2881 add rbx,QWORD[8+rdi]
2882 add rcx,QWORD[16+rdi]
2883 add rdx,QWORD[24+rdi]
2884 add r8,QWORD[32+rdi]
2885 add r9,QWORD[40+rdi]
2886 add r10,QWORD[48+rdi]
2887 add r11,QWORD[56+rdi]
2888
; Compare the advanced input pointer with the saved end pointer. The stores
; below are plain movs, which leave the flags intact for the jb.
2889 cmp rsi,QWORD[((128+16))+rsp]
2890
2891 mov QWORD[rdi],rax
2892 mov QWORD[8+rdi],rbx
2893 mov QWORD[16+rdi],rcx
2894 mov QWORD[24+rdi],rdx
2895 mov QWORD[32+rdi],r8
2896 mov QWORD[40+rdi],r9
2897 mov QWORD[48+rdi],r10
2898 mov QWORD[56+rdi],r11
; Unsigned compare: continue while rsi is below the end pointer.
2899 jb NEAR $L$loop_xop
2900
; Teardown: rsi = stack pointer recorded right after the six pushes.
2901 mov rsi,QWORD[((128+24))+rsp]
2902 vzeroupper
; Restore the Win64 callee-saved vector registers saved in the prologue.
2903 movaps xmm6,XMMWORD[((128+32))+rsp]
2904 movaps xmm7,XMMWORD[((128+48))+rsp]
2905 movaps xmm8,XMMWORD[((128+64))+rsp]
2906 movaps xmm9,XMMWORD[((128+80))+rsp]
2907 movaps xmm10,XMMWORD[((128+96))+rsp]
2908 movaps xmm11,XMMWORD[((128+112))+rsp]
; Reload the pushed registers by offset (reverse push order: r15 was pushed
; last and sits at [rsi]), then drop the 48 bytes of pushes from rsp.
2909 mov r15,QWORD[rsi]
2910 mov r14,QWORD[8+rsi]
2911 mov r13,QWORD[16+rsi]
2912 mov r12,QWORD[24+rsi]
2913 mov rbp,QWORD[32+rsi]
2914 mov rbx,QWORD[40+rsi]
2915 lea rsp,[48+rsi]
2916 $L$epilogue_xop:
; rsp is back at its entry value; restore rdi/rsi from the slots written by
; the WIN64 prologue and return.
2917 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2918 mov rsi,QWORD[16+rsp]
2919 DB 0F3h,0C3h ;repret
2920 $L$SEH_end_sha512_block_data_order_xop:
2921
2922 ALIGN 64
2923 sha512_block_data_order_avx:
2924 mov QWORD[8+rsp],rdi ;WIN64 prologue
2925 mov QWORD[16+rsp],rsi
2926 mov rax,rsp
2927 $L$SEH_begin_sha512_block_data_order_avx:
2928 mov rdi,rcx
2929 mov rsi,rdx
2930 mov rdx,r8
2931
2932
2933 $L$avx_shortcut:
2934 push rbx
2935 push rbp
2936 push r12
2937 push r13
2938 push r14
2939 push r15
2940 mov r11,rsp
2941 shl rdx,4
2942 sub rsp,256
2943 lea rdx,[rdx*8+rsi]
2944 and rsp,-64
2945 mov QWORD[((128+0))+rsp],rdi
2946 mov QWORD[((128+8))+rsp],rsi
2947 mov QWORD[((128+16))+rsp],rdx
2948 mov QWORD[((128+24))+rsp],r11
2949 movaps XMMWORD[(128+32)+rsp],xmm6
2950 movaps XMMWORD[(128+48)+rsp],xmm7
2951 movaps XMMWORD[(128+64)+rsp],xmm8
2952 movaps XMMWORD[(128+80)+rsp],xmm9
2953 movaps XMMWORD[(128+96)+rsp],xmm10
2954 movaps XMMWORD[(128+112)+rsp],xmm11
2955 $L$prologue_avx:
2956
2957 vzeroupper
2958 mov rax,QWORD[rdi]
2959 mov rbx,QWORD[8+rdi]
2960 mov rcx,QWORD[16+rdi]
2961 mov rdx,QWORD[24+rdi]
2962 mov r8,QWORD[32+rdi]
2963 mov r9,QWORD[40+rdi]
2964 mov r10,QWORD[48+rdi]
2965 mov r11,QWORD[56+rdi]
2966 jmp NEAR $L$loop_avx
2967 ALIGN 16
2968 $L$loop_avx:
2969 vmovdqa xmm11,XMMWORD[((K512+1280))]
2970 vmovdqu xmm0,XMMWORD[rsi]
2971 lea rbp,[((K512+128))]
2972 vmovdqu xmm1,XMMWORD[16+rsi]
2973 vmovdqu xmm2,XMMWORD[32+rsi]
2974 vpshufb xmm0,xmm0,xmm11
2975 vmovdqu xmm3,XMMWORD[48+rsi]
2976 vpshufb xmm1,xmm1,xmm11
2977 vmovdqu xmm4,XMMWORD[64+rsi]
2978 vpshufb xmm2,xmm2,xmm11
2979 vmovdqu xmm5,XMMWORD[80+rsi]
2980 vpshufb xmm3,xmm3,xmm11
2981 vmovdqu xmm6,XMMWORD[96+rsi]
2982 vpshufb xmm4,xmm4,xmm11
2983 vmovdqu xmm7,XMMWORD[112+rsi]
2984 vpshufb xmm5,xmm5,xmm11
2985 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
2986 vpshufb xmm6,xmm6,xmm11
2987 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
2988 vpshufb xmm7,xmm7,xmm11
2989 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
2990 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]
2991 vmovdqa XMMWORD[rsp],xmm8
2992 vpaddq xmm8,xmm4,XMMWORD[rbp]
2993 vmovdqa XMMWORD[16+rsp],xmm9
2994 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
2995 vmovdqa XMMWORD[32+rsp],xmm10
2996 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
2997 vmovdqa XMMWORD[48+rsp],xmm11
2998 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
2999 vmovdqa XMMWORD[64+rsp],xmm8
3000 mov r14,rax
3001 vmovdqa XMMWORD[80+rsp],xmm9
3002 mov rdi,rbx
3003 vmovdqa XMMWORD[96+rsp],xmm10
3004 xor rdi,rcx
3005 vmovdqa XMMWORD[112+rsp],xmm11
3006 mov r13,r8
3007 jmp NEAR $L$avx_00_47
3008
3009 ALIGN 16
3010 $L$avx_00_47:
3011 add rbp,256
3012 vpalignr xmm8,xmm1,xmm0,8
3013 shrd r13,r13,23
3014 mov rax,r14
3015 vpalignr xmm11,xmm5,xmm4,8
3016 mov r12,r9
3017 shrd r14,r14,5
3018 vpsrlq xmm10,xmm8,1
3019 xor r13,r8
3020 xor r12,r10
3021 vpaddq xmm0,xmm0,xmm11
3022 shrd r13,r13,4
3023 xor r14,rax
3024 vpsrlq xmm11,xmm8,7
3025 and r12,r8
3026 xor r13,r8
3027 vpsllq xmm9,xmm8,56
3028 add r11,QWORD[rsp]
3029 mov r15,rax
3030 vpxor xmm8,xmm11,xmm10
3031 xor r12,r10
3032 shrd r14,r14,6
3033 vpsrlq xmm10,xmm10,7
3034 xor r15,rbx
3035 add r11,r12
3036 vpxor xmm8,xmm8,xmm9
3037 shrd r13,r13,14
3038 and rdi,r15
3039 vpsllq xmm9,xmm9,7
3040 xor r14,rax
3041 add r11,r13
3042 vpxor xmm8,xmm8,xmm10
3043 xor rdi,rbx
3044 shrd r14,r14,28
3045 vpsrlq xmm11,xmm7,6
3046 add rdx,r11
3047 add r11,rdi
3048 vpxor xmm8,xmm8,xmm9
3049 mov r13,rdx
3050 add r14,r11
3051 vpsllq xmm10,xmm7,3
3052 shrd r13,r13,23
3053 mov r11,r14
3054 vpaddq xmm0,xmm0,xmm8
3055 mov r12,r8
3056 shrd r14,r14,5
3057 vpsrlq xmm9,xmm7,19
3058 xor r13,rdx
3059 xor r12,r9
3060 vpxor xmm11,xmm11,xmm10
3061 shrd r13,r13,4
3062 xor r14,r11
3063 vpsllq xmm10,xmm10,42
3064 and r12,rdx
3065 xor r13,rdx
3066 vpxor xmm11,xmm11,xmm9
3067 add r10,QWORD[8+rsp]
3068 mov rdi,r11
3069 vpsrlq xmm9,xmm9,42
3070 xor r12,r9
3071 shrd r14,r14,6
3072 vpxor xmm11,xmm11,xmm10
3073 xor rdi,rax
3074 add r10,r12
3075 vpxor xmm11,xmm11,xmm9
3076 shrd r13,r13,14
3077 and r15,rdi
3078 vpaddq xmm0,xmm0,xmm11
3079 xor r14,r11
3080 add r10,r13
3081 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
3082 xor r15,rax
3083 shrd r14,r14,28
3084 add rcx,r10
3085 add r10,r15
3086 mov r13,rcx
3087 add r14,r10
3088 vmovdqa XMMWORD[rsp],xmm10
3089 vpalignr xmm8,xmm2,xmm1,8
3090 shrd r13,r13,23
3091 mov r10,r14
3092 vpalignr xmm11,xmm6,xmm5,8
3093 mov r12,rdx
3094 shrd r14,r14,5
3095 vpsrlq xmm10,xmm8,1
3096 xor r13,rcx
3097 xor r12,r8
3098 vpaddq xmm1,xmm1,xmm11
3099 shrd r13,r13,4
3100 xor r14,r10
3101 vpsrlq xmm11,xmm8,7
3102 and r12,rcx
3103 xor r13,rcx
3104 vpsllq xmm9,xmm8,56
3105 add r9,QWORD[16+rsp]
3106 mov r15,r10
3107 vpxor xmm8,xmm11,xmm10
3108 xor r12,r8
3109 shrd r14,r14,6
3110 vpsrlq xmm10,xmm10,7
3111 xor r15,r11
3112 add r9,r12
3113 vpxor xmm8,xmm8,xmm9
3114 shrd r13,r13,14
3115 and rdi,r15
3116 vpsllq xmm9,xmm9,7
3117 xor r14,r10
3118 add r9,r13
3119 vpxor xmm8,xmm8,xmm10
3120 xor rdi,r11
3121 shrd r14,r14,28
3122 vpsrlq xmm11,xmm0,6
3123 add rbx,r9
3124 add r9,rdi
3125 vpxor xmm8,xmm8,xmm9
3126 mov r13,rbx
3127 add r14,r9
3128 vpsllq xmm10,xmm0,3
3129 shrd r13,r13,23
3130 mov r9,r14
3131 vpaddq xmm1,xmm1,xmm8
3132 mov r12,rcx
3133 shrd r14,r14,5
3134 vpsrlq xmm9,xmm0,19
3135 xor r13,rbx
3136 xor r12,rdx
3137 vpxor xmm11,xmm11,xmm10
3138 shrd r13,r13,4
3139 xor r14,r9
3140 vpsllq xmm10,xmm10,42
3141 and r12,rbx
3142 xor r13,rbx
3143 vpxor xmm11,xmm11,xmm9
3144 add r8,QWORD[24+rsp]
3145 mov rdi,r9
3146 vpsrlq xmm9,xmm9,42
3147 xor r12,rdx
3148 shrd r14,r14,6
3149 vpxor xmm11,xmm11,xmm10
3150 xor rdi,r10
3151 add r8,r12
3152 vpxor xmm11,xmm11,xmm9
3153 shrd r13,r13,14
3154 and r15,rdi
3155 vpaddq xmm1,xmm1,xmm11
3156 xor r14,r9
3157 add r8,r13
3158 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
3159 xor r15,r10
3160 shrd r14,r14,28
3161 add rax,r8
3162 add r8,r15
3163 mov r13,rax
3164 add r14,r8
3165 vmovdqa XMMWORD[16+rsp],xmm10
3166 vpalignr xmm8,xmm3,xmm2,8
3167 shrd r13,r13,23
3168 mov r8,r14
3169 vpalignr xmm11,xmm7,xmm6,8
3170 mov r12,rbx
3171 shrd r14,r14,5
3172 vpsrlq xmm10,xmm8,1
3173 xor r13,rax
3174 xor r12,rcx
3175 vpaddq xmm2,xmm2,xmm11
3176 shrd r13,r13,4
3177 xor r14,r8
3178 vpsrlq xmm11,xmm8,7
3179 and r12,rax
3180 xor r13,rax
3181 vpsllq xmm9,xmm8,56
3182 add rdx,QWORD[32+rsp]
3183 mov r15,r8
3184 vpxor xmm8,xmm11,xmm10
3185 xor r12,rcx
3186 shrd r14,r14,6
3187 vpsrlq xmm10,xmm10,7
3188 xor r15,r9
3189 add rdx,r12
3190 vpxor xmm8,xmm8,xmm9
3191 shrd r13,r13,14
3192 and rdi,r15
3193 vpsllq xmm9,xmm9,7
3194 xor r14,r8
3195 add rdx,r13
3196 vpxor xmm8,xmm8,xmm10
3197 xor rdi,r9
3198 shrd r14,r14,28
3199 vpsrlq xmm11,xmm1,6
3200 add r11,rdx
3201 add rdx,rdi
3202 vpxor xmm8,xmm8,xmm9
3203 mov r13,r11
3204 add r14,rdx
3205 vpsllq xmm10,xmm1,3
3206 shrd r13,r13,23
3207 mov rdx,r14
3208 vpaddq xmm2,xmm2,xmm8
3209 mov r12,rax
3210 shrd r14,r14,5
3211 vpsrlq xmm9,xmm1,19
3212 xor r13,r11
3213 xor r12,rbx
3214 vpxor xmm11,xmm11,xmm10
3215 shrd r13,r13,4
3216 xor r14,rdx
3217 vpsllq xmm10,xmm10,42
3218 and r12,r11
3219 xor r13,r11
3220 vpxor xmm11,xmm11,xmm9
3221 add rcx,QWORD[40+rsp]
3222 mov rdi,rdx
3223 vpsrlq xmm9,xmm9,42
3224 xor r12,rbx
3225 shrd r14,r14,6
3226 vpxor xmm11,xmm11,xmm10
3227 xor rdi,r8
3228 add rcx,r12
3229 vpxor xmm11,xmm11,xmm9
3230 shrd r13,r13,14
3231 and r15,rdi
3232 vpaddq xmm2,xmm2,xmm11
3233 xor r14,rdx
3234 add rcx,r13
3235 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
3236 xor r15,r8
3237 shrd r14,r14,28
3238 add r10,rcx
3239 add rcx,r15
3240 mov r13,r10
3241 add r14,rcx
3242 vmovdqa XMMWORD[32+rsp],xmm10
3243 vpalignr xmm8,xmm4,xmm3,8
3244 shrd r13,r13,23
3245 mov rcx,r14
3246 vpalignr xmm11,xmm0,xmm7,8
3247 mov r12,r11
3248 shrd r14,r14,5
3249 vpsrlq xmm10,xmm8,1
3250 xor r13,r10
3251 xor r12,rax
3252 vpaddq xmm3,xmm3,xmm11
3253 shrd r13,r13,4
3254 xor r14,rcx
3255 vpsrlq xmm11,xmm8,7
3256 and r12,r10
3257 xor r13,r10
3258 vpsllq xmm9,xmm8,56
3259 add rbx,QWORD[48+rsp]
3260 mov r15,rcx
3261 vpxor xmm8,xmm11,xmm10
3262 xor r12,rax
3263 shrd r14,r14,6
3264 vpsrlq xmm10,xmm10,7
3265 xor r15,rdx
3266 add rbx,r12
3267 vpxor xmm8,xmm8,xmm9
3268 shrd r13,r13,14
3269 and rdi,r15
3270 vpsllq xmm9,xmm9,7
3271 xor r14,rcx
3272 add rbx,r13
3273 vpxor xmm8,xmm8,xmm10
3274 xor rdi,rdx
3275 shrd r14,r14,28
3276 vpsrlq xmm11,xmm2,6
3277 add r9,rbx
3278 add rbx,rdi
3279 vpxor xmm8,xmm8,xmm9
3280 mov r13,r9
3281 add r14,rbx
3282 vpsllq xmm10,xmm2,3
3283 shrd r13,r13,23
3284 mov rbx,r14
3285 vpaddq xmm3,xmm3,xmm8
3286 mov r12,r10
3287 shrd r14,r14,5
3288 vpsrlq xmm9,xmm2,19
3289 xor r13,r9
3290 xor r12,r11
3291 vpxor xmm11,xmm11,xmm10
3292 shrd r13,r13,4
3293 xor r14,rbx
3294 vpsllq xmm10,xmm10,42
3295 and r12,r9
3296 xor r13,r9
3297 vpxor xmm11,xmm11,xmm9
3298 add rax,QWORD[56+rsp]
3299 mov rdi,rbx
3300 vpsrlq xmm9,xmm9,42
3301 xor r12,r11
3302 shrd r14,r14,6
3303 vpxor xmm11,xmm11,xmm10
3304 xor rdi,rcx
3305 add rax,r12
3306 vpxor xmm11,xmm11,xmm9
3307 shrd r13,r13,14
3308 and r15,rdi
3309 vpaddq xmm3,xmm3,xmm11
3310 xor r14,rbx
3311 add rax,r13
3312 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
3313 xor r15,rcx
3314 shrd r14,r14,28
3315 add r8,rax
3316 add rax,r15
3317 mov r13,r8
3318 add r14,rax
3319 vmovdqa XMMWORD[48+rsp],xmm10
3320 vpalignr xmm8,xmm5,xmm4,8
3321 shrd r13,r13,23
3322 mov rax,r14
3323 vpalignr xmm11,xmm1,xmm0,8
3324 mov r12,r9
3325 shrd r14,r14,5
3326 vpsrlq xmm10,xmm8,1
3327 xor r13,r8
3328 xor r12,r10
3329 vpaddq xmm4,xmm4,xmm11
3330 shrd r13,r13,4
3331 xor r14,rax
3332 vpsrlq xmm11,xmm8,7
3333 and r12,r8
3334 xor r13,r8
3335 vpsllq xmm9,xmm8,56
3336 add r11,QWORD[64+rsp]
3337 mov r15,rax
3338 vpxor xmm8,xmm11,xmm10
3339 xor r12,r10
3340 shrd r14,r14,6
3341 vpsrlq xmm10,xmm10,7
3342 xor r15,rbx
3343 add r11,r12
3344 vpxor xmm8,xmm8,xmm9
3345 shrd r13,r13,14
3346 and rdi,r15
3347 vpsllq xmm9,xmm9,7
3348 xor r14,rax
3349 add r11,r13
3350 vpxor xmm8,xmm8,xmm10
3351 xor rdi,rbx
3352 shrd r14,r14,28
3353 vpsrlq xmm11,xmm3,6
3354 add rdx,r11
3355 add r11,rdi
3356 vpxor xmm8,xmm8,xmm9
3357 mov r13,rdx
3358 add r14,r11
3359 vpsllq xmm10,xmm3,3
3360 shrd r13,r13,23
3361 mov r11,r14
3362 vpaddq xmm4,xmm4,xmm8
3363 mov r12,r8
3364 shrd r14,r14,5
3365 vpsrlq xmm9,xmm3,19
3366 xor r13,rdx
3367 xor r12,r9
3368 vpxor xmm11,xmm11,xmm10
3369 shrd r13,r13,4
3370 xor r14,r11
3371 vpsllq xmm10,xmm10,42
3372 and r12,rdx
3373 xor r13,rdx
3374 vpxor xmm11,xmm11,xmm9
3375 add r10,QWORD[72+rsp]
3376 mov rdi,r11
3377 vpsrlq xmm9,xmm9,42
3378 xor r12,r9
3379 shrd r14,r14,6
3380 vpxor xmm11,xmm11,xmm10
3381 xor rdi,rax
3382 add r10,r12
3383 vpxor xmm11,xmm11,xmm9
3384 shrd r13,r13,14
3385 and r15,rdi
3386 vpaddq xmm4,xmm4,xmm11
3387 xor r14,r11
3388 add r10,r13
3389 vpaddq xmm10,xmm4,XMMWORD[rbp]
3390 xor r15,rax
3391 shrd r14,r14,28
3392 add rcx,r10
3393 add r10,r15
3394 mov r13,rcx
3395 add r14,r10
3396 vmovdqa XMMWORD[64+rsp],xmm10
3397 vpalignr xmm8,xmm6,xmm5,8
3398 shrd r13,r13,23
3399 mov r10,r14
3400 vpalignr xmm11,xmm2,xmm1,8
3401 mov r12,rdx
3402 shrd r14,r14,5
3403 vpsrlq xmm10,xmm8,1
3404 xor r13,rcx
3405 xor r12,r8
3406 vpaddq xmm5,xmm5,xmm11
3407 shrd r13,r13,4
3408 xor r14,r10
3409 vpsrlq xmm11,xmm8,7
3410 and r12,rcx
3411 xor r13,rcx
3412 vpsllq xmm9,xmm8,56
3413 add r9,QWORD[80+rsp]
3414 mov r15,r10
3415 vpxor xmm8,xmm11,xmm10
3416 xor r12,r8
3417 shrd r14,r14,6
3418 vpsrlq xmm10,xmm10,7
3419 xor r15,r11
3420 add r9,r12
3421 vpxor xmm8,xmm8,xmm9
3422 shrd r13,r13,14
3423 and rdi,r15
3424 vpsllq xmm9,xmm9,7
3425 xor r14,r10
3426 add r9,r13
3427 vpxor xmm8,xmm8,xmm10
3428 xor rdi,r11
3429 shrd r14,r14,28
3430 vpsrlq xmm11,xmm4,6
3431 add rbx,r9
3432 add r9,rdi
3433 vpxor xmm8,xmm8,xmm9
3434 mov r13,rbx
3435 add r14,r9
3436 vpsllq xmm10,xmm4,3
3437 shrd r13,r13,23
3438 mov r9,r14
3439 vpaddq xmm5,xmm5,xmm8
3440 mov r12,rcx
3441 shrd r14,r14,5
3442 vpsrlq xmm9,xmm4,19
3443 xor r13,rbx
3444 xor r12,rdx
3445 vpxor xmm11,xmm11,xmm10
3446 shrd r13,r13,4
3447 xor r14,r9
3448 vpsllq xmm10,xmm10,42
3449 and r12,rbx
3450 xor r13,rbx
3451 vpxor xmm11,xmm11,xmm9
3452 add r8,QWORD[88+rsp]
3453 mov rdi,r9
3454 vpsrlq xmm9,xmm9,42
3455 xor r12,rdx
3456 shrd r14,r14,6
3457 vpxor xmm11,xmm11,xmm10
3458 xor rdi,r10
3459 add r8,r12
3460 vpxor xmm11,xmm11,xmm9
3461 shrd r13,r13,14
3462 and r15,rdi
3463 vpaddq xmm5,xmm5,xmm11
3464 xor r14,r9
3465 add r8,r13
3466 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
3467 xor r15,r10
3468 shrd r14,r14,28
3469 add rax,r8
3470 add r8,r15
3471 mov r13,rax
3472 add r14,r8
3473 vmovdqa XMMWORD[80+rsp],xmm10
3474 vpalignr xmm8,xmm7,xmm6,8
3475 shrd r13,r13,23
3476 mov r8,r14
3477 vpalignr xmm11,xmm3,xmm2,8
3478 mov r12,rbx
3479 shrd r14,r14,5
3480 vpsrlq xmm10,xmm8,1
3481 xor r13,rax
3482 xor r12,rcx
3483 vpaddq xmm6,xmm6,xmm11
3484 shrd r13,r13,4
3485 xor r14,r8
3486 vpsrlq xmm11,xmm8,7
3487 and r12,rax
3488 xor r13,rax
3489 vpsllq xmm9,xmm8,56
3490 add rdx,QWORD[96+rsp]
3491 mov r15,r8
3492 vpxor xmm8,xmm11,xmm10
3493 xor r12,rcx
3494 shrd r14,r14,6
3495 vpsrlq xmm10,xmm10,7
3496 xor r15,r9
3497 add rdx,r12
3498 vpxor xmm8,xmm8,xmm9
3499 shrd r13,r13,14
3500 and rdi,r15
3501 vpsllq xmm9,xmm9,7
3502 xor r14,r8
3503 add rdx,r13
3504 vpxor xmm8,xmm8,xmm10
3505 xor rdi,r9
3506 shrd r14,r14,28
3507 vpsrlq xmm11,xmm5,6
3508 add r11,rdx
3509 add rdx,rdi
3510 vpxor xmm8,xmm8,xmm9
3511 mov r13,r11
3512 add r14,rdx
3513 vpsllq xmm10,xmm5,3
3514 shrd r13,r13,23
3515 mov rdx,r14
3516 vpaddq xmm6,xmm6,xmm8
3517 mov r12,rax
3518 shrd r14,r14,5
3519 vpsrlq xmm9,xmm5,19
3520 xor r13,r11
3521 xor r12,rbx
3522 vpxor xmm11,xmm11,xmm10
3523 shrd r13,r13,4
3524 xor r14,rdx
3525 vpsllq xmm10,xmm10,42
3526 and r12,r11
3527 xor r13,r11
3528 vpxor xmm11,xmm11,xmm9
3529 add rcx,QWORD[104+rsp]
3530 mov rdi,rdx
3531 vpsrlq xmm9,xmm9,42
3532 xor r12,rbx
3533 shrd r14,r14,6
3534 vpxor xmm11,xmm11,xmm10
3535 xor rdi,r8
3536 add rcx,r12
3537 vpxor xmm11,xmm11,xmm9
3538 shrd r13,r13,14
3539 and r15,rdi
3540 vpaddq xmm6,xmm6,xmm11
3541 xor r14,rdx
3542 add rcx,r13
3543 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
3544 xor r15,r8
3545 shrd r14,r14,28
3546 add r10,rcx
3547 add rcx,r15
3548 mov r13,r10
3549 add r14,rcx
3550 vmovdqa XMMWORD[96+rsp],xmm10
3551 vpalignr xmm8,xmm0,xmm7,8
3552 shrd r13,r13,23
3553 mov rcx,r14
3554 vpalignr xmm11,xmm4,xmm3,8
3555 mov r12,r11
3556 shrd r14,r14,5
3557 vpsrlq xmm10,xmm8,1
3558 xor r13,r10
3559 xor r12,rax
3560 vpaddq xmm7,xmm7,xmm11
3561 shrd r13,r13,4
3562 xor r14,rcx
3563 vpsrlq xmm11,xmm8,7
3564 and r12,r10
3565 xor r13,r10
3566 vpsllq xmm9,xmm8,56
3567 add rbx,QWORD[112+rsp]
3568 mov r15,rcx
3569 vpxor xmm8,xmm11,xmm10
3570 xor r12,rax
3571 shrd r14,r14,6
3572 vpsrlq xmm10,xmm10,7
3573 xor r15,rdx
3574 add rbx,r12
3575 vpxor xmm8,xmm8,xmm9
3576 shrd r13,r13,14
3577 and rdi,r15
3578 vpsllq xmm9,xmm9,7
3579 xor r14,rcx
3580 add rbx,r13
3581 vpxor xmm8,xmm8,xmm10
3582 xor rdi,rdx
3583 shrd r14,r14,28
3584 vpsrlq xmm11,xmm6,6
3585 add r9,rbx
3586 add rbx,rdi
3587 vpxor xmm8,xmm8,xmm9
3588 mov r13,r9
3589 add r14,rbx
3590 vpsllq xmm10,xmm6,3
3591 shrd r13,r13,23
3592 mov rbx,r14
3593 vpaddq xmm7,xmm7,xmm8
3594 mov r12,r10
3595 shrd r14,r14,5
3596 vpsrlq xmm9,xmm6,19
3597 xor r13,r9
3598 xor r12,r11
3599 vpxor xmm11,xmm11,xmm10
3600 shrd r13,r13,4
3601 xor r14,rbx
3602 vpsllq xmm10,xmm10,42
3603 and r12,r9
3604 xor r13,r9
3605 vpxor xmm11,xmm11,xmm9
3606 add rax,QWORD[120+rsp]
3607 mov rdi,rbx
3608 vpsrlq xmm9,xmm9,42
3609 xor r12,r11
3610 shrd r14,r14,6
3611 vpxor xmm11,xmm11,xmm10
3612 xor rdi,rcx
3613 add rax,r12
3614 vpxor xmm11,xmm11,xmm9
3615 shrd r13,r13,14
3616 and r15,rdi
3617 vpaddq xmm7,xmm7,xmm11
3618 xor r14,rbx
3619 add rax,r13
3620 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
3621 xor r15,rcx
3622 shrd r14,r14,28
3623 add r8,rax
3624 add rax,r15
3625 mov r13,r8
3626 add r14,rax
3627 vmovdqa XMMWORD[112+rsp],xmm10
3628 cmp BYTE[135+rbp],0
3629 jne NEAR $L$avx_00_47
3630 shrd r13,r13,23
3631 mov rax,r14
3632 mov r12,r9
3633 shrd r14,r14,5
3634 xor r13,r8
3635 xor r12,r10
3636 shrd r13,r13,4
3637 xor r14,rax
3638 and r12,r8
3639 xor r13,r8
3640 add r11,QWORD[rsp]
3641 mov r15,rax
3642 xor r12,r10
3643 shrd r14,r14,6
3644 xor r15,rbx
3645 add r11,r12
3646 shrd r13,r13,14
3647 and rdi,r15
3648 xor r14,rax
3649 add r11,r13
3650 xor rdi,rbx
3651 shrd r14,r14,28
3652 add rdx,r11
3653 add r11,rdi
3654 mov r13,rdx
3655 add r14,r11
3656 shrd r13,r13,23
3657 mov r11,r14
3658 mov r12,r8
3659 shrd r14,r14,5
3660 xor r13,rdx
3661 xor r12,r9
3662 shrd r13,r13,4
3663 xor r14,r11
3664 and r12,rdx
3665 xor r13,rdx
3666 add r10,QWORD[8+rsp]
3667 mov rdi,r11
3668 xor r12,r9
3669 shrd r14,r14,6
3670 xor rdi,rax
3671 add r10,r12
3672 shrd r13,r13,14
3673 and r15,rdi
3674 xor r14,r11
3675 add r10,r13
3676 xor r15,rax
3677 shrd r14,r14,28
3678 add rcx,r10
3679 add r10,r15
3680 mov r13,rcx
3681 add r14,r10
3682 shrd r13,r13,23
3683 mov r10,r14
3684 mov r12,rdx
3685 shrd r14,r14,5
3686 xor r13,rcx
3687 xor r12,r8
3688 shrd r13,r13,4
3689 xor r14,r10
3690 and r12,rcx
3691 xor r13,rcx
3692 add r9,QWORD[16+rsp]
3693 mov r15,r10
3694 xor r12,r8
3695 shrd r14,r14,6
3696 xor r15,r11
3697 add r9,r12
3698 shrd r13,r13,14
3699 and rdi,r15
3700 xor r14,r10
3701 add r9,r13
3702 xor rdi,r11
3703 shrd r14,r14,28
3704 add rbx,r9
3705 add r9,rdi
3706 mov r13,rbx
3707 add r14,r9
3708 shrd r13,r13,23
3709 mov r9,r14
3710 mov r12,rcx
3711 shrd r14,r14,5
3712 xor r13,rbx
3713 xor r12,rdx
3714 shrd r13,r13,4
3715 xor r14,r9
3716 and r12,rbx
3717 xor r13,rbx
3718 add r8,QWORD[24+rsp]
3719 mov rdi,r9
3720 xor r12,rdx
3721 shrd r14,r14,6
3722 xor rdi,r10
3723 add r8,r12
3724 shrd r13,r13,14
3725 and r15,rdi
3726 xor r14,r9
3727 add r8,r13
3728 xor r15,r10
3729 shrd r14,r14,28
3730 add rax,r8
3731 add r8,r15
3732 mov r13,rax
3733 add r14,r8
3734 shrd r13,r13,23
3735 mov r8,r14
3736 mov r12,rbx
3737 shrd r14,r14,5
3738 xor r13,rax
3739 xor r12,rcx
3740 shrd r13,r13,4
3741 xor r14,r8
3742 and r12,rax
3743 xor r13,rax
3744 add rdx,QWORD[32+rsp]
3745 mov r15,r8
3746 xor r12,rcx
3747 shrd r14,r14,6
3748 xor r15,r9
3749 add rdx,r12
3750 shrd r13,r13,14
3751 and rdi,r15
3752 xor r14,r8
3753 add rdx,r13
3754 xor rdi,r9
3755 shrd r14,r14,28
3756 add r11,rdx
3757 add rdx,rdi
3758 mov r13,r11
3759 add r14,rdx
3760 shrd r13,r13,23
3761 mov rdx,r14
3762 mov r12,rax
3763 shrd r14,r14,5
3764 xor r13,r11
3765 xor r12,rbx
3766 shrd r13,r13,4
3767 xor r14,rdx
3768 and r12,r11
3769 xor r13,r11
3770 add rcx,QWORD[40+rsp]
3771 mov rdi,rdx
3772 xor r12,rbx
3773 shrd r14,r14,6
3774 xor rdi,r8
3775 add rcx,r12
3776 shrd r13,r13,14
3777 and r15,rdi
3778 xor r14,rdx
3779 add rcx,r13
3780 xor r15,r8
3781 shrd r14,r14,28
3782 add r10,rcx
3783 add rcx,r15
3784 mov r13,r10
3785 add r14,rcx
3786 shrd r13,r13,23
3787 mov rcx,r14
3788 mov r12,r11
3789 shrd r14,r14,5
3790 xor r13,r10
3791 xor r12,rax
3792 shrd r13,r13,4
3793 xor r14,rcx
3794 and r12,r10
3795 xor r13,r10
3796 add rbx,QWORD[48+rsp]
3797 mov r15,rcx
3798 xor r12,rax
3799 shrd r14,r14,6
3800 xor r15,rdx
3801 add rbx,r12
3802 shrd r13,r13,14
3803 and rdi,r15
3804 xor r14,rcx
3805 add rbx,r13
3806 xor rdi,rdx
3807 shrd r14,r14,28
3808 add r9,rbx
3809 add rbx,rdi
3810 mov r13,r9
3811 add r14,rbx
3812 shrd r13,r13,23
3813 mov rbx,r14
3814 mov r12,r10
3815 shrd r14,r14,5
3816 xor r13,r9
3817 xor r12,r11
3818 shrd r13,r13,4
3819 xor r14,rbx
3820 and r12,r9
3821 xor r13,r9
3822 add rax,QWORD[56+rsp]
3823 mov rdi,rbx
3824 xor r12,r11
3825 shrd r14,r14,6
3826 xor rdi,rcx
3827 add rax,r12
3828 shrd r13,r13,14
3829 and r15,rdi
3830 xor r14,rbx
3831 add rax,r13
3832 xor r15,rcx
3833 shrd r14,r14,28
3834 add r8,rax
3835 add rax,r15
3836 mov r13,r8
3837 add r14,rax
3838 shrd r13,r13,23
3839 mov rax,r14
3840 mov r12,r9
3841 shrd r14,r14,5
3842 xor r13,r8
3843 xor r12,r10
3844 shrd r13,r13,4
3845 xor r14,rax
3846 and r12,r8
3847 xor r13,r8
3848 add r11,QWORD[64+rsp]
3849 mov r15,rax
3850 xor r12,r10
3851 shrd r14,r14,6
3852 xor r15,rbx
3853 add r11,r12
3854 shrd r13,r13,14
3855 and rdi,r15
3856 xor r14,rax
3857 add r11,r13
3858 xor rdi,rbx
3859 shrd r14,r14,28
3860 add rdx,r11
3861 add r11,rdi
3862 mov r13,rdx
3863 add r14,r11
3864 shrd r13,r13,23
3865 mov r11,r14
3866 mov r12,r8
3867 shrd r14,r14,5
3868 xor r13,rdx
3869 xor r12,r9
3870 shrd r13,r13,4
3871 xor r14,r11
3872 and r12,rdx
3873 xor r13,rdx
3874 add r10,QWORD[72+rsp]
3875 mov rdi,r11
3876 xor r12,r9
3877 shrd r14,r14,6
3878 xor rdi,rax
3879 add r10,r12
3880 shrd r13,r13,14
3881 and r15,rdi
3882 xor r14,r11
3883 add r10,r13
3884 xor r15,rax
3885 shrd r14,r14,28
3886 add rcx,r10
3887 add r10,r15
3888 mov r13,rcx
3889 add r14,r10
3890 shrd r13,r13,23
3891 mov r10,r14
3892 mov r12,rdx
3893 shrd r14,r14,5
3894 xor r13,rcx
3895 xor r12,r8
3896 shrd r13,r13,4
3897 xor r14,r10
3898 and r12,rcx
3899 xor r13,rcx
3900 add r9,QWORD[80+rsp]
3901 mov r15,r10
3902 xor r12,r8
3903 shrd r14,r14,6
3904 xor r15,r11
3905 add r9,r12
3906 shrd r13,r13,14
3907 and rdi,r15
3908 xor r14,r10
3909 add r9,r13
3910 xor rdi,r11
3911 shrd r14,r14,28
3912 add rbx,r9
3913 add r9,rdi
3914 mov r13,rbx
3915 add r14,r9
3916 shrd r13,r13,23
3917 mov r9,r14
3918 mov r12,rcx
3919 shrd r14,r14,5
3920 xor r13,rbx
3921 xor r12,rdx
3922 shrd r13,r13,4
3923 xor r14,r9
3924 and r12,rbx
3925 xor r13,rbx
3926 add r8,QWORD[88+rsp]
3927 mov rdi,r9
3928 xor r12,rdx
3929 shrd r14,r14,6
3930 xor rdi,r10
3931 add r8,r12
3932 shrd r13,r13,14
3933 and r15,rdi
3934 xor r14,r9
3935 add r8,r13
3936 xor r15,r10
3937 shrd r14,r14,28
3938 add rax,r8
3939 add r8,r15
3940 mov r13,rax
3941 add r14,r8
3942 shrd r13,r13,23
3943 mov r8,r14
3944 mov r12,rbx
3945 shrd r14,r14,5
3946 xor r13,rax
3947 xor r12,rcx
3948 shrd r13,r13,4
3949 xor r14,r8
3950 and r12,rax
3951 xor r13,rax
3952 add rdx,QWORD[96+rsp]
3953 mov r15,r8
3954 xor r12,rcx
3955 shrd r14,r14,6
3956 xor r15,r9
3957 add rdx,r12
3958 shrd r13,r13,14
3959 and rdi,r15
3960 xor r14,r8
3961 add rdx,r13
3962 xor rdi,r9
3963 shrd r14,r14,28
3964 add r11,rdx
3965 add rdx,rdi
3966 mov r13,r11
3967 add r14,rdx
3968 shrd r13,r13,23
3969 mov rdx,r14
3970 mov r12,rax
3971 shrd r14,r14,5
3972 xor r13,r11
3973 xor r12,rbx
3974 shrd r13,r13,4
3975 xor r14,rdx
3976 and r12,r11
3977 xor r13,r11
3978 add rcx,QWORD[104+rsp]
3979 mov rdi,rdx
3980 xor r12,rbx
3981 shrd r14,r14,6
3982 xor rdi,r8
3983 add rcx,r12
3984 shrd r13,r13,14
3985 and r15,rdi
3986 xor r14,rdx
3987 add rcx,r13
3988 xor r15,r8
3989 shrd r14,r14,28
3990 add r10,rcx
3991 add rcx,r15
3992 mov r13,r10
3993 add r14,rcx
3994 shrd r13,r13,23
3995 mov rcx,r14
3996 mov r12,r11
3997 shrd r14,r14,5
3998 xor r13,r10
3999 xor r12,rax
4000 shrd r13,r13,4
4001 xor r14,rcx
4002 and r12,r10
4003 xor r13,r10
4004 add rbx,QWORD[112+rsp]
4005 mov r15,rcx
4006 xor r12,rax
4007 shrd r14,r14,6
4008 xor r15,rdx
4009 add rbx,r12
4010 shrd r13,r13,14
4011 and rdi,r15
4012 xor r14,rcx
4013 add rbx,r13
4014 xor rdi,rdx
4015 shrd r14,r14,28
4016 add r9,rbx
4017 add rbx,rdi
4018 mov r13,r9
4019 add r14,rbx
4020 shrd r13,r13,23
4021 mov rbx,r14
4022 mov r12,r10
4023 shrd r14,r14,5
4024 xor r13,r9
4025 xor r12,r11
4026 shrd r13,r13,4
4027 xor r14,rbx
4028 and r12,r9
4029 xor r13,r9
4030 add rax,QWORD[120+rsp]
4031 mov rdi,rbx
4032 xor r12,r11
4033 shrd r14,r14,6
4034 xor rdi,rcx
4035 add rax,r12
4036 shrd r13,r13,14
4037 and r15,rdi
4038 xor r14,rbx
4039 add rax,r13
4040 xor r15,rcx
4041 shrd r14,r14,28
4042 add r8,rax
4043 add rax,r15
4044 mov r13,r8
4045 add r14,rax
4046 mov rdi,QWORD[((128+0))+rsp]
4047 mov rax,r14
4048
4049 add rax,QWORD[rdi]
4050 lea rsi,[128+rsi]
4051 add rbx,QWORD[8+rdi]
4052 add rcx,QWORD[16+rdi]
4053 add rdx,QWORD[24+rdi]
4054 add r8,QWORD[32+rdi]
4055 add r9,QWORD[40+rdi]
4056 add r10,QWORD[48+rdi]
4057 add r11,QWORD[56+rdi]
4058
4059 cmp rsi,QWORD[((128+16))+rsp]
4060
4061 mov QWORD[rdi],rax
4062 mov QWORD[8+rdi],rbx
4063 mov QWORD[16+rdi],rcx
4064 mov QWORD[24+rdi],rdx
4065 mov QWORD[32+rdi],r8
4066 mov QWORD[40+rdi],r9
4067 mov QWORD[48+rdi],r10
4068 mov QWORD[56+rdi],r11
4069 jb NEAR $L$loop_avx
4070
4071 mov rsi,QWORD[((128+24))+rsp]
4072 vzeroupper
4073 movaps xmm6,XMMWORD[((128+32))+rsp]
4074 movaps xmm7,XMMWORD[((128+48))+rsp]
4075 movaps xmm8,XMMWORD[((128+64))+rsp]
4076 movaps xmm9,XMMWORD[((128+80))+rsp]
4077 movaps xmm10,XMMWORD[((128+96))+rsp]
4078 movaps xmm11,XMMWORD[((128+112))+rsp]
4079 mov r15,QWORD[rsi]
4080 mov r14,QWORD[8+rsi]
4081 mov r13,QWORD[16+rsi]
4082 mov r12,QWORD[24+rsi]
4083 mov rbp,QWORD[32+rsi]
4084 mov rbx,QWORD[40+rsi]
4085 lea rsp,[48+rsi]
4086 $L$epilogue_avx:
4087 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4088 mov rsi,QWORD[16+rsp]
4089 DB 0F3h,0C3h ;repret
4090 $L$SEH_end_sha512_block_data_order_avx:
1804 EXTERN __imp_RtlVirtualUnwind 4091 EXTERN __imp_RtlVirtualUnwind
1805 4092
1806 ALIGN 16 4093 ALIGN 16
1807 se_handler: 4094 se_handler:
1808 push rsi 4095 push rsi
1809 push rdi 4096 push rdi
1810 push rbx 4097 push rbx
1811 push rbp 4098 push rbp
1812 push r12 4099 push r12
1813 push r13 4100 push r13
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
1896 pop rbx 4183 pop rbx
1897 pop rdi 4184 pop rdi
1898 pop rsi 4185 pop rsi
1899 DB 0F3h,0C3h ;repret 4186 DB 0F3h,0C3h ;repret
1900 4187
1901 section .pdata rdata align=4 4188 section .pdata rdata align=4
; Function table. Each entry is three DD values, all emitted relative to the
; image base (wrt ..imagebase): a $L$SEH_begin_* label, the matching
; $L$SEH_end_* label, and the $L$SEH_info_* label of that routine's record in
; the .xdata section. The new side of the diff adds entries for the _xop and
; _avx variants next to the existing base entry.
; NOTE(review): this triple looks like the Win64 RUNTIME_FUNCTION layout
; (begin, end, unwind info) - confirm against the Microsoft x64 unwind docs.
1902 ALIGN 4 4189 ALIGN 4
1903 DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase 4190 DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase
1904 DD $L$SEH_end_sha512_block_data_order wrt ..imagebase 4191 DD $L$SEH_end_sha512_block_data_order wrt ..imagebase
1905 DD $L$SEH_info_sha512_block_data_order wrt ..imagebase 4192 DD $L$SEH_info_sha512_block_data_order wrt ..imagebase
4193 DD $L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase
4194 DD $L$SEH_end_sha512_block_data_order_xop wrt ..imagebase
4195 DD $L$SEH_info_sha512_block_data_order_xop wrt ..imagebase
4196 DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase
4197 DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase
4198 DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase
1906 section .xdata rdata align=8 4199 section .xdata rdata align=8
; One record per $L$SEH_info_* label that the .pdata entries point at. Every
; record has the same shape: the four bytes 9,0,0,0, then the image-relative
; address of se_handler, then the image-relative addresses of that variant's
; $L$prologue* and $L$epilogue* labels.
; NOTE(review): 9 presumably encodes UNWIND_INFO version 1 plus the
; exception-handler flag, and the prologue/epilogue pair is presumably
; handler data read by se_handler - its body is not fully visible here; confirm.
1907 ALIGN 8 4200 ALIGN 8
1908 $L$SEH_info_sha512_block_data_order: 4201 $L$SEH_info_sha512_block_data_order:
1909 DB 9,0,0,0 4202 DB 9,0,0,0
1910 DD se_handler wrt ..imagebase 4203 DD se_handler wrt ..imagebase
1911 DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase 4204 DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase
4205 $L$SEH_info_sha512_block_data_order_xop:
4206 DB 9,0,0,0
4207 DD se_handler wrt ..imagebase
4208 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase
4209 $L$SEH_info_sha512_block_data_order_avx:
4210 DB 9,0,0,0
4211 DD se_handler wrt ..imagebase
4212 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
OLDNEW
« no previous file with comments | « third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698