OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 16 matching lines...) |
27 | 27 |
28 // 64 bit | 28 // 64 bit |
29 #if defined(_M_X64) | 29 #if defined(_M_X64) |
30 | 30 |
31 // Read 4 UV from 422, upsample to 8 UV. | 31 // Read 4 UV from 422, upsample to 8 UV; also read 8 Y. |
32 #define READYUV422 \ | 32 #define READYUV422 \ |
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ | 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ |
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ | 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ |
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ | 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ | 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ |
37 u_buf += 4; | 37 u_buf += 4; \ |
| 38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ |
| 39 y_buf += 8; |
38 | 40 |
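Note: READYUV422 above reads 4 U and 4 V bytes, interleaves them, and doubles each UV pair so 8 pixels share 4 chroma samples (the Y fetch now rides along in the same macro). A minimal standalone sketch of that byte pattern, with made-up sample values (not part of this patch):

  #include <emmintrin.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    const uint8_t u[4] = {10, 20, 30, 40};  /* hypothetical U samples */
    const uint8_t v[4] = {50, 60, 70, 80};  /* hypothetical V samples */
    __m128i x0 = _mm_cvtsi32_si128(*(const int*)u);
    __m128i x1 = _mm_cvtsi32_si128(*(const int*)v);
    x0 = _mm_unpacklo_epi8(x0, x1);   /* U0 V0 U1 V1 U2 V2 U3 V3 */
    x0 = _mm_unpacklo_epi16(x0, x0);  /* each UV pair doubled -> 8 UV */
    uint8_t out[16];
    _mm_storeu_si128((__m128i*)out, x0);
    for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
    printf("\n");  /* 10 50 10 50 20 60 20 60 30 70 30 70 40 80 40 80 */
    return 0;
  }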
39 // Convert 8 pixels: 8 UV and 8 Y. | 41 // Convert 8 pixels: 8 UV and 8 Y. |
40 #define YUVTORGB(YuvConstants) \ | 42 #define YUVTORGB(YuvConstants) \ |
41 xmm1 = _mm_loadu_si128(&xmm0); \ | 43 xmm1 = _mm_loadu_si128(&xmm0); \ |
42 xmm2 = _mm_loadu_si128(&xmm0); \ | 44 xmm2 = _mm_loadu_si128(&xmm0); \ |
43 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ | 45 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ |
44 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ | 46 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ |
45 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ | 47 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ |
46 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ | 48 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ |
47 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ | 49 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ |
48 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ | 50 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ |
49 xmm3 = _mm_loadl_epi64((__m128i*)y_buf); \ | 51 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ |
50 y_buf += 8; \ | 52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \ |
51 xmm3 = _mm_unpacklo_epi8(xmm3, xmm3); \ | 53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \ |
52 xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb); \ | 54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \ |
53 xmm0 = _mm_adds_epi16(xmm0, xmm3); \ | 55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \ |
54 xmm1 = _mm_adds_epi16(xmm1, xmm3); \ | |
55 xmm2 = _mm_adds_epi16(xmm2, xmm3); \ | |
56 xmm0 = _mm_srai_epi16(xmm0, 6); \ | 56 xmm0 = _mm_srai_epi16(xmm0, 6); \ |
57 xmm1 = _mm_srai_epi16(xmm1, 6); \ | 57 xmm1 = _mm_srai_epi16(xmm1, 6); \ |
58 xmm2 = _mm_srai_epi16(xmm2, 6); \ | 58 xmm2 = _mm_srai_epi16(xmm2, 6); \ |
59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ | 59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ |
60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ | 60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ |
61 xmm2 = _mm_packus_epi16(xmm2, xmm2); | 61 xmm2 = _mm_packus_epi16(xmm2, xmm2); |
62 | 62 |
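The arithmetic above is fixed point with 6 fraction bits: the UV products are subtracted from precomputed biases, the scaled Y term is added with saturation, and an arithmetic shift plus unsigned pack produces each channel. A scalar model of one channel follows; the coefficient and bias used in main() are illustrative BT.601 limited-range values, not necessarily libyuv's exact tables:

  #include <stdint.h>
  #include <stdio.h>

  /* Scalar model of one output channel of YUVTORGB (illustrative only). */
  static uint8_t yuv_channel(uint8_t y, int uv_term, int bias,
                             uint16_t y_coeff) {
    /* punpcklbw xmm,xmm duplicates each Y byte (y * 0x0101); pmulhuw
       keeps the high 16 bits of the unsigned product. */
    int y_scaled = (int)(((uint32_t)y * 0x0101u * y_coeff) >> 16);
    int c = (bias - uv_term) + y_scaled;  /* paddsw saturates in SIMD */
    c >>= 6;                              /* psraw 6: drop fraction bits */
    return (uint8_t)(c < 0 ? 0 : c > 255 ? 255 : c);  /* packuswb clamp */
  }

  int main(void) {
    /* Neutral chroma: video black (16) and white (235) should expand to
       0 and 255 with coeff ~1.164*64*256 and bias ~ -16*1.164*64. */
    printf("%d %d\n", yuv_channel(16, 0, -1192, 19071),
                      yuv_channel(235, 0, -1192, 19071));
    return 0;
  }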
63 // Store 8 ARGB values. | 63 // Store 8 ARGB values. |
64 #define STOREARGB \ | 64 #define STOREARGB \ |
65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ | 65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
(...skipping 17 matching lines...) |
83 dst_argb += 32; | 83 dst_argb += 32; |
84 | 84 |
85 | 85 |
86 #if defined(HAS_I422TOARGBROW_SSSE3) | 86 #if defined(HAS_I422TOARGBROW_SSSE3) |
87 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 87 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
88 const uint8* u_buf, | 88 const uint8* u_buf, |
89 const uint8* v_buf, | 89 const uint8* v_buf, |
90 uint8* dst_argb, | 90 uint8* dst_argb, |
91 struct YuvConstants* yuvconstants, | 91 struct YuvConstants* yuvconstants, |
92 int width) { | 92 int width) { |
93 __m128i xmm0, xmm1, xmm2, xmm3; | 93 __m128i xmm0, xmm1, xmm2, xmm4; |
94 const __m128i xmm5 = _mm_set1_epi8(-1); | 94 const __m128i xmm5 = _mm_set1_epi8(-1); |
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
96 while (width > 0) { | 96 while (width > 0) { |
97 READYUV422 | 97 READYUV422 |
98 YUVTORGB(YuvConstants) | 98 YUVTORGB(yuvconstants) |
99 STOREARGB | 99 STOREARGB |
100 width -= 8; | 100 width -= 8; |
101 } | 101 } |
102 } | 102 } |
103 #endif | 103 #endif |
104 | 104 |
105 #if defined(HAS_I422TOABGRROW_SSSE3) | 105 #if defined(HAS_I422TOABGRROW_SSSE3) |
106 void I422ToABGRRow_SSSE3(const uint8* y_buf, | 106 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
107 const uint8* u_buf, | 107 const uint8* u_buf, |
108 const uint8* v_buf, | 108 const uint8* v_buf, |
109 uint8* dst_argb, | 109 uint8* dst_argb, |
110 struct YuvConstants* yuvconstants, | 110 struct YuvConstants* yuvconstants, |
111 int width) { | 111 int width) { |
112 __m128i xmm0, xmm1, xmm2, xmm3; | 112 __m128i xmm0, xmm1, xmm2, xmm4; |
113 const __m128i xmm5 = _mm_set1_epi8(-1); | 113 const __m128i xmm5 = _mm_set1_epi8(-1); |
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
115 while (width > 0) { | 115 while (width > 0) { |
116 READYUV422 | 116 READYUV422 |
117 YUVTORGB(YuvConstants) | 117 YUVTORGB(yuvconstants) |
118 STOREABGR | 118 STOREABGR |
119 width -= 8; | 119 width -= 8; |
120 } | 120 } |
121 } | 121 } |
122 #endif | 122 #endif |
123 // 32 bit | 123 // 32 bit |
124 #else // defined(_M_X64) | 124 #else // defined(_M_X64) |
125 #ifdef HAS_ARGBTOYROW_SSSE3 | 125 #ifdef HAS_ARGBTOYROW_SSSE3 |
126 | 126 |
127 // Constants for ARGB. | 127 // Constants for ARGB. |
(...skipping 1717 matching lines...) |
1845 #endif // HAS_ARGBTOYROW_SSSE3 | 1845 #endif // HAS_ARGBTOYROW_SSSE3 |
1846 | 1846 |
1847 // Read 16 UV from 444 | 1847 // Read 16 UV from 444, and 16 Y |
1848 #define READYUV444_AVX2 __asm { \ | 1848 #define READYUV444_AVX2 __asm { \ |
1849 __asm vmovdqu xmm0, [esi] /* U */ \ | 1849 __asm vmovdqu xmm0, [esi] /* U */ \ |
1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \ | 1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \ |
1851 __asm lea esi, [esi + 16] \ | 1851 __asm lea esi, [esi + 16] \ |
1852 __asm vpermq ymm0, ymm0, 0xd8 \ | 1852 __asm vpermq ymm0, ymm0, 0xd8 \ |
1853 __asm vpermq ymm1, ymm1, 0xd8 \ | 1853 __asm vpermq ymm1, ymm1, 0xd8 \ |
1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
| 1855 __asm vmovdqu xmm4, [eax] /* Y */ \ |
| 1856 __asm lea eax, [eax + 16] \ |
1855 } | 1857 } |
1856 | 1858 |
1857 // Read 8 UV from 422, upsample to 16 UV. | 1859 // Read 8 UV from 422, upsample to 16 UV; also read 16 Y. |
1858 #define READYUV422_AVX2 __asm { \ | 1860 #define READYUV422_AVX2 __asm { \ |
1859 __asm vmovq xmm0, qword ptr [esi] /* U */ \ | 1861 __asm vmovq xmm0, qword ptr [esi] /* U */ \ |
1860 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ | 1862 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ |
1861 __asm lea esi, [esi + 8] \ | 1863 __asm lea esi, [esi + 8] \ |
1862 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1864 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
1863 __asm vpermq ymm0, ymm0, 0xd8 \ | 1865 __asm vpermq ymm0, ymm0, 0xd8 \ |
1864 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1866 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
| 1867 __asm vmovdqu xmm4, [eax] /* Y */ \ |
| 1868 __asm lea eax, [eax + 16] \ |
1865 } | 1869 } |
1866 | 1870 |
1867 // Read 4 UV from 411, upsample to 16 UV. | 1871 // Read 4 UV from 411, upsample to 16 UV; also read 16 Y. |
1868 #define READYUV411_AVX2 __asm { \ | 1872 #define READYUV411_AVX2 __asm { \ |
1869 __asm vmovd xmm0, dword ptr [esi] /* U */ \ | 1873 __asm vmovd xmm0, dword ptr [esi] /* U */ \ |
1870 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ | 1874 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ |
1871 __asm lea esi, [esi + 4] \ | 1875 __asm lea esi, [esi + 4] \ |
1872 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1876 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
1873 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1877 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
1874 __asm vpermq ymm0, ymm0, 0xd8 \ | 1878 __asm vpermq ymm0, ymm0, 0xd8 \ |
1875 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ | 1879 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ |
| 1880 __asm vmovdqu xmm4, [eax] /* Y */ \ |
| 1881 __asm lea eax, [eax + 16] \ |
1876 } | 1882 } |
1877 | 1883 |
1878 // Read 8 UV from NV12, upsample to 16 UV. | 1884 // Read 8 UV from NV12, upsample to 16 UV; also read 16 Y. |
1879 #define READNV12_AVX2 __asm { \ | 1885 #define READNV12_AVX2 __asm { \ |
1880 __asm vmovdqu xmm0, [esi] /* UV */ \ | 1886 __asm vmovdqu xmm0, [esi] /* UV */ \ |
1881 __asm lea esi, [esi + 16] \ | 1887 __asm lea esi, [esi + 16] \ |
1882 __asm vpermq ymm0, ymm0, 0xd8 \ | 1888 __asm vpermq ymm0, ymm0, 0xd8 \ |
1883 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1889 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
| 1890 __asm vmovdqu xmm4, [eax] /* Y */ \ |
| 1891 __asm lea eax, [eax + 16] \ |
1884 } | 1892 } |
1885 | 1893 |
1886 // Convert 16 pixels: 16 UV and 16 Y. | 1894 // Convert 16 pixels: 16 UV and 16 Y. |
1887 #define YUVTORGB_AVX2(YuvConstants) __asm { \ | 1895 #define YUVTORGB_AVX2(YuvConstants) __asm { \ |
1888 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ | 1896 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ |
1889 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ | 1897 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ |
1890 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ | 1898 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ |
1891 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ | 1899 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ |
1892 __asm vpsubw ymm2, ymm3, ymm2 \ | 1900 __asm vpsubw ymm2, ymm3, ymm2 \ |
1893 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ | 1901 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ |
1894 __asm vpsubw ymm1, ymm3, ymm1 \ | 1902 __asm vpsubw ymm1, ymm3, ymm1 \ |
1895 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ | 1903 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ |
1896 __asm vpsubw ymm0, ymm3, ymm0 \ | 1904 __asm vpsubw ymm0, ymm3, ymm0 \ |
1897 /* Step 2: Find Y contribution to 16 R,G,B values */ \ | 1905 /* Step 2: Find Y contribution to 16 R,G,B values */ \ |
1898 __asm vmovdqu xmm3, [eax] \ | 1906 __asm vpermq ymm4, ymm4, 0xd8 \ |
1899 __asm lea eax, [eax + 16] \ | 1907 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1900 __asm vpermq ymm3, ymm3, 0xd8 \ | 1908 __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \ |
1901 __asm vpunpcklbw ymm3, ymm3, ymm3 \ | 1909 __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ |
1902 __asm vpmulhuw ymm3, ymm3, ymmword ptr [YuvConstants + KYTORGB] \ | 1910 __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ |
1903 __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \ | 1911 __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ |
1904 __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \ | |
1905 __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \ | |
1906 __asm vpsraw ymm0, ymm0, 6 \ | 1912 __asm vpsraw ymm0, ymm0, 6 \ |
1907 __asm vpsraw ymm1, ymm1, 6 \ | 1913 __asm vpsraw ymm1, ymm1, 6 \ |
1908 __asm vpsraw ymm2, ymm2, 6 \ | 1914 __asm vpsraw ymm2, ymm2, 6 \ |
1909 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ | 1915 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ |
1910 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ | 1916 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ |
1911 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ | 1917 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ |
1912 } | 1918 } |
1913 | 1919 |
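AVX2 unpack instructions interleave within each 128-bit lane, which is why the READ*_AVX2 macros and the Y widening above pair vpermq ..., 0xd8 with vpunpcklbw: the qword swap parks the two halves of a 16-byte load in the low qwords of both lanes so the in-lane interleave comes out in pixel order. A standalone sketch with made-up values (compile with AVX2 enabled; not part of this patch):

  #include <immintrin.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    uint8_t y[16];
    for (int i = 0; i < 16; ++i) y[i] = (uint8_t)i;
    /* vmovdqu xmm4, [mem] zeroes the upper lane in hardware; with the
       cast intrinsic the upper lane is unspecified, but the permute
       moves it into qwords that the unpack below never reads. */
    __m256i v = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)y));
    v = _mm256_permute4x64_epi64(v, 0xd8);  /* qword order 0,2,1,3 */
    v = _mm256_unpacklo_epi8(v, v);         /* per-lane: double each byte */
    uint8_t out[32];
    _mm256_storeu_si256((__m256i*)out, v);
    for (int i = 0; i < 32; ++i) printf("%d ", out[i]);
    printf("\n");  /* 0 0 1 1 2 2 ... 15 15: bytes doubled, in order */
    return 0;
  }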
1914 // Store 16 ARGB values. | 1920 // Store 16 ARGB values. |
1915 #define STOREARGB_AVX2 __asm { \ | 1921 #define STOREARGB_AVX2 __asm { \ |
(...skipping 363 matching lines...) |
2279 | 2285 |
2280 #if defined(HAS_I422TOARGBROW_SSSE3) | 2286 #if defined(HAS_I422TOARGBROW_SSSE3) |
2281 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. | 2287 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. |
2282 | 2288 |
2283 // Read 8 UV from 444. | 2289 // Read 8 UV from 444; also read 8 Y. |
2284 #define READYUV444 __asm { \ | 2290 #define READYUV444 __asm { \ |
2285 __asm movq xmm0, qword ptr [esi] /* U */ \ | 2291 __asm movq xmm0, qword ptr [esi] /* U */ \ |
2286 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ | 2292 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ |
2287 __asm lea esi, [esi + 8] \ | 2293 __asm lea esi, [esi + 8] \ |
2288 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2294 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
| 2295 __asm movq xmm4, qword ptr [eax] \ |
| 2296 __asm lea eax, [eax + 8] \ |
2289 } | 2297 } |
2290 | 2298 |
2291 // Read 4 UV from 422, upsample to 8 UV. | 2299 // Read 4 UV from 422, upsample to 8 UV; also read 8 Y. |
2292 #define READYUV422 __asm { \ | 2300 #define READYUV422 __asm { \ |
2293 __asm movd xmm0, [esi] /* U */ \ | 2301 __asm movd xmm0, [esi] /* U */ \ |
2294 __asm movd xmm1, [esi + edi] /* V */ \ | 2302 __asm movd xmm1, [esi + edi] /* V */ \ |
2295 __asm lea esi, [esi + 4] \ | 2303 __asm lea esi, [esi + 4] \ |
2296 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2304 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2297 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2305 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
| 2306 __asm movq xmm4, qword ptr [eax] \ |
| 2307 __asm lea eax, [eax + 8] \ |
2298 } | 2308 } |
2299 | 2309 |
2300 // Read 2 UV from 411, upsample to 8 UV. | 2310 // Read 2 UV from 411, upsample to 8 UV; also read 8 Y. |
2301 #define READYUV411 __asm { \ | 2311 #define READYUV411 __asm { \ |
2302 __asm pinsrw xmm0, [esi], 0 /* U */ \ | 2312 __asm pinsrw xmm0, [esi], 0 /* U */ \ |
2303 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ | 2313 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ |
2304 __asm lea esi, [esi + 2] \ | 2314 __asm lea esi, [esi + 2] \ |
2305 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2315 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2306 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2316 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2307 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ | 2317 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ |
| 2318 __asm movq xmm4, qword ptr [eax] \ |
| 2319 __asm lea eax, [eax + 8] \ |
2308 } | 2320 } |
2309 | 2321 |
2310 // Read 4 UV from NV12, upsample to 8 UV. | 2322 // Read 4 UV from NV12, upsample to 8 UV; also read 8 Y. |
2311 #define READNV12 __asm { \ | 2323 #define READNV12 __asm { \ |
2312 __asm movq xmm0, qword ptr [esi] /* UV */ \ | 2324 __asm movq xmm0, qword ptr [esi] /* UV */ \ |
2313 __asm lea esi, [esi + 8] \ | 2325 __asm lea esi, [esi + 8] \ |
2314 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2326 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
| 2327 __asm movq xmm4, qword ptr [eax] \ |
| 2328 __asm lea eax, [eax + 8] \ |
2315 } | 2329 } |
2316 | 2330 |
2317 // Convert 8 pixels: 8 UV and 8 Y. | 2331 // Convert 8 pixels: 8 UV and 8 Y. |
2318 #define YUVTORGB(YuvConstants) __asm { \ | 2332 #define YUVTORGB(YuvConstants) __asm { \ |
2319 __asm movdqa xmm1, xmm0 \ | 2333 __asm movdqa xmm1, xmm0 \ |
2320 __asm movdqa xmm2, xmm0 \ | 2334 __asm movdqa xmm2, xmm0 \ |
2321 __asm movdqa xmm3, xmm0 \ | 2335 __asm movdqa xmm3, xmm0 \ |
2322 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ | 2336 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ |
2323 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ | 2337 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ |
2324 __asm psubw xmm0, xmm1 \ | 2338 __asm psubw xmm0, xmm1 \ |
2325 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ | 2339 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ |
2326 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ | 2340 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ |
2327 __asm psubw xmm1, xmm2 \ | 2341 __asm psubw xmm1, xmm2 \ |
2328 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ | 2342 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ |
2329 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ | 2343 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ |
2330 __asm psubw xmm2, xmm3 \ | 2344 __asm psubw xmm2, xmm3 \ |
2331 __asm movq xmm3, qword ptr [eax] \ | 2345 __asm punpcklbw xmm4, xmm4 \ |
2332 __asm lea eax, [eax + 8] \ | 2346 __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \ |
2333 __asm punpcklbw xmm3, xmm3 \ | 2347 __asm paddsw xmm0, xmm4 /* B += Y */ \ |
2334 __asm pmulhuw xmm3, xmmword ptr [YuvConstants + KYTORGB] \ | 2348 __asm paddsw xmm1, xmm4 /* G += Y */ \ |
2335 __asm paddsw xmm0, xmm3 /* B += Y */ \ | 2349 __asm paddsw xmm2, xmm4 /* R += Y */ \ |
2336 __asm paddsw xmm1, xmm3 /* G += Y */ \ | |
2337 __asm paddsw xmm2, xmm3 /* R += Y */ \ | |
2338 __asm psraw xmm0, 6 \ | 2350 __asm psraw xmm0, 6 \ |
2339 __asm psraw xmm1, 6 \ | 2351 __asm psraw xmm1, 6 \ |
2340 __asm psraw xmm2, 6 \ | 2352 __asm psraw xmm2, 6 \ |
2341 __asm packuswb xmm0, xmm0 /* B */ \ | 2353 __asm packuswb xmm0, xmm0 /* B */ \ |
2342 __asm packuswb xmm1, xmm1 /* G */ \ | 2354 __asm packuswb xmm1, xmm1 /* G */ \ |
2343 __asm packuswb xmm2, xmm2 /* R */ \ | 2355 __asm packuswb xmm2, xmm2 /* R */ \ |
2344 } | 2356 } |
2345 | 2357 |
2346 // Store 8 ARGB values. | 2358 // Store 8 ARGB values. |
2347 #define STOREARGB __asm { \ | 2359 #define STOREARGB __asm { \ |
(...skipping 3912 matching lines...) |
6260 } | 6272 } |
6261 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6273 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6262 | 6274 |
6263 #endif // defined(_M_X64) | 6275 #endif // defined(_M_X64) |
6264 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6276 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6265 | 6277 |
6266 #ifdef __cplusplus | 6278 #ifdef __cplusplus |
6267 } // extern "C" | 6279 } // extern "C" |
6268 } // namespace libyuv | 6280 } // namespace libyuv |
6269 #endif | 6281 #endif |