Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: source/row_win.cc

Issue 1355333002: read in read function for yuv conversion (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 16 matching lines...) Expand all
27 27
28 // 64 bit 28 // 64 bit
29 #if defined(_M_X64) 29 #if defined(_M_X64)
30 30
31 // Read 4 UV from 422, upsample to 8 UV. 31 // Read 4 UV from 422, upsample to 8 UV.
32 #define READYUV422 \ 32 #define READYUV422 \
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
37 u_buf += 4; 37 u_buf += 4; \
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
39 y_buf += 8; \
38 40
39 // Convert 8 pixels: 8 UV and 8 Y. 41 // Convert 8 pixels: 8 UV and 8 Y.
40 #define YUVTORGB(YuvConstants) \ 42 #define YUVTORGB(YuvConstants) \
41 xmm1 = _mm_loadu_si128(&xmm0); \ 43 xmm1 = _mm_loadu_si128(&xmm0); \
42 xmm2 = _mm_loadu_si128(&xmm0); \ 44 xmm2 = _mm_loadu_si128(&xmm0); \
43 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ 45 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \
44 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ 46 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \
45 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ 47 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \
46 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ 48 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \
47 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ 49 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \
48 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ 50 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \
49 xmm3 = _mm_loadl_epi64((__m128i*)y_buf); \ 51 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
50 y_buf += 8; \ 52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \
51 xmm3 = _mm_unpacklo_epi8(xmm3, xmm3); \ 53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \
52 xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb); \ 54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \
53 xmm0 = _mm_adds_epi16(xmm0, xmm3); \ 55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \
54 xmm1 = _mm_adds_epi16(xmm1, xmm3); \
55 xmm2 = _mm_adds_epi16(xmm2, xmm3); \
56 xmm0 = _mm_srai_epi16(xmm0, 6); \ 56 xmm0 = _mm_srai_epi16(xmm0, 6); \
57 xmm1 = _mm_srai_epi16(xmm1, 6); \ 57 xmm1 = _mm_srai_epi16(xmm1, 6); \
58 xmm2 = _mm_srai_epi16(xmm2, 6); \ 58 xmm2 = _mm_srai_epi16(xmm2, 6); \
59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ 59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \
60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ 60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \
61 xmm2 = _mm_packus_epi16(xmm2, xmm2); 61 xmm2 = _mm_packus_epi16(xmm2, xmm2);
62 62
63 // Store 8 ARGB values. 63 // Store 8 ARGB values.
64 #define STOREARGB \ 64 #define STOREARGB \
65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
(...skipping 17 matching lines...) Expand all
83 dst_argb += 32; 83 dst_argb += 32;
84 84
85 85
86 #if defined(HAS_I422TOARGBROW_SSSE3) 86 #if defined(HAS_I422TOARGBROW_SSSE3)
87 void I422ToARGBRow_SSSE3(const uint8* y_buf, 87 void I422ToARGBRow_SSSE3(const uint8* y_buf,
88 const uint8* u_buf, 88 const uint8* u_buf,
89 const uint8* v_buf, 89 const uint8* v_buf,
90 uint8* dst_argb, 90 uint8* dst_argb,
91 struct YuvConstants* yuvconstants, 91 struct YuvConstants* yuvconstants,
92 int width) { 92 int width) {
93 __m128i xmm0, xmm1, xmm2, xmm3; 93 __m128i xmm0, xmm1, xmm2, xmm4;
94 const __m128i xmm5 = _mm_set1_epi8(-1); 94 const __m128i xmm5 = _mm_set1_epi8(-1);
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
96 while (width > 0) { 96 while (width > 0) {
97 READYUV422 97 READYUV422
98 YUVTORGB(YuvConstants) 98 YUVTORGB(yuvconstants)
99 STOREARGB 99 STOREARGB
100 width -= 8; 100 width -= 8;
101 } 101 }
102 } 102 }
103 #endif 103 #endif
104 104
105 #if defined(HAS_I422TOABGRROW_SSSE3) 105 #if defined(HAS_I422TOABGRROW_SSSE3)
106 void I422ToABGRRow_SSSE3(const uint8* y_buf, 106 void I422ToABGRRow_SSSE3(const uint8* y_buf,
107 const uint8* u_buf, 107 const uint8* u_buf,
108 const uint8* v_buf, 108 const uint8* v_buf,
109 uint8* dst_argb, 109 uint8* dst_argb,
110 struct YuvConstants* yuvconstants, 110 struct YuvConstants* yuvconstants,
111 int width) { 111 int width) {
112 __m128i xmm0, xmm1, xmm2, xmm3; 112 __m128i xmm0, xmm1, xmm2, xmm4;
113 const __m128i xmm5 = _mm_set1_epi8(-1); 113 const __m128i xmm5 = _mm_set1_epi8(-1);
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
115 while (width > 0) { 115 while (width > 0) {
116 READYUV422 116 READYUV422
117 YUVTORGB(YuvConstants) 117 YUVTORGB(yuvconstants)
118 STOREABGR 118 STOREABGR
119 width -= 8; 119 width -= 8;
120 } 120 }
121 } 121 }
122 #endif 122 #endif
123 // 32 bit 123 // 32 bit
124 #else // defined(_M_X64) 124 #else // defined(_M_X64)
125 #ifdef HAS_ARGBTOYROW_SSSE3 125 #ifdef HAS_ARGBTOYROW_SSSE3
126 126
127 // Constants for ARGB. 127 // Constants for ARGB.
(...skipping 1717 matching lines...) Expand 10 before | Expand all | Expand 10 after
1845 #endif // HAS_ARGBTOYROW_SSSE3 1845 #endif // HAS_ARGBTOYROW_SSSE3
1846 1846
1847 // Read 16 UV from 444 1847 // Read 16 UV from 444
1848 #define READYUV444_AVX2 __asm { \ 1848 #define READYUV444_AVX2 __asm { \
1849 __asm vmovdqu xmm0, [esi] /* U */ \ 1849 __asm vmovdqu xmm0, [esi] /* U */ \
1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \ 1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \
1851 __asm lea esi, [esi + 16] \ 1851 __asm lea esi, [esi + 16] \
1852 __asm vpermq ymm0, ymm0, 0xd8 \ 1852 __asm vpermq ymm0, ymm0, 0xd8 \
1853 __asm vpermq ymm1, ymm1, 0xd8 \ 1853 __asm vpermq ymm1, ymm1, 0xd8 \
1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1855 __asm vmovdqu xmm4, [eax] /* Y */ \
1856 __asm lea eax, [eax + 16] \
1855 } 1857 }
1856 1858
1857 // Read 8 UV from 422, upsample to 16 UV. 1859 // Read 8 UV from 422, upsample to 16 UV.
1858 #define READYUV422_AVX2 __asm { \ 1860 #define READYUV422_AVX2 __asm { \
1859 __asm vmovq xmm0, qword ptr [esi] /* U */ \ 1861 __asm vmovq xmm0, qword ptr [esi] /* U */ \
1860 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ 1862 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
1861 __asm lea esi, [esi + 8] \ 1863 __asm lea esi, [esi + 8] \
1862 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1864 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1863 __asm vpermq ymm0, ymm0, 0xd8 \ 1865 __asm vpermq ymm0, ymm0, 0xd8 \
1864 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1866 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1867 __asm vmovdqu xmm4, [eax] /* Y */ \
1868 __asm lea eax, [eax + 16] \
1865 } 1869 }
1866 1870
1867 // Read 4 UV from 411, upsample to 16 UV. 1871 // Read 4 UV from 411, upsample to 16 UV.
1868 #define READYUV411_AVX2 __asm { \ 1872 #define READYUV411_AVX2 __asm { \
1869 __asm vmovd xmm0, dword ptr [esi] /* U */ \ 1873 __asm vmovd xmm0, dword ptr [esi] /* U */ \
1870 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ 1874 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
1871 __asm lea esi, [esi + 4] \ 1875 __asm lea esi, [esi + 4] \
1872 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1876 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1873 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1877 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1874 __asm vpermq ymm0, ymm0, 0xd8 \ 1878 __asm vpermq ymm0, ymm0, 0xd8 \
1875 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ 1879 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
1880 __asm vmovdqu xmm4, [eax] /* Y */ \
1881 __asm lea eax, [eax + 16] \
1876 } 1882 }
1877 1883
1878 // Read 8 UV from NV12, upsample to 16 UV. 1884 // Read 8 UV from NV12, upsample to 16 UV.
1879 #define READNV12_AVX2 __asm { \ 1885 #define READNV12_AVX2 __asm { \
1880 __asm vmovdqu xmm0, [esi] /* UV */ \ 1886 __asm vmovdqu xmm0, [esi] /* UV */ \
1881 __asm lea esi, [esi + 16] \ 1887 __asm lea esi, [esi + 16] \
1882 __asm vpermq ymm0, ymm0, 0xd8 \ 1888 __asm vpermq ymm0, ymm0, 0xd8 \
1883 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1889 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1890 __asm vmovdqu xmm4, [eax] /* Y */ \
1891 __asm lea eax, [eax + 16] \
1884 } 1892 }
1885 1893
1886 // Convert 16 pixels: 16 UV and 16 Y. 1894 // Convert 16 pixels: 16 UV and 16 Y.
1887 #define YUVTORGB_AVX2(YuvConstants) __asm { \ 1895 #define YUVTORGB_AVX2(YuvConstants) __asm { \
1888 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ 1896 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\
1889 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ 1897 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\
1890 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ 1898 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\
1891 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ 1899 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \
1892 __asm vpsubw ymm2, ymm3, ymm2 \ 1900 __asm vpsubw ymm2, ymm3, ymm2 \
1893 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ 1901 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \
1894 __asm vpsubw ymm1, ymm3, ymm1 \ 1902 __asm vpsubw ymm1, ymm3, ymm1 \
1895 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ 1903 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
1896 __asm vpsubw ymm0, ymm3, ymm0 \ 1904 __asm vpsubw ymm0, ymm3, ymm0 \
1897 /* Step 2: Find Y contribution to 16 R,G,B values */ \ 1905 /* Step 2: Find Y contribution to 16 R,G,B values */ \
1898 __asm vmovdqu xmm3, [eax] \ 1906 __asm vpermq ymm4, ymm4, 0xd8 \
1899 __asm lea eax, [eax + 16] \ 1907 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1900 __asm vpermq ymm3, ymm3, 0xd8 \ 1908 __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \
1901 __asm vpunpcklbw ymm3, ymm3, ymm3 \ 1909 __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \
1902 __asm vpmulhuw ymm3, ymm3, ymmword ptr [YuvConstants + KYTORGB] \ 1910 __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \
1903 __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \ 1911 __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \
1904 __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
1905 __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
1906 __asm vpsraw ymm0, ymm0, 6 \ 1912 __asm vpsraw ymm0, ymm0, 6 \
1907 __asm vpsraw ymm1, ymm1, 6 \ 1913 __asm vpsraw ymm1, ymm1, 6 \
1908 __asm vpsraw ymm2, ymm2, 6 \ 1914 __asm vpsraw ymm2, ymm2, 6 \
1909 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ 1915 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
1910 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ 1916 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
1911 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ 1917 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
1912 } 1918 }
1913 1919
1914 // Store 16 ARGB values. 1920 // Store 16 ARGB values.
1915 #define STOREARGB_AVX2 __asm { \ 1921 #define STOREARGB_AVX2 __asm { \
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
2279 2285
2280 #if defined(HAS_I422TOARGBROW_SSSE3) 2286 #if defined(HAS_I422TOARGBROW_SSSE3)
2281 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. 2287 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
2282 2288
2283 // Read 8 UV from 444. 2289 // Read 8 UV from 444.
2284 #define READYUV444 __asm { \ 2290 #define READYUV444 __asm { \
2285 __asm movq xmm0, qword ptr [esi] /* U */ \ 2291 __asm movq xmm0, qword ptr [esi] /* U */ \
2286 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ 2292 __asm movq xmm1, qword ptr [esi + edi] /* V */ \
2287 __asm lea esi, [esi + 8] \ 2293 __asm lea esi, [esi + 8] \
2288 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2294 __asm punpcklbw xmm0, xmm1 /* UV */ \
2295 __asm movq xmm4, qword ptr [eax] \
2296 __asm lea eax, [eax + 8] \
2289 } 2297 }
2290 2298
2291 // Read 4 UV from 422, upsample to 8 UV. 2299 // Read 4 UV from 422, upsample to 8 UV.
2292 #define READYUV422 __asm { \ 2300 #define READYUV422 __asm { \
2293 __asm movd xmm0, [esi] /* U */ \ 2301 __asm movd xmm0, [esi] /* U */ \
2294 __asm movd xmm1, [esi + edi] /* V */ \ 2302 __asm movd xmm1, [esi + edi] /* V */ \
2295 __asm lea esi, [esi + 4] \ 2303 __asm lea esi, [esi + 4] \
2296 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2304 __asm punpcklbw xmm0, xmm1 /* UV */ \
2297 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2305 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2306 __asm movq xmm4, qword ptr [eax] \
2307 __asm lea eax, [eax + 8] \
2298 } 2308 }
2299 2309
2300 // Read 2 UV from 411, upsample to 8 UV. 2310 // Read 2 UV from 411, upsample to 8 UV.
2301 #define READYUV411 __asm { \ 2311 #define READYUV411 __asm { \
2302 __asm pinsrw xmm0, [esi], 0 /* U */ \ 2312 __asm pinsrw xmm0, [esi], 0 /* U */ \
2303 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ 2313 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \
2304 __asm lea esi, [esi + 2] \ 2314 __asm lea esi, [esi + 2] \
2305 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2315 __asm punpcklbw xmm0, xmm1 /* UV */ \
2306 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2316 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2307 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ 2317 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
2318 __asm movq xmm4, qword ptr [eax] \
2319 __asm lea eax, [eax + 8] \
2308 } 2320 }
2309 2321
2310 // Read 4 UV from NV12, upsample to 8 UV. 2322 // Read 4 UV from NV12, upsample to 8 UV.
2311 #define READNV12 __asm { \ 2323 #define READNV12 __asm { \
2312 __asm movq xmm0, qword ptr [esi] /* UV */ \ 2324 __asm movq xmm0, qword ptr [esi] /* UV */ \
2313 __asm lea esi, [esi + 8] \ 2325 __asm lea esi, [esi + 8] \
2314 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2326 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2327 __asm movq xmm4, qword ptr [eax] \
2328 __asm lea eax, [eax + 8] \
2315 } 2329 }
2316 2330
2317 // Convert 8 pixels: 8 UV and 8 Y. 2331 // Convert 8 pixels: 8 UV and 8 Y.
2318 #define YUVTORGB(YuvConstants) __asm { \ 2332 #define YUVTORGB(YuvConstants) __asm { \
2319 __asm movdqa xmm1, xmm0 \ 2333 __asm movdqa xmm1, xmm0 \
2320 __asm movdqa xmm2, xmm0 \ 2334 __asm movdqa xmm2, xmm0 \
2321 __asm movdqa xmm3, xmm0 \ 2335 __asm movdqa xmm3, xmm0 \
2322 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ 2336 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \
2323 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ 2337 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
2324 __asm psubw xmm0, xmm1 \ 2338 __asm psubw xmm0, xmm1 \
2325 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ 2339 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
2326 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ 2340 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
2327 __asm psubw xmm1, xmm2 \ 2341 __asm psubw xmm1, xmm2 \
2328 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ 2342 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
2329 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ 2343 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
2330 __asm psubw xmm2, xmm3 \ 2344 __asm psubw xmm2, xmm3 \
2331 __asm movq xmm3, qword ptr [eax] \ 2345 __asm punpcklbw xmm4, xmm4 \
2332 __asm lea eax, [eax + 8] \ 2346 __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \
2333 __asm punpcklbw xmm3, xmm3 \ 2347 __asm paddsw xmm0, xmm4 /* B += Y */ \
2334 __asm pmulhuw xmm3, xmmword ptr [YuvConstants + KYTORGB] \ 2348 __asm paddsw xmm1, xmm4 /* G += Y */ \
2335 __asm paddsw xmm0, xmm3 /* B += Y */ \ 2349 __asm paddsw xmm2, xmm4 /* R += Y */ \
2336 __asm paddsw xmm1, xmm3 /* G += Y */ \
2337 __asm paddsw xmm2, xmm3 /* R += Y */ \
2338 __asm psraw xmm0, 6 \ 2350 __asm psraw xmm0, 6 \
2339 __asm psraw xmm1, 6 \ 2351 __asm psraw xmm1, 6 \
2340 __asm psraw xmm2, 6 \ 2352 __asm psraw xmm2, 6 \
2341 __asm packuswb xmm0, xmm0 /* B */ \ 2353 __asm packuswb xmm0, xmm0 /* B */ \
2342 __asm packuswb xmm1, xmm1 /* G */ \ 2354 __asm packuswb xmm1, xmm1 /* G */ \
2343 __asm packuswb xmm2, xmm2 /* R */ \ 2355 __asm packuswb xmm2, xmm2 /* R */ \
2344 } 2356 }
2345 2357
2346 // Store 8 ARGB values. 2358 // Store 8 ARGB values.
2347 #define STOREARGB __asm { \ 2359 #define STOREARGB __asm { \
(...skipping 3912 matching lines...) Expand 10 before | Expand all | Expand 10 after
6260 } 6272 }
6261 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6273 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6262 6274
6263 #endif // defined(_M_X64) 6275 #endif // defined(_M_X64)
6264 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6276 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6265 6277
6266 #ifdef __cplusplus 6278 #ifdef __cplusplus
6267 } // extern "C" 6279 } // extern "C"
6268 } // namespace libyuv 6280 } // namespace libyuv
6269 #endif 6281 #endif
OLD | NEW
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698