OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 18 matching lines...) Expand all Loading... |
29 #if defined(_M_X64) | 29 #if defined(_M_X64) |
30 | 30 |
31 // Read 4 UV from 422, upsample to 8 UV. | 31 // Read 4 UV from 422, upsample to 8 UV. |
32 #define READYUV422 \ | 32 #define READYUV422 \ |
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ | 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ |
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ | 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ |
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ | 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ | 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ |
37 u_buf += 4; \ | 37 u_buf += 4; \ |
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ | 38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ |
39 <<<<<<< HEAD | |
40 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ | 39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ |
41 ======= | |
42 >>>>>>> refs/remotes/origin/master | |
43 y_buf += 8; \ | 40 y_buf += 8; \ |
44 | 41 |
45 // Convert 8 pixels: 8 UV and 8 Y. | 42 // Convert 8 pixels: 8 UV and 8 Y. |
46 #define YUVTORGB(YuvConstants) \ | 43 #define YUVTORGB(YuvConstants) \ |
47 xmm1 = _mm_loadu_si128(&xmm0); \ | 44 xmm1 = _mm_loadu_si128(&xmm0); \ |
48 xmm2 = _mm_loadu_si128(&xmm0); \ | 45 xmm2 = _mm_loadu_si128(&xmm0); \ |
49 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ | 46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ |
50 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ | 47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ |
51 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ | 48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ |
52 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ | 49 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ |
53 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ | 50 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ |
54 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ | 51 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ |
55 <<<<<<< HEAD | |
56 ======= | |
57 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ | |
58 >>>>>>> refs/remotes/origin/master | |
59 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \ | 52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \ |
60 xmm0 = _mm_adds_epi16(xmm0, xmm4); \ | 53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \ |
61 xmm1 = _mm_adds_epi16(xmm1, xmm4); \ | 54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \ |
62 xmm2 = _mm_adds_epi16(xmm2, xmm4); \ | 55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \ |
63 xmm0 = _mm_srai_epi16(xmm0, 6); \ | 56 xmm0 = _mm_srai_epi16(xmm0, 6); \ |
64 xmm1 = _mm_srai_epi16(xmm1, 6); \ | 57 xmm1 = _mm_srai_epi16(xmm1, 6); \ |
65 xmm2 = _mm_srai_epi16(xmm2, 6); \ | 58 xmm2 = _mm_srai_epi16(xmm2, 6); \ |
66 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ | 59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ |
67 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ | 60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ |
68 xmm2 = _mm_packus_epi16(xmm2, xmm2); | 61 xmm2 = _mm_packus_epi16(xmm2, xmm2); |
(...skipping 1784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1853 | 1846 |
1854 // Read 16 UV from 444 | 1847 // Read 16 UV from 444 |
1855 #define READYUV444_AVX2 __asm { \ | 1848 #define READYUV444_AVX2 __asm { \ |
1856 __asm vmovdqu xmm0, [esi] /* U */ \ | 1849 __asm vmovdqu xmm0, [esi] /* U */ \ |
1857 __asm vmovdqu xmm1, [esi + edi] /* V */ \ | 1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \ |
1858 __asm lea esi, [esi + 16] \ | 1851 __asm lea esi, [esi + 16] \ |
1859 __asm vpermq ymm0, ymm0, 0xd8 \ | 1852 __asm vpermq ymm0, ymm0, 0xd8 \ |
1860 __asm vpermq ymm1, ymm1, 0xd8 \ | 1853 __asm vpermq ymm1, ymm1, 0xd8 \ |
1861 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
1862 __asm vmovdqu xmm4, [eax] /* Y */ \ | 1855 __asm vmovdqu xmm4, [eax] /* Y */ \ |
1863 <<<<<<< HEAD | |
1864 __asm vpermq ymm4, ymm4, 0xd8 \ | 1856 __asm vpermq ymm4, ymm4, 0xd8 \ |
1865 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | 1857 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1866 ======= | |
1867 >>>>>>> refs/remotes/origin/master | |
1868 __asm lea eax, [eax + 16] \ | 1858 __asm lea eax, [eax + 16] \ |
1869 } | 1859 } |
1870 | 1860 |
1871 // Read 8 UV from 422, upsample to 16 UV. | 1861 // Read 8 UV from 422, upsample to 16 UV. |
1872 #define READYUV422_AVX2 __asm { \ | 1862 #define READYUV422_AVX2 __asm { \ |
1873 __asm vmovq xmm0, qword ptr [esi] /* U */ \ | 1863 __asm vmovq xmm0, qword ptr [esi] /* U */ \ |
1874 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ | 1864 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ |
1875 __asm lea esi, [esi + 8] \ | 1865 __asm lea esi, [esi + 8] \ |
1876 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1866 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
1877 __asm vpermq ymm0, ymm0, 0xd8 \ | 1867 __asm vpermq ymm0, ymm0, 0xd8 \ |
1878 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1868 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
1879 __asm vmovdqu xmm4, [eax] /* Y */ \ | 1869 __asm vmovdqu xmm4, [eax] /* Y */ \ |
1880 <<<<<<< HEAD | |
1881 __asm vpermq ymm4, ymm4, 0xd8 \ | 1870 __asm vpermq ymm4, ymm4, 0xd8 \ |
1882 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | 1871 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1883 ======= | |
1884 >>>>>>> refs/remotes/origin/master | |
1885 __asm lea eax, [eax + 16] \ | 1872 __asm lea eax, [eax + 16] \ |
1886 } | 1873 } |
1887 | 1874 |
1888 // Read 4 UV from 411, upsample to 16 UV. | 1875 // Read 4 UV from 411, upsample to 16 UV. |
1889 #define READYUV411_AVX2 __asm { \ | 1876 #define READYUV411_AVX2 __asm { \ |
1890 __asm vmovd xmm0, dword ptr [esi] /* U */ \ | 1877 __asm vmovd xmm0, dword ptr [esi] /* U */ \ |
1891 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ | 1878 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ |
1892 __asm lea esi, [esi + 4] \ | 1879 __asm lea esi, [esi + 4] \ |
1893 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ | 1880 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ |
1894 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1881 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
1895 __asm vpermq ymm0, ymm0, 0xd8 \ | 1882 __asm vpermq ymm0, ymm0, 0xd8 \ |
1896 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ | 1883 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ |
1897 __asm vmovdqu xmm4, [eax] /* Y */ \ | 1884 __asm vmovdqu xmm4, [eax] /* Y */ \ |
1898 <<<<<<< HEAD | |
1899 __asm vpermq ymm4, ymm4, 0xd8 \ | 1885 __asm vpermq ymm4, ymm4, 0xd8 \ |
1900 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | 1886 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1901 ======= | |
1902 >>>>>>> refs/remotes/origin/master | |
1903 __asm lea eax, [eax + 16] \ | 1887 __asm lea eax, [eax + 16] \ |
1904 } | 1888 } |
1905 | 1889 |
1906 // Read 8 UV from NV12, upsample to 16 UV. | 1890 // Read 8 UV from NV12, upsample to 16 UV. |
1907 #define READNV12_AVX2 __asm { \ | 1891 #define READNV12_AVX2 __asm { \ |
1908 __asm vmovdqu xmm0, [esi] /* UV */ \ | 1892 __asm vmovdqu xmm0, [esi] /* UV */ \ |
1909 __asm lea esi, [esi + 16] \ | 1893 __asm lea esi, [esi + 16] \ |
1910 __asm vpermq ymm0, ymm0, 0xd8 \ | 1894 __asm vpermq ymm0, ymm0, 0xd8 \ |
1911 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1895 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
1912 __asm vmovdqu xmm4, [eax] /* Y */ \ | 1896 __asm vmovdqu xmm4, [eax] /* Y */ \ |
1913 <<<<<<< HEAD | |
1914 __asm vpermq ymm4, ymm4, 0xd8 \ | 1897 __asm vpermq ymm4, ymm4, 0xd8 \ |
1915 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | 1898 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1916 ======= | |
1917 >>>>>>> refs/remotes/origin/master | |
1918 __asm lea eax, [eax + 16] \ | 1899 __asm lea eax, [eax + 16] \ |
1919 } | 1900 } |
1920 | 1901 |
1921 // Convert 16 pixels: 16 UV and 16 Y. | 1902 // Convert 16 pixels: 16 UV and 16 Y. |
1922 #define YUVTORGB_AVX2(YuvConstants) __asm { \ | 1903 #define YUVTORGB_AVX2(YuvConstants) __asm { \ |
1923 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ | 1904 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ |
1924 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ | 1905 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ |
1925 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ | 1906 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ |
1926 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ | 1907 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ |
1927 __asm vpsubw ymm2, ymm3, ymm2 \ | 1908 __asm vpsubw ymm2, ymm3, ymm2 \ |
1928 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ | 1909 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ |
1929 __asm vpsubw ymm1, ymm3, ymm1 \ | 1910 __asm vpsubw ymm1, ymm3, ymm1 \ |
1930 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ | 1911 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ |
1931 __asm vpsubw ymm0, ymm3, ymm0 \ | 1912 __asm vpsubw ymm0, ymm3, ymm0 \ |
1932 /* Step 2: Find Y contribution to 16 R,G,B values */ \ | 1913 /* Step 2: Find Y contribution to 16 R,G,B values */ \ |
1933 <<<<<<< HEAD | |
1934 ======= | |
1935 __asm vpermq ymm4, ymm4, 0xd8 \ | |
1936 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | |
1937 >>>>>>> refs/remotes/origin/master | |
1938 __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \ | 1914 __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \ |
1939 __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ | 1915 __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ |
1940 __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ | 1916 __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ |
1941 __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ | 1917 __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ |
1942 __asm vpsraw ymm0, ymm0, 6 \ | 1918 __asm vpsraw ymm0, ymm0, 6 \ |
1943 __asm vpsraw ymm1, ymm1, 6 \ | 1919 __asm vpsraw ymm1, ymm1, 6 \ |
1944 __asm vpsraw ymm2, ymm2, 6 \ | 1920 __asm vpsraw ymm2, ymm2, 6 \ |
1945 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ | 1921 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ |
1946 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ | 1922 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ |
1947 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ | 1923 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ |
(...skipping 368 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2316 #if defined(HAS_I422TOARGBROW_SSSE3) | 2292 #if defined(HAS_I422TOARGBROW_SSSE3) |
2317 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. | 2293 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. |
2318 | 2294 |
2319 // Read 8 UV from 444. | 2295 // Read 8 UV from 444. |
2320 #define READYUV444 __asm { \ | 2296 #define READYUV444 __asm { \ |
2321 __asm movq xmm0, qword ptr [esi] /* U */ \ | 2297 __asm movq xmm0, qword ptr [esi] /* U */ \ |
2322 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ | 2298 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ |
2323 __asm lea esi, [esi + 8] \ | 2299 __asm lea esi, [esi + 8] \ |
2324 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2300 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2325 __asm movq xmm4, qword ptr [eax] \ | 2301 __asm movq xmm4, qword ptr [eax] \ |
2326 <<<<<<< HEAD | |
2327 __asm punpcklbw xmm4, xmm4 \ | 2302 __asm punpcklbw xmm4, xmm4 \ |
2328 ======= | |
2329 >>>>>>> refs/remotes/origin/master | |
2330 __asm lea eax, [eax + 8] \ | 2303 __asm lea eax, [eax + 8] \ |
2331 } | 2304 } |
2332 | 2305 |
2333 // Read 4 UV from 422, upsample to 8 UV. | 2306 // Read 4 UV from 422, upsample to 8 UV. |
2334 #define READYUV422 __asm { \ | 2307 #define READYUV422 __asm { \ |
2335 __asm movd xmm0, [esi] /* U */ \ | 2308 __asm movd xmm0, [esi] /* U */ \ |
2336 __asm movd xmm1, [esi + edi] /* V */ \ | 2309 __asm movd xmm1, [esi + edi] /* V */ \ |
2337 __asm lea esi, [esi + 4] \ | 2310 __asm lea esi, [esi + 4] \ |
2338 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2311 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2339 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2312 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2340 __asm movq xmm4, qword ptr [eax] \ | 2313 __asm movq xmm4, qword ptr [eax] \ |
2341 <<<<<<< HEAD | |
2342 __asm punpcklbw xmm4, xmm4 \ | 2314 __asm punpcklbw xmm4, xmm4 \ |
2343 ======= | |
2344 >>>>>>> refs/remotes/origin/master | |
2345 __asm lea eax, [eax + 8] \ | 2315 __asm lea eax, [eax + 8] \ |
2346 } | 2316 } |
2347 | 2317 |
2348 // Read 2 UV from 411, upsample to 8 UV. | 2318 // Read 2 UV from 411, upsample to 8 UV. |
2349 #define READYUV411 __asm { \ | 2319 #define READYUV411 __asm { \ |
2350 __asm pinsrw xmm0, [esi], 0 /* U */ \ | 2320 __asm pinsrw xmm0, [esi], 0 /* U */ \ |
2351 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ | 2321 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ |
2352 __asm lea esi, [esi + 2] \ | 2322 __asm lea esi, [esi + 2] \ |
2353 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2323 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2354 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2324 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2355 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ | 2325 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ |
2356 __asm movq xmm4, qword ptr [eax] \ | 2326 __asm movq xmm4, qword ptr [eax] \ |
2357 <<<<<<< HEAD | |
2358 __asm punpcklbw xmm4, xmm4 \ | 2327 __asm punpcklbw xmm4, xmm4 \ |
2359 ======= | |
2360 >>>>>>> refs/remotes/origin/master | |
2361 __asm lea eax, [eax + 8] \ | 2328 __asm lea eax, [eax + 8] \ |
2362 } | 2329 } |
2363 | 2330 |
2364 // Read 4 UV from NV12, upsample to 8 UV. | 2331 // Read 4 UV from NV12, upsample to 8 UV. |
2365 #define READNV12 __asm { \ | 2332 #define READNV12 __asm { \ |
2366 __asm movq xmm0, qword ptr [esi] /* UV */ \ | 2333 __asm movq xmm0, qword ptr [esi] /* UV */ \ |
2367 __asm lea esi, [esi + 8] \ | 2334 __asm lea esi, [esi + 8] \ |
2368 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2335 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2369 __asm movq xmm4, qword ptr [eax] \ | 2336 __asm movq xmm4, qword ptr [eax] \ |
2370 <<<<<<< HEAD | |
2371 __asm punpcklbw xmm4, xmm4 \ | 2337 __asm punpcklbw xmm4, xmm4 \ |
2372 __asm lea eax, [eax + 8] \ | 2338 __asm lea eax, [eax + 8] \ |
2373 } | 2339 } |
2374 | 2340 |
2375 // YUY2 shuf 8 Y to 16 Y. | 2341 // YUY2 shuf 8 Y to 16 Y. |
2376 static const vec8 kShuffleYUY2Y = { | 2342 static const vec8 kShuffleYUY2Y = { |
2377 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 | 2343 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
2378 }; | 2344 }; |
2379 | 2345 |
2380 // YUY2 shuf 4 UV to 8 UV. | 2346 // YUY2 shuf 4 UV to 8 UV. |
(...skipping 20 matching lines...) Expand all Loading... |
2401 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 2367 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
2402 }; | 2368 }; |
2403 | 2369 |
2404 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. | 2370 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. |
2405 #define READUYVY __asm { \ | 2371 #define READUYVY __asm { \ |
2406 __asm movdqu xmm4, [eax] /* UYVY */ \ | 2372 __asm movdqu xmm4, [eax] /* UYVY */ \ |
2407 __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \ | 2373 __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \ |
2408 __asm movdqu xmm0, [eax] /* UV */ \ | 2374 __asm movdqu xmm0, [eax] /* UV */ \ |
2409 __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \ | 2375 __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \ |
2410 __asm lea eax, [eax + 16] \ | 2376 __asm lea eax, [eax + 16] \ |
2411 ======= | |
2412 __asm lea eax, [eax + 8] \ | 2377 __asm lea eax, [eax + 8] \ |
2413 >>>>>>> refs/remotes/origin/master | |
2414 } | 2378 } |
2415 | 2379 |
2416 // Convert 8 pixels: 8 UV and 8 Y. | 2380 // Convert 8 pixels: 8 UV and 8 Y. |
2417 #define YUVTORGB(YuvConstants) __asm { \ | 2381 #define YUVTORGB(YuvConstants) __asm { \ |
2418 __asm movdqa xmm1, xmm0 \ | 2382 __asm movdqa xmm1, xmm0 \ |
2419 __asm movdqa xmm2, xmm0 \ | 2383 __asm movdqa xmm2, xmm0 \ |
2420 __asm movdqa xmm3, xmm0 \ | 2384 __asm movdqa xmm3, xmm0 \ |
2421 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ | 2385 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ |
2422 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ | 2386 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ |
2423 __asm psubw xmm0, xmm1 \ | 2387 __asm psubw xmm0, xmm1 \ |
2424 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ | 2388 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ |
2425 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ | 2389 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ |
2426 __asm psubw xmm1, xmm2 \ | 2390 __asm psubw xmm1, xmm2 \ |
2427 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ | 2391 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ |
2428 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ | 2392 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ |
2429 __asm psubw xmm2, xmm3 \ | 2393 __asm psubw xmm2, xmm3 \ |
2430 <<<<<<< HEAD | |
2431 ======= | |
2432 __asm punpcklbw xmm4, xmm4 \ | |
2433 >>>>>>> refs/remotes/origin/master | |
2434 __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \ | 2394 __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \ |
2435 __asm paddsw xmm0, xmm4 /* B += Y */ \ | 2395 __asm paddsw xmm0, xmm4 /* B += Y */ \ |
2436 __asm paddsw xmm1, xmm4 /* G += Y */ \ | 2396 __asm paddsw xmm1, xmm4 /* G += Y */ \ |
2437 __asm paddsw xmm2, xmm4 /* R += Y */ \ | 2397 __asm paddsw xmm2, xmm4 /* R += Y */ \ |
2438 __asm psraw xmm0, 6 \ | 2398 __asm psraw xmm0, 6 \ |
2439 __asm psraw xmm1, 6 \ | 2399 __asm psraw xmm1, 6 \ |
2440 __asm psraw xmm2, 6 \ | 2400 __asm psraw xmm2, 6 \ |
2441 __asm packuswb xmm0, xmm0 /* B */ \ | 2401 __asm packuswb xmm0, xmm0 /* B */ \ |
2442 __asm packuswb xmm1, xmm1 /* G */ \ | 2402 __asm packuswb xmm1, xmm1 /* G */ \ |
2443 __asm packuswb xmm2, xmm2 /* R */ \ | 2403 __asm packuswb xmm2, xmm2 /* R */ \ |
(...skipping 3971 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6415 } | 6375 } |
6416 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6376 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6417 | 6377 |
6418 #endif // defined(_M_X64) | 6378 #endif // defined(_M_X64) |
6419 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6379 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6420 | 6380 |
6421 #ifdef __cplusplus | 6381 #ifdef __cplusplus |
6422 } // extern "C" | 6382 } // extern "C" |
6423 } // namespace libyuv | 6383 } // namespace libyuv |
6424 #endif | 6384 #endif |
OLD | NEW |