Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(174)

Side by Side Diff: source/row_gcc.cc

Issue 1388273002: Reimplement NV21ToARGB to allow different color matrix. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: include scale_row.h for scaling macros | Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 static const lvec8 kShuffleUYVYY = { 157 static const lvec8 kShuffleUYVYY = {
158 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, 158 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
159 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 159 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
160 }; 160 };
161 161
162 // UYVY shuf 8 UV to 16 UV. 162 // UYVY shuf 8 UV to 16 UV.
163 static const lvec8 kShuffleUYVYUV = { 163 static const lvec8 kShuffleUYVYUV = {
164 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, 164 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,
165 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 165 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
166 }; 166 };
167
168 // NV21 shuf 8 VU to 16 UV.
169 static const lvec8 kShuffleNV21 = {
170 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
171 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
172 };
167 #endif // HAS_RGB24TOARGBROW_SSSE3 173 #endif // HAS_RGB24TOARGBROW_SSSE3
168 174
169 #ifdef HAS_J400TOARGBROW_SSE2 175 #ifdef HAS_J400TOARGBROW_SSE2
170 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { 176 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
171 asm volatile ( 177 asm volatile (
172 "pcmpeqb %%xmm5,%%xmm5 \n" 178 "pcmpeqb %%xmm5,%%xmm5 \n"
173 "pslld $0x18,%%xmm5 \n" 179 "pslld $0x18,%%xmm5 \n"
174 LABELALIGN 180 LABELALIGN
175 "1: \n" 181 "1: \n"
176 "movq " MEMACCESS(0) ",%%xmm0 \n" 182 "movq " MEMACCESS(0) ",%%xmm0 \n"
(...skipping 1214 matching lines...) Expand 10 before | Expand all | Expand 10 after
1391 1397
1392 // Read 4 UV from NV12, upsample to 8 UV 1398 // Read 4 UV from NV12, upsample to 8 UV
1393 #define READNV12 \ 1399 #define READNV12 \
1394 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1400 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1395 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ 1401 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
1396 "punpcklwd %%xmm0,%%xmm0 \n" \ 1402 "punpcklwd %%xmm0,%%xmm0 \n" \
1397 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1403 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1398 "punpcklbw %%xmm4,%%xmm4 \n" \ 1404 "punpcklbw %%xmm4,%%xmm4 \n" \
1399 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" 1405 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1400 1406
1407 // Read 4 VU from NV21, upsample to 8 UV
1408 #define READNV21 \
1409 "movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
1410 "lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \
1411 "pshufb %[kShuffleNV21], %%xmm0 \n" \
1412 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1413 "punpcklbw %%xmm4,%%xmm4 \n" \
1414 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1415
1401 // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. 1416 // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV.
1402 #define READYUY2 \ 1417 #define READYUY2 \
1403 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \ 1418 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \
1404 "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ 1419 "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
1405 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \ 1420 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \
1406 "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ 1421 "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
1407 "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n" 1422 "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n"
1408 1423
1409 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. 1424 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV.
1410 #define READUYVY \ 1425 #define READUYVY \
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
1762 : [y_buf]"+r"(y_buf), // %[y_buf] 1777 : [y_buf]"+r"(y_buf), // %[y_buf]
1763 [uv_buf]"+r"(uv_buf), // %[uv_buf] 1778 [uv_buf]"+r"(uv_buf), // %[uv_buf]
1764 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1779 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1765 [width]"+rm"(width) // %[width] 1780 [width]"+rm"(width) // %[width]
1766 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1781 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1767 // Does not use r14. 1782 // Does not use r14.
1768 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1783 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1769 ); 1784 );
1770 } 1785 }
1771 1786
1787 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
1788 const uint8* vu_buf,
1789 uint8* dst_argb,
1790 struct YuvConstants* yuvconstants,
1791 int width) {
1792 asm volatile (
1793 "pcmpeqb %%xmm5,%%xmm5 \n"
1794 LABELALIGN
1795 "1: \n"
1796 READNV21
1797 YUVTORGB(yuvconstants)
1798 STOREARGB
1799 "sub $0x8,%[width] \n"
1800 "jg 1b \n"
1801 : [y_buf]"+r"(y_buf), // %[y_buf]
1802 [vu_buf]"+r"(vu_buf), // %[vu_buf]
1803 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1804 [width]"+rm"(width) // %[width]
1805 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1806 [kShuffleNV21]"m"(kShuffleNV21)
1807 // Does not use r14.
1808 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1809 );
1810 }
1811
1772 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, 1812 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
1773 uint8* dst_argb, 1813 uint8* dst_argb,
1774 struct YuvConstants* yuvconstants, 1814 struct YuvConstants* yuvconstants,
1775 int width) { 1815 int width) {
1776 asm volatile ( 1816 asm volatile (
1777 "pcmpeqb %%xmm5,%%xmm5 \n" 1817 "pcmpeqb %%xmm5,%%xmm5 \n"
1778 LABELALIGN 1818 LABELALIGN
1779 "1: \n" 1819 "1: \n"
1780 READYUY2 1820 READYUY2
1781 YUVTORGB(yuvconstants) 1821 YUVTORGB(yuvconstants)
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
1933 #define READNV12_AVX2 \ 1973 #define READNV12_AVX2 \
1934 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1974 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1935 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \ 1975 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \
1936 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1976 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1937 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1977 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
1938 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1978 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1939 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1979 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1940 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ 1980 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1941 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" 1981 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
1942 1982
1983 // Read 8 VU from NV21, upsample to 16 UV.
1984 #define READNV21_AVX2 \
1985 "vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
1986 "lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \
1987 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1988 "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
1989 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1990 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1991 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1992 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
1993
1943 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. 1994 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
1944 #define READYUY2_AVX2 \ 1995 #define READYUY2_AVX2 \
1945 "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \ 1996 "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \
1946 "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ 1997 "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \
1947 "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \ 1998 "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \
1948 "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ 1999 "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \
1949 "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n" 2000 "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n"
1950 2001
1951 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. 2002 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
1952 #define READUYVY_AVX2 \ 2003 #define READUYVY_AVX2 \
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
2244 "vzeroupper \n" 2295 "vzeroupper \n"
2245 : [y_buf]"+r"(y_buf), // %[y_buf] 2296 : [y_buf]"+r"(y_buf), // %[y_buf]
2246 [uv_buf]"+r"(uv_buf), // %[uv_buf] 2297 [uv_buf]"+r"(uv_buf), // %[uv_buf]
2247 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2298 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2248 [width]"+rm"(width) // %[width] 2299 [width]"+rm"(width) // %[width]
2249 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2300 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2250 // Does not use r14. 2301 // Does not use r14.
2251 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2302 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2252 ); 2303 );
2253 } 2304 }
2254 #endif // HAS_YUY2TOARGBROW_AVX2 2305 #endif // HAS_NV12TOARGBROW_AVX2
2255 2306
2307 #if defined(HAS_NV21TOARGBROW_AVX2)
2308 // 16 pixels.
2309 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2310 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
2311 const uint8* vu_buf,
2312 uint8* dst_argb,
2313 struct YuvConstants* yuvconstants,
2314 int width) {
2315 asm volatile (
2316 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2317 LABELALIGN
2318 "1: \n"
2319 READNV21_AVX2
2320 YUVTORGB_AVX2(yuvconstants)
2321 STOREARGB_AVX2
2322 "sub $0x10,%[width] \n"
2323 "jg 1b \n"
2324 "vzeroupper \n"
2325 : [y_buf]"+r"(y_buf), // %[y_buf]
2326 [vu_buf]"+r"(vu_buf), // %[vu_buf]
2327 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2328 [width]"+rm"(width) // %[width]
2329 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
2330 [kShuffleNV21]"m"(kShuffleNV21)
2331 // Does not use r14.
2332 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2333 );
2334 }
2335 #endif // HAS_NV21TOARGBROW_AVX2
2256 2336
2257 #if defined(HAS_YUY2TOARGBROW_AVX2) 2337 #if defined(HAS_YUY2TOARGBROW_AVX2)
2258 // 16 pixels. 2338 // 16 pixels.
2259 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2339 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2260 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, 2340 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
2261 uint8* dst_argb, 2341 uint8* dst_argb,
2262 struct YuvConstants* yuvconstants, 2342 struct YuvConstants* yuvconstants,
2263 int width) { 2343 int width) {
2264 asm volatile ( 2344 asm volatile (
2265 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2345 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
(...skipping 3146 matching lines...) Expand 10 before | Expand all | Expand 10 after
5412 ); 5492 );
5413 } 5493 }
5414 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5494 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5415 5495
5416 #endif // defined(__x86_64__) || defined(__i386__) 5496 #endif // defined(__x86_64__) || defined(__i386__)
5417 5497
5418 #ifdef __cplusplus 5498 #ifdef __cplusplus
5419 } // extern "C" 5499 } // extern "C"
5420 } // namespace libyuv 5500 } // namespace libyuv
5421 #endif 5501 #endif
OLD | NEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698