Index: source/row_gcc.cc |
diff --git a/source/row_gcc.cc b/source/row_gcc.cc |
index 105fa987b314c3cd29e7d223ac3ff0b47ce9f0de..9940cba125655c238f8bdc4b72e9a2070882b73d 100644 |
--- a/source/row_gcc.cc |
+++ b/source/row_gcc.cc |
@@ -1564,6 +1564,44 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, |
"pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ |
"lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" |
+#if defined(__x86_64__) |
+#define YUVTORGB_SETUP(yuvconstants) /* x86_64 only: preload seven constant rows from yuvconstants into xmm8-xmm14; callers place this before the 1: loop label. Rows are 32 bytes apart, movdqa reads the first 16 bytes of each and needs 16-byte alignment. */ \ |
+ "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" /* offset 0: pmaddubsw multiplier for output xmm0 in YUVTORGB */ \ |
+ "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9 \n" /* offset 32: pmaddubsw multiplier for output xmm1 */ \ |
+ "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" /* offset 64: pmaddubsw multiplier for output xmm2 */ \ |
+ "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" /* offset 96: value the xmm0 product is subtracted from (presumably a bias - confirm against struct YuvConstants) */ \ |
+ "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" /* offset 128: same role for output xmm1 */ \ |
+ "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" /* offset 160: same role for output xmm2 */ \ |
+ "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n" /* offset 192: pmulhuw multiplier applied to xmm4 in YUVTORGB */ |
+// Convert 8 pixels: 8 UV and 8 Y. x86_64 variant: reads UV bytes from xmm0 and Y words from xmm4, uses the constants YUVTORGB_SETUP left in xmm8-xmm14, and leaves three packed byte channels in xmm0, xmm1, xmm2 (presumably B, G, R - confirm against the STORE* macros). Overwrites xmm3 and xmm4. The yuvconstants parameter is unused in this variant. |
+#define YUVTORGB(yuvconstants) \ |
+ "movdqa %%xmm0,%%xmm1 \n" /* three copies of the UV input, one per output channel */ \ |
+ "movdqa %%xmm0,%%xmm2 \n" \ |
+ "movdqa %%xmm0,%%xmm3 \n" \ |
+ "movdqa %%xmm11,%%xmm0 \n" /* output 0: xmm0 = xmm11 - pmaddubsw(UV, xmm8) */ \ |
+ "pmaddubsw %%xmm8,%%xmm1 \n" \ |
+ "psubw %%xmm1,%%xmm0 \n" \ |
+ "movdqa %%xmm12,%%xmm1 \n" /* output 1: xmm1 = xmm12 - pmaddubsw(UV, xmm9) */ \ |
+ "pmaddubsw %%xmm9,%%xmm2 \n" \ |
+ "psubw %%xmm2,%%xmm1 \n" \ |
+ "movdqa %%xmm13,%%xmm2 \n" /* output 2: xmm2 = xmm13 - pmaddubsw(UV, xmm10) */ \ |
+ "pmaddubsw %%xmm10,%%xmm3 \n" \ |
+ "psubw %%xmm3,%%xmm2 \n" \ |
+ "pmulhuw %%xmm14,%%xmm4 \n" /* scale Y: keep the high 16 bits of the unsigned product xmm4 * xmm14 */ \ |
+ "paddsw %%xmm4,%%xmm0 \n" /* add the scaled Y to all three outputs with signed saturation */ \ |
+ "paddsw %%xmm4,%%xmm1 \n" \ |
+ "paddsw %%xmm4,%%xmm2 \n" \ |
+ "psraw $0x6,%%xmm0 \n" /* arithmetic shift right by 6 (values appear to be fixed point with 6 fraction bits) */ \ |
+ "psraw $0x6,%%xmm1 \n" \ |
+ "psraw $0x6,%%xmm2 \n" \ |
+ "packuswb %%xmm0,%%xmm0 \n" /* saturate the signed words to unsigned bytes; both halves hold the same 8 bytes */ \ |
+ "packuswb %%xmm1,%%xmm1 \n" \ |
+ "packuswb %%xmm2,%%xmm2 \n" |
+#define YUVTORGB_REGS /* x86_64 only: extra clobber entries for the registers written by YUVTORGB_SETUP. The trailing comma lets this be spliced in front of the xmm0.. entries of a clobber list. */ \ |
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", |
+ |
+#else |
+#define YUVTORGB_SETUP(yuvconstants) /* not x86_64: expands to nothing, so no constants are preloaded into registers */ |
// Convert 8 pixels: 8 UV and 8 Y |
#define YUVTORGB(yuvconstants) \ |
"movdqa %%xmm0,%%xmm1 \n" \ |
@@ -1588,6 +1626,8 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, |
"packuswb %%xmm0,%%xmm0 \n" \ |
"packuswb %%xmm1,%%xmm1 \n" \ |
"packuswb %%xmm2,%%xmm2 \n" |
+#define YUVTORGB_REGS /* not x86_64: expands to nothing, so no extra clobber entries are added */ |
+#endif |
// Store 8 ARGB values. |
#define STOREARGB \ |
@@ -1619,6 +1659,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
@@ -1634,7 +1675,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1646,6 +1687,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" |
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" |
"sub %[u_buf],%[v_buf] \n" |
@@ -1678,7 +1720,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
); |
} |
@@ -1690,6 +1732,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
@@ -1705,7 +1748,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1719,6 +1762,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
LABELALIGN |
"1: \n" |
@@ -1738,7 +1782,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
[width]"+rm"(width) // %[width] |
#endif |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1751,6 +1795,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
@@ -1766,7 +1811,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1777,6 +1822,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
"1: \n" |
@@ -1790,8 +1836,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1801,6 +1847,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
"1: \n" |
@@ -1815,8 +1862,8 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleNV21]"m"(kShuffleNV21) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1825,6 +1872,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
"1: \n" |
@@ -1839,8 +1887,8 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
[kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1849,6 +1897,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
"1: \n" |
@@ -1863,8 +1912,8 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleUYVYY]"m"(kShuffleUYVYY), |
[kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1875,6 +1924,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"pcmpeqb %%xmm5,%%xmm5 \n" |
LABELALIGN |
@@ -1890,7 +1940,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -1964,7 +2014,36 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
"vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ |
"lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n" |
-// Convert 16 pixels: 16 UV and 16 Y. |
+#if defined(__x86_64__) |
+#define YUVTORGB_SETUP_AVX2(yuvconstants) /* x86_64 only: preload seven 32-byte constant rows from yuvconstants into ymm8-ymm14; callers place this before the 1: loop label. vmovdqa is the aligned load, so the table must be 32-byte aligned. */ \ |
+ "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" /* offset 0: vpmaddubsw multiplier for output ymm0 in YUVTORGB_AVX2 */ \ |
+ "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" /* offset 32: vpmaddubsw multiplier for output ymm1 */ \ |
+ "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" /* offset 64: vpmaddubsw multiplier for output ymm2 */ \ |
+ "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" /* offset 96: value the ymm0 product is subtracted from (presumably a bias - confirm against struct YuvConstants) */ \ |
+ "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" /* offset 128: same role for output ymm1 */ \ |
+ "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" /* offset 160: same role for output ymm2 */ \ |
+ "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n" /* offset 192: vpmulhuw multiplier applied to ymm4 in YUVTORGB_AVX2 */ |
+#define YUVTORGB_AVX2(yuvconstants) /* x86_64 variant: reads UV bytes from ymm0 and Y words from ymm4, uses the constants YUVTORGB_SETUP_AVX2 left in ymm8-ymm14, and leaves three packed byte channels in ymm0, ymm1, ymm2 (presumably B, G, R - confirm against the STORE* macros). Overwrites ymm4. The yuvconstants parameter is unused in this variant. */ \ |
+ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" /* ymm2 = vpmaddubsw(UV, ymm10) */ \ |
+ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" /* ymm1 = vpmaddubsw(UV, ymm9) */ \ |
+ "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" /* done last because it overwrites the UV input held in ymm0 */ \ |
+ "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" /* ymm2 = ymm13 - ymm2 */ \ |
+ "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" /* ymm1 = ymm12 - ymm1 */ \ |
+ "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" /* ymm0 = ymm11 - ymm0 */ \ |
+ "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" /* scale Y: keep the high 16 bits of the unsigned product ymm4 * ymm14 */ \ |
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" /* add the scaled Y to all three outputs with signed saturation */ \ |
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ |
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ |
+ "vpsraw $0x6,%%ymm0,%%ymm0 \n" /* arithmetic shift right by 6 (values appear to be fixed point with 6 fraction bits) */ \ |
+ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ |
+ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ |
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" /* saturate the signed words to unsigned bytes, separately within each 128-bit lane */ \ |
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ |
+ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" |
+#define YUVTORGB_REGS_AVX2 /* x86_64 only: extra clobber entries for the registers written by YUVTORGB_SETUP_AVX2. The trailing comma lets this be spliced in front of the xmm0.. entries of a clobber list. NOTE(review): the setup writes ymm8-ymm14 but the entries use xmm names, presumably because the compiler treats an xmm clobber as covering the whole register - confirm. */ \ |
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", |
+#else  // not x86_64. Convert 16 pixels: 16 UV and 16 Y. |
+#define YUVTORGB_SETUP_AVX2(yuvconstants) /* not x86_64: expands to nothing; the YUVTORGB_AVX2 below reads the constants straight from memory operands */ |
#define YUVTORGB_AVX2(yuvconstants) \ |
"vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \ |
"vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \ |
@@ -1985,6 +2064,8 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ |
"vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ |
"vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" |
+#define YUVTORGB_REGS_AVX2 /* not x86_64: expands to nothing, so no extra clobber entries are added */ |
+#endif |
// Store 16 ARGB values. |
#define STOREARGB_AVX2 \ |
@@ -2008,6 +2089,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
@@ -2024,7 +2106,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -2041,6 +2123,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
LABELALIGN |
"1: \n" |
@@ -2061,7 +2144,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, |
[width]"+rm"(width) // %[width] |
#endif |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -2077,6 +2160,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"sub %[u_buf],%[v_buf] \n" |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
@@ -2103,7 +2187,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- : "memory", "cc", NACL_R14 |
+ : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
@@ -2118,6 +2202,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
"1: \n" |
@@ -2132,8 +2217,8 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, |
[dst_argb]"+r"(dst_argb), // %[dst_argb] |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
#endif // HAS_NV12TOARGBROW_AVX2 |
@@ -2147,6 +2232,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
"1: \n" |
@@ -2162,8 +2248,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, |
[width]"+rm"(width) // %[width] |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleNV21]"m"(kShuffleNV21) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
#endif // HAS_NV21TOARGBROW_AVX2 |
@@ -2176,6 +2262,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
"1: \n" |
@@ -2191,8 +2278,8 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
[kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
#endif // HAS_YUY2TOARGBROW_AVX2 |
@@ -2205,6 +2292,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, |
const struct YuvConstants* yuvconstants, |
int width) { |
asm volatile ( |
+ YUVTORGB_SETUP_AVX2(yuvconstants) |
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
LABELALIGN |
"1: \n" |
@@ -2220,8 +2308,8 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, |
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
[kShuffleUYVYY]"m"(kShuffleUYVYY), |
[kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
- // Does not use r14. |
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
+ : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
); |
} |
#endif // HAS_UYVYTOARGBROW_AVX2 |