OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1929 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1940 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] | 1940 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] |
1941 [width]"+rm"(width) // %[width] | 1941 [width]"+rm"(width) // %[width] |
1942 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1942 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1943 : "memory", "cc", NACL_R14 YUVTORGB_REGS | 1943 : "memory", "cc", NACL_R14 YUVTORGB_REGS |
1944 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1944 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1945 ); | 1945 ); |
1946 } | 1946 } |
1947 | 1947 |
1948 #endif // HAS_I422TOARGBROW_SSSE3 | 1948 #endif // HAS_I422TOARGBROW_SSSE3 |
1949 | 1949 |
| 1950 // Read 16 UV from 444 |
| 1951 #define READYUV444_AVX2 \ |
| 1952 "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1953 MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1954 "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \ |
| 1955 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ |
| 1956 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ |
| 1957 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ |
| 1958 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1959 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ |
| 1960 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ |
| 1961 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" |
| 1962 |
1950 // Read 8 UV from 422, upsample to 16 UV. | 1963 // Read 8 UV from 422, upsample to 16 UV. |
1951 #define READYUV422_AVX2 \ | 1964 #define READYUV422_AVX2 \ |
1952 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1965 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1953 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1966 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1954 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1967 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
1955 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ | 1968 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ |
1956 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ | 1969 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ |
1957 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ | 1970 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ |
1958 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1971 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1959 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ | 1972 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2072 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ | 2085 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ |
2073 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ | 2086 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ |
2074 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ | 2087 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ |
2075 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | 2088 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ |
2076 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ | 2089 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ |
2077 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ | 2090 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ |
2078 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ | 2091 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ |
2079 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ | 2092 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ |
2080 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" | 2093 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" |
2081 | 2094 |
| 2095 #ifdef HAS_I444TOARGBROW_AVX2 |
| 2096 // 16 pixels |
| 2097 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
| 2098 void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, |
| 2099 const uint8* u_buf, |
| 2100 const uint8* v_buf, |
| 2101 uint8* dst_argb, |
| 2102 const struct YuvConstants* yuvconstants, |
| 2103 int width) { |
| 2104 asm volatile ( |
| 2105 YUVTORGB_SETUP_AVX2(yuvconstants) |
| 2106 "sub %[u_buf],%[v_buf] \n" |
| 2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2108 LABELALIGN |
| 2109 "1: \n" |
| 2110 READYUV444_AVX2 |
| 2111 YUVTORGB_AVX2(yuvconstants) |
| 2112 STOREARGB_AVX2 |
| 2113 "sub $0x10,%[width] \n" |
| 2114 "jg 1b \n" |
| 2115 "vzeroupper \n" |
| 2116 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2117 [u_buf]"+r"(u_buf), // %[u_buf] |
| 2118 [v_buf]"+r"(v_buf), // %[v_buf] |
| 2119 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2120 [width]"+rm"(width) // %[width] |
| 2121 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2122 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
| 2123 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2124 ); |
| 2125 } |
| 2126 #endif // HAS_I444TOARGBROW_AVX2 |
| 2127 |
2082 #if defined(HAS_I422TOARGBROW_AVX2) | 2128 #if defined(HAS_I422TOARGBROW_AVX2) |
2083 // 16 pixels | 2129 // 16 pixels |
2084 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2130 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2085 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 2131 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
2086 const uint8* u_buf, | 2132 const uint8* u_buf, |
2087 const uint8* v_buf, | 2133 const uint8* v_buf, |
2088 uint8* dst_argb, | 2134 uint8* dst_argb, |
2089 const struct YuvConstants* yuvconstants, | 2135 const struct YuvConstants* yuvconstants, |
2090 int width) { | 2136 int width) { |
2091 asm volatile ( | 2137 asm volatile ( |
2092 YUVTORGB_SETUP_AVX2(yuvconstants) | 2138 YUVTORGB_SETUP_AVX2(yuvconstants) |
2093 "sub %[u_buf],%[v_buf] \n" | 2139 "sub %[u_buf],%[v_buf] \n" |
2094 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2140 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2095 LABELALIGN | 2141 LABELALIGN |
2096 "1: \n" | 2142 "1: \n" |
2097 READYUV422_AVX2 | 2143 READYUV422_AVX2 |
2098 YUVTORGB_AVX2(yuvconstants) | 2144 YUVTORGB_AVX2(yuvconstants) |
2099 STOREARGB_AVX2 | 2145 STOREARGB_AVX2 |
2100 "sub $0x10,%[width] \n" | 2146 "sub $0x10,%[width] \n" |
2101 "jg 1b \n" | 2147 "jg 1b \n" |
2102 "vzeroupper \n" | 2148 "vzeroupper \n" |
2103 : [y_buf]"+r"(y_buf), // %[y_buf] | 2149 : [y_buf]"+r"(y_buf), // %[y_buf] |
2104 [u_buf]"+r"(u_buf), // %[u_buf] | 2150 [u_buf]"+r"(u_buf), // %[u_buf] |
(...skipping 3311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5416 ); | 5462 ); |
5417 } | 5463 } |
5418 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5464 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5419 | 5465 |
5420 #endif // defined(__x86_64__) || defined(__i386__) | 5466 #endif // defined(__x86_64__) || defined(__i386__) |
5421 | 5467 |
5422 #ifdef __cplusplus | 5468 #ifdef __cplusplus |
5423 } // extern "C" | 5469 } // extern "C" |
5424 } // namespace libyuv | 5470 } // namespace libyuv |
5425 #endif | 5471 #endif |
OLD | NEW |