OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
129 // Shuffle table for converting ARGB to RAW. | 129 // Shuffle table for converting ARGB to RAW. |
130 static uvec8 kShuffleMaskARGBToRAW = { | 130 static uvec8 kShuffleMaskARGBToRAW = { |
131 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u | 131 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u |
132 }; | 132 }; |
133 | 133 |
134 // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 | 134 // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 |
135 static uvec8 kShuffleMaskARGBToRGB24_0 = { | 135 static uvec8 kShuffleMaskARGBToRGB24_0 = { |
136 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u | 136 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u |
137 }; | 137 }; |
138 | 138 |
139 // Shuffle table for converting ARGB to RAW. | |
140 static uvec8 kShuffleMaskARGBToRAW_0 = { | |
141 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u | |
142 }; | |
143 | |
144 // YUY2 shuf 16 Y to 32 Y. | 139 // YUY2 shuf 16 Y to 32 Y. |
145 static const lvec8 kShuffleYUY2Y = { | 140 static const lvec8 kShuffleYUY2Y = { |
146 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, | 141 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, |
147 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 | 142 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
148 }; | 143 }; |
149 | 144 |
150 // YUY2 shuf 8 UV to 16 UV. | 145 // YUY2 shuf 8 UV to 16 UV. |
151 static const lvec8 kShuffleYUY2UV = { | 146 static const lvec8 kShuffleYUY2UV = { |
152 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, | 147 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, |
153 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 | 148 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 |
(...skipping 1396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1550 #define STOREARGB \ | 1545 #define STOREARGB \ |
1551 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1546 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1552 "punpcklbw %%xmm5,%%xmm2 \n" \ | 1547 "punpcklbw %%xmm5,%%xmm2 \n" \ |
1553 "movdqa %%xmm0,%%xmm1 \n" \ | 1548 "movdqa %%xmm0,%%xmm1 \n" \ |
1554 "punpcklwd %%xmm2,%%xmm0 \n" \ | 1549 "punpcklwd %%xmm2,%%xmm0 \n" \ |
1555 "punpckhwd %%xmm2,%%xmm1 \n" \ | 1550 "punpckhwd %%xmm2,%%xmm1 \n" \ |
1556 "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ | 1551 "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ |
1557 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ | 1552 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ |
1558 "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" | 1553 "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" |
1559 | 1554 |
1560 // Store 8 BGRA values. | |
1561 #define STOREBGRA \ | |
1562 "pcmpeqb %%xmm5,%%xmm5 \n" \ | |
1563 "punpcklbw %%xmm0,%%xmm1 \n" \ | |
1564 "punpcklbw %%xmm2,%%xmm5 \n" \ | |
1565 "movdqa %%xmm5,%%xmm0 \n" \ | |
1566 "punpcklwd %%xmm1,%%xmm5 \n" \ | |
1567 "punpckhwd %%xmm1,%%xmm0 \n" \ | |
1568 "movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \ | |
1569 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_bgra]) " \n" \ | |
1570 "lea " MEMLEA(0x20, [dst_bgra]) ", %[dst_bgra] \n" | |
1571 | |
1572 // Store 8 ABGR values. | |
1573 #define STOREABGR \ | |
1574 "punpcklbw %%xmm1,%%xmm2 \n" \ | |
1575 "punpcklbw %%xmm5,%%xmm0 \n" \ | |
1576 "movdqa %%xmm2,%%xmm1 \n" \ | |
1577 "punpcklwd %%xmm0,%%xmm2 \n" \ | |
1578 "punpckhwd %%xmm0,%%xmm1 \n" \ | |
1579 "movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \ | |
1580 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_abgr]) " \n" \ | |
1581 "lea " MEMLEA(0x20, [dst_abgr]) ", %[dst_abgr] \n" | |
1582 | |
1583 // Store 8 RGBA values. | 1555 // Store 8 RGBA values. |
1584 #define STORERGBA \ | 1556 #define STORERGBA \ |
1585 "pcmpeqb %%xmm5,%%xmm5 \n" \ | 1557 "pcmpeqb %%xmm5,%%xmm5 \n" \ |
1586 "punpcklbw %%xmm2,%%xmm1 \n" \ | 1558 "punpcklbw %%xmm2,%%xmm1 \n" \ |
1587 "punpcklbw %%xmm0,%%xmm5 \n" \ | 1559 "punpcklbw %%xmm0,%%xmm5 \n" \ |
1588 "movdqa %%xmm5,%%xmm0 \n" \ | 1560 "movdqa %%xmm5,%%xmm0 \n" \ |
1589 "punpcklwd %%xmm1,%%xmm5 \n" \ | 1561 "punpcklwd %%xmm1,%%xmm5 \n" \ |
1590 "punpckhwd %%xmm1,%%xmm0 \n" \ | 1562 "punpckhwd %%xmm1,%%xmm0 \n" \ |
1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ | 1563 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ |
1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ | 1564 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ |
(...skipping 378 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1971 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ | 1943 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ |
1972 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ | 1944 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ |
1973 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ | 1945 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ |
1974 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | 1946 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ |
1975 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ | 1947 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ |
1976 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ | 1948 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ |
1977 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ | 1949 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ |
1978 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ | 1950 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ |
1979 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" | 1951 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" |
1980 | 1952 |
1981 // Store 16 ABGR values. | |
1982 #define STOREABGR_AVX2 \ | |
1983 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \ | |
1984 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ | |
1985 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \ | |
1986 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | |
1987 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \ | |
1988 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \ | |
1989 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \ | |
1990 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ | |
1991 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" | |
1992 | |
1993 #if defined(HAS_I422TOARGBROW_AVX2) | 1953 #if defined(HAS_I422TOARGBROW_AVX2) |
1994 // 16 pixels | 1954 // 16 pixels |
1995 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1955 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
1996 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 1956 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
1997 const uint8* u_buf, | 1957 const uint8* u_buf, |
1998 const uint8* v_buf, | 1958 const uint8* v_buf, |
1999 uint8* dst_argb, | 1959 uint8* dst_argb, |
2000 const struct YuvConstants* yuvconstants, | 1960 const struct YuvConstants* yuvconstants, |
2001 int width) { | 1961 int width) { |
2002 asm volatile ( | 1962 asm volatile ( |
(...skipping 3317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5320 ); | 5280 ); |
5321 } | 5281 } |
5322 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5282 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5323 | 5283 |
5324 #endif // defined(__x86_64__) || defined(__i386__) | 5284 #endif // defined(__x86_64__) || defined(__i386__) |
5325 | 5285 |
5326 #ifdef __cplusplus | 5286 #ifdef __cplusplus |
5327 } // extern "C" | 5287 } // extern "C" |
5328 } // namespace libyuv | 5288 } // namespace libyuv |
5329 #endif | 5289 #endif |
OLD | NEW |