| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 // Shuffle table for converting ARGB to RAW. | 129 // Shuffle table for converting ARGB to RAW. |
| 130 static uvec8 kShuffleMaskARGBToRAW = { | 130 static uvec8 kShuffleMaskARGBToRAW = { |
| 131 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u | 131 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u |
| 132 }; | 132 }; |
| 133 | 133 |
| 134 // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 | 134 // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 |
| 135 static uvec8 kShuffleMaskARGBToRGB24_0 = { | 135 static uvec8 kShuffleMaskARGBToRGB24_0 = { |
| 136 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u | 136 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u |
| 137 }; | 137 }; |
| 138 | 138 |
| 139 // Shuffle table for converting ARGB to RAW. | |
| 140 static uvec8 kShuffleMaskARGBToRAW_0 = { | |
| 141 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u | |
| 142 }; | |
| 143 | |
| 144 // YUY2 shuf 16 Y to 32 Y. | 139 // YUY2 shuf 16 Y to 32 Y. |
| 145 static const lvec8 kShuffleYUY2Y = { | 140 static const lvec8 kShuffleYUY2Y = { |
| 146 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, | 141 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, |
| 147 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 | 142 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
| 148 }; | 143 }; |
| 149 | 144 |
| 150 // YUY2 shuf 8 UV to 16 UV. | 145 // YUY2 shuf 8 UV to 16 UV. |
| 151 static const lvec8 kShuffleYUY2UV = { | 146 static const lvec8 kShuffleYUY2UV = { |
| 152 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, | 147 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, |
| 153 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 | 148 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 |
| (...skipping 1396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1550 #define STOREARGB \ | 1545 #define STOREARGB \ |
| 1551 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1546 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1552 "punpcklbw %%xmm5,%%xmm2 \n" \ | 1547 "punpcklbw %%xmm5,%%xmm2 \n" \ |
| 1553 "movdqa %%xmm0,%%xmm1 \n" \ | 1548 "movdqa %%xmm0,%%xmm1 \n" \ |
| 1554 "punpcklwd %%xmm2,%%xmm0 \n" \ | 1549 "punpcklwd %%xmm2,%%xmm0 \n" \ |
| 1555 "punpckhwd %%xmm2,%%xmm1 \n" \ | 1550 "punpckhwd %%xmm2,%%xmm1 \n" \ |
| 1556 "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ | 1551 "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ |
| 1557 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ | 1552 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ |
| 1558 "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" | 1553 "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" |
| 1559 | 1554 |
| 1560 // Store 8 BGRA values. | |
| 1561 #define STOREBGRA \ | |
| 1562 "pcmpeqb %%xmm5,%%xmm5 \n" \ | |
| 1563 "punpcklbw %%xmm0,%%xmm1 \n" \ | |
| 1564 "punpcklbw %%xmm2,%%xmm5 \n" \ | |
| 1565 "movdqa %%xmm5,%%xmm0 \n" \ | |
| 1566 "punpcklwd %%xmm1,%%xmm5 \n" \ | |
| 1567 "punpckhwd %%xmm1,%%xmm0 \n" \ | |
| 1568 "movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \ | |
| 1569 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_bgra]) " \n" \ | |
| 1570 "lea " MEMLEA(0x20, [dst_bgra]) ", %[dst_bgra] \n" | |
| 1571 | |
| 1572 // Store 8 ABGR values. | |
| 1573 #define STOREABGR \ | |
| 1574 "punpcklbw %%xmm1,%%xmm2 \n" \ | |
| 1575 "punpcklbw %%xmm5,%%xmm0 \n" \ | |
| 1576 "movdqa %%xmm2,%%xmm1 \n" \ | |
| 1577 "punpcklwd %%xmm0,%%xmm2 \n" \ | |
| 1578 "punpckhwd %%xmm0,%%xmm1 \n" \ | |
| 1579 "movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \ | |
| 1580 "movdqu %%xmm1," MEMACCESS2(0x10, [dst_abgr]) " \n" \ | |
| 1581 "lea " MEMLEA(0x20, [dst_abgr]) ", %[dst_abgr] \n" | |
| 1582 | |
| 1583 // Store 8 RGBA values. | 1555 // Store 8 RGBA values. |
| 1584 #define STORERGBA \ | 1556 #define STORERGBA \ |
| 1585 "pcmpeqb %%xmm5,%%xmm5 \n" \ | 1557 "pcmpeqb %%xmm5,%%xmm5 \n" \ |
| 1586 "punpcklbw %%xmm2,%%xmm1 \n" \ | 1558 "punpcklbw %%xmm2,%%xmm1 \n" \ |
| 1587 "punpcklbw %%xmm0,%%xmm5 \n" \ | 1559 "punpcklbw %%xmm0,%%xmm5 \n" \ |
| 1588 "movdqa %%xmm5,%%xmm0 \n" \ | 1560 "movdqa %%xmm5,%%xmm0 \n" \ |
| 1589 "punpcklwd %%xmm1,%%xmm5 \n" \ | 1561 "punpcklwd %%xmm1,%%xmm5 \n" \ |
| 1590 "punpckhwd %%xmm1,%%xmm0 \n" \ | 1562 "punpckhwd %%xmm1,%%xmm0 \n" \ |
| 1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ | 1563 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ |
| 1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ | 1564 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ |
| (...skipping 378 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1971 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ | 1943 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ |
| 1972 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ | 1944 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ |
| 1973 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ | 1945 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ |
| 1974 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | 1946 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ |
| 1975 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ | 1947 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ |
| 1976 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ | 1948 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ |
| 1977 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ | 1949 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ |
| 1978 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ | 1950 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ |
| 1979 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" | 1951 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" |
| 1980 | 1952 |
| 1981 // Store 16 ABGR values. | |
| 1982 #define STOREABGR_AVX2 \ | |
| 1983 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \ | |
| 1984 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ | |
| 1985 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \ | |
| 1986 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | |
| 1987 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \ | |
| 1988 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \ | |
| 1989 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \ | |
| 1990 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ | |
| 1991 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" | |
| 1992 | |
| 1993 #if defined(HAS_I422TOARGBROW_AVX2) | 1953 #if defined(HAS_I422TOARGBROW_AVX2) |
| 1994 // 16 pixels | 1954 // 16 pixels |
| 1995 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1955 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 1996 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 1956 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
| 1997 const uint8* u_buf, | 1957 const uint8* u_buf, |
| 1998 const uint8* v_buf, | 1958 const uint8* v_buf, |
| 1999 uint8* dst_argb, | 1959 uint8* dst_argb, |
| 2000 const struct YuvConstants* yuvconstants, | 1960 const struct YuvConstants* yuvconstants, |
| 2001 int width) { | 1961 int width) { |
| 2002 asm volatile ( | 1962 asm volatile ( |
| (...skipping 3317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5320 ); | 5280 ); |
| 5321 } | 5281 } |
| 5322 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5282 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5323 | 5283 |
| 5324 #endif // defined(__x86_64__) || defined(__i386__) | 5284 #endif // defined(__x86_64__) || defined(__i386__) |
| 5325 | 5285 |
| 5326 #ifdef __cplusplus | 5286 #ifdef __cplusplus |
| 5327 } // extern "C" | 5287 } // extern "C" |
| 5328 } // namespace libyuv | 5288 } // namespace libyuv |
| 5329 #endif | 5289 #endif |
| OLD | NEW |