| OLD | NEW | 
|---|
| 1 // VERSION 2 | 1 // VERSION 2 | 
| 2 /* | 2 /* | 
| 3  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 
| 4  * | 4  * | 
| 5  *  Use of this source code is governed by a BSD-style license | 5  *  Use of this source code is governed by a BSD-style license | 
| 6  *  that can be found in the LICENSE file in the root of the source | 6  *  that can be found in the LICENSE file in the root of the source | 
| 7  *  tree. An additional intellectual property rights grant can be found | 7  *  tree. An additional intellectual property rights grant can be found | 
| 8  *  in the file PATENTS. All contributing project authors may | 8  *  in the file PATENTS. All contributing project authors may | 
| 9  *  be found in the AUTHORS file in the root of the source tree. | 9  *  be found in the AUTHORS file in the root of the source tree. | 
| 10  */ | 10  */ | 
| (...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 157 static const lvec8 kShuffleUYVYY = { | 157 static const lvec8 kShuffleUYVYY = { | 
| 158   1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, | 158   1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, | 
| 159   1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 | 159   1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 | 
| 160 }; | 160 }; | 
| 161 | 161 | 
| 162 // UYVY shuf 8 UV to 16 UV. | 162 // UYVY shuf 8 UV to 16 UV. | 
| 163 static const lvec8 kShuffleUYVYUV = { | 163 static const lvec8 kShuffleUYVYUV = { | 
| 164   0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, | 164   0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, | 
| 165   0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 165   0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 
| 166 }; | 166 }; | 
|  | 167 | 
|  | 168 // NV21 shuf 8 VU to 16 UV. | 
|  | 169 static const lvec8 kShuffleNV21 = { | 
|  | 170   1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, | 
|  | 171   1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, | 
|  | 172 }; | 
| 167 #endif  // HAS_RGB24TOARGBROW_SSSE3 | 173 #endif  // HAS_RGB24TOARGBROW_SSSE3 | 
| 168 | 174 | 
| 169 #ifdef HAS_J400TOARGBROW_SSE2 | 175 #ifdef HAS_J400TOARGBROW_SSE2 | 
| 170 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { | 176 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { | 
| 171   asm volatile ( | 177   asm volatile ( | 
| 172     "pcmpeqb   %%xmm5,%%xmm5                   \n" | 178     "pcmpeqb   %%xmm5,%%xmm5                   \n" | 
| 173     "pslld     $0x18,%%xmm5                    \n" | 179     "pslld     $0x18,%%xmm5                    \n" | 
| 174     LABELALIGN | 180     LABELALIGN | 
| 175   "1:                                          \n" | 181   "1:                                          \n" | 
| 176     "movq      " MEMACCESS(0) ",%%xmm0         \n" | 182     "movq      " MEMACCESS(0) ",%%xmm0         \n" | 
| (...skipping 1214 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1391 | 1397 | 
| 1392 // Read 4 UV from NV12, upsample to 8 UV | 1398 // Read 4 UV from NV12, upsample to 8 UV | 
| 1393 #define READNV12                                                               \ | 1399 #define READNV12                                                               \ | 
| 1394     "movq       " MEMACCESS([uv_buf]) ",%%xmm0                  \n"            \ | 1400     "movq       " MEMACCESS([uv_buf]) ",%%xmm0                  \n"            \ | 
| 1395     "lea        " MEMLEA(0x8, [uv_buf]) ",%[uv_buf]             \n"            \ | 1401     "lea        " MEMLEA(0x8, [uv_buf]) ",%[uv_buf]             \n"            \ | 
| 1396     "punpcklwd  %%xmm0,%%xmm0                                   \n"            \ | 1402     "punpcklwd  %%xmm0,%%xmm0                                   \n"            \ | 
| 1397     "movq       " MEMACCESS([y_buf]) ",%%xmm4                   \n"            \ | 1403     "movq       " MEMACCESS([y_buf]) ",%%xmm4                   \n"            \ | 
| 1398     "punpcklbw  %%xmm4,%%xmm4                                   \n"            \ | 1404     "punpcklbw  %%xmm4,%%xmm4                                   \n"            \ | 
| 1399     "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n" | 1405     "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n" | 
| 1400 | 1406 | 
|  | 1407 // Read 4 VU from NV21, upsample to 8 UV | 
|  | 1408 #define READNV21                                                               \ | 
|  | 1409     "movq       " MEMACCESS([vu_buf]) ",%%xmm0                  \n"            \ | 
|  | 1410     "lea        " MEMLEA(0x8, [vu_buf]) ",%[vu_buf]             \n"            \ | 
|  | 1411     "pshufb     %[kShuffleNV21], %%xmm0                         \n"            \ | 
|  | 1412     "movq       " MEMACCESS([y_buf]) ",%%xmm4                   \n"            \ | 
|  | 1413     "punpcklbw  %%xmm4,%%xmm4                                   \n"            \ | 
|  | 1414     "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n" | 
|  | 1415 | 
| 1401 // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. | 1416 // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. | 
| 1402 #define READYUY2                                                               \ | 1417 #define READYUY2                                                               \ | 
| 1403     "movdqu     " MEMACCESS([yuy2_buf]) ",%%xmm4                \n"            \ | 1418     "movdqu     " MEMACCESS([yuy2_buf]) ",%%xmm4                \n"            \ | 
| 1404     "pshufb     %[kShuffleYUY2Y], %%xmm4                        \n"            \ | 1419     "pshufb     %[kShuffleYUY2Y], %%xmm4                        \n"            \ | 
| 1405     "movdqu     " MEMACCESS([yuy2_buf]) ",%%xmm0                \n"            \ | 1420     "movdqu     " MEMACCESS([yuy2_buf]) ",%%xmm0                \n"            \ | 
| 1406     "pshufb     %[kShuffleYUY2UV], %%xmm0                       \n"            \ | 1421     "pshufb     %[kShuffleYUY2UV], %%xmm0                       \n"            \ | 
| 1407     "lea        " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf]        \n" | 1422     "lea        " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf]        \n" | 
| 1408 | 1423 | 
| 1409 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. | 1424 // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. | 
| 1410 #define READUYVY                                                               \ | 1425 #define READUYVY                                                               \ | 
| (...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1762   : [y_buf]"+r"(y_buf),    // %[y_buf] | 1777   : [y_buf]"+r"(y_buf),    // %[y_buf] | 
| 1763     [uv_buf]"+r"(uv_buf),    // %[uv_buf] | 1778     [uv_buf]"+r"(uv_buf),    // %[uv_buf] | 
| 1764     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 1779     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 
| 1765     [width]"+rm"(width)    // %[width] | 1780     [width]"+rm"(width)    // %[width] | 
| 1766   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 1781   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 
| 1767   // Does not use r14. | 1782   // Does not use r14. | 
| 1768   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1783   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 
| 1769   ); | 1784   ); | 
| 1770 } | 1785 } | 
| 1771 | 1786 | 
|  | 1787 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, | 
|  | 1788                                 const uint8* vu_buf, | 
|  | 1789                                 uint8* dst_argb, | 
|  | 1790                                 struct YuvConstants* yuvconstants, | 
|  | 1791                                 int width) { | 
|  | 1792   asm volatile ( | 
|  | 1793     "pcmpeqb   %%xmm5,%%xmm5                   \n" | 
|  | 1794     LABELALIGN | 
|  | 1795   "1:                                          \n" | 
|  | 1796     READNV21 | 
|  | 1797     YUVTORGB(yuvconstants) | 
|  | 1798     STOREARGB | 
|  | 1799     "sub       $0x8,%[width]                   \n" | 
|  | 1800     "jg        1b                              \n" | 
|  | 1801   : [y_buf]"+r"(y_buf),    // %[y_buf] | 
|  | 1802     [vu_buf]"+r"(vu_buf),    // %[vu_buf] | 
|  | 1803     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 
|  | 1804     [width]"+rm"(width)    // %[width] | 
|  | 1805   : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 
|  | 1806     [kShuffleNV21]"m"(kShuffleNV21) | 
|  | 1807   // Does not use r14. | 
|  | 1808   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 
|  | 1809   ); | 
|  | 1810 } | 
|  | 1811 | 
| 1772 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, | 1812 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, | 
| 1773                                 uint8* dst_argb, | 1813                                 uint8* dst_argb, | 
| 1774                                 struct YuvConstants* yuvconstants, | 1814                                 struct YuvConstants* yuvconstants, | 
| 1775                                 int width) { | 1815                                 int width) { | 
| 1776   asm volatile ( | 1816   asm volatile ( | 
| 1777     "pcmpeqb   %%xmm5,%%xmm5                   \n" | 1817     "pcmpeqb   %%xmm5,%%xmm5                   \n" | 
| 1778     LABELALIGN | 1818     LABELALIGN | 
| 1779   "1:                                          \n" | 1819   "1:                                          \n" | 
| 1780     READYUY2 | 1820     READYUY2 | 
| 1781     YUVTORGB(yuvconstants) | 1821     YUVTORGB(yuvconstants) | 
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1933 #define READNV12_AVX2                                                          \ | 1973 #define READNV12_AVX2                                                          \ | 
| 1934     "vmovdqu    " MEMACCESS([uv_buf]) ",%%xmm0                      \n"        \ | 1974     "vmovdqu    " MEMACCESS([uv_buf]) ",%%xmm0                      \n"        \ | 
| 1935     "lea        " MEMLEA(0x10, [uv_buf]) ",%[uv_buf]                \n"        \ | 1975     "lea        " MEMLEA(0x10, [uv_buf]) ",%[uv_buf]                \n"        \ | 
| 1936     "vpermq     $0xd8,%%ymm0,%%ymm0                                 \n"        \ | 1976     "vpermq     $0xd8,%%ymm0,%%ymm0                                 \n"        \ | 
| 1937     "vpunpcklwd %%ymm0,%%ymm0,%%ymm0                                \n"        \ | 1977     "vpunpcklwd %%ymm0,%%ymm0,%%ymm0                                \n"        \ | 
| 1938     "vmovdqu    " MEMACCESS([y_buf]) ",%%xmm4                       \n"        \ | 1978     "vmovdqu    " MEMACCESS([y_buf]) ",%%xmm4                       \n"        \ | 
| 1939     "vpermq     $0xd8,%%ymm4,%%ymm4                                 \n"        \ | 1979     "vpermq     $0xd8,%%ymm4,%%ymm4                                 \n"        \ | 
| 1940     "vpunpcklbw %%ymm4,%%ymm4,%%ymm4                                \n"        \ | 1980     "vpunpcklbw %%ymm4,%%ymm4,%%ymm4                                \n"        \ | 
| 1941     "lea        " MEMLEA(0x10, [y_buf]) ",%[y_buf]                  \n" | 1981     "lea        " MEMLEA(0x10, [y_buf]) ",%[y_buf]                  \n" | 
| 1942 | 1982 | 
|  | 1983 // Read 8 VU from NV21, upsample to 16 UV. | 
|  | 1984 #define READNV21_AVX2                                                          \ | 
|  | 1985     "vmovdqu    " MEMACCESS([vu_buf]) ",%%xmm0                      \n"        \ | 
|  | 1986     "lea        " MEMLEA(0x10, [vu_buf]) ",%[vu_buf]                \n"        \ | 
|  | 1987     "vpermq     $0xd8,%%ymm0,%%ymm0                                 \n"        \ | 
|  | 1988     "vpshufb     %[kShuffleNV21], %%ymm0, %%ymm0                    \n"        \ | 
|  | 1989     "vmovdqu    " MEMACCESS([y_buf]) ",%%xmm4                       \n"        \ | 
|  | 1990     "vpermq     $0xd8,%%ymm4,%%ymm4                                 \n"        \ | 
|  | 1991     "vpunpcklbw %%ymm4,%%ymm4,%%ymm4                                \n"        \ | 
|  | 1992     "lea        " MEMLEA(0x10, [y_buf]) ",%[y_buf]                  \n" | 
|  | 1993 | 
| 1943 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. | 1994 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. | 
| 1944 #define READYUY2_AVX2                                                          \ | 1995 #define READYUY2_AVX2                                                          \ | 
| 1945     "vmovdqu    " MEMACCESS([yuy2_buf]) ",%%ymm4                    \n"        \ | 1996     "vmovdqu    " MEMACCESS([yuy2_buf]) ",%%ymm4                    \n"        \ | 
| 1946     "vpshufb    %[kShuffleYUY2Y], %%ymm4, %%ymm4                    \n"        \ | 1997     "vpshufb    %[kShuffleYUY2Y], %%ymm4, %%ymm4                    \n"        \ | 
| 1947     "vmovdqu    " MEMACCESS([yuy2_buf]) ",%%ymm0                    \n"        \ | 1998     "vmovdqu    " MEMACCESS([yuy2_buf]) ",%%ymm0                    \n"        \ | 
| 1948     "vpshufb    %[kShuffleYUY2UV], %%ymm0, %%ymm0                   \n"        \ | 1999     "vpshufb    %[kShuffleYUY2UV], %%ymm0, %%ymm0                   \n"        \ | 
| 1949     "lea        " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf]            \n" | 2000     "lea        " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf]            \n" | 
| 1950 | 2001 | 
| 1951 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. | 2002 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. | 
| 1952 #define READUYVY_AVX2                                                          \ | 2003 #define READUYVY_AVX2                                                          \ | 
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 2244     "vzeroupper                                \n" | 2295     "vzeroupper                                \n" | 
| 2245   : [y_buf]"+r"(y_buf),    // %[y_buf] | 2296   : [y_buf]"+r"(y_buf),    // %[y_buf] | 
| 2246     [uv_buf]"+r"(uv_buf),    // %[uv_buf] | 2297     [uv_buf]"+r"(uv_buf),    // %[uv_buf] | 
| 2247     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 2298     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 
| 2248     [width]"+rm"(width)    // %[width] | 2299     [width]"+rm"(width)    // %[width] | 
| 2249   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 2300   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 
| 2250   // Does not use r14. | 2301   // Does not use r14. | 
| 2251   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2302   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 
| 2252   ); | 2303   ); | 
| 2253 } | 2304 } | 
| 2254 #endif  // HAS_YUY2TOARGBROW_AVX2 | 2305 #endif  // HAS_NV12TOARGBROW_AVX2 | 
| 2255 | 2306 | 
|  | 2307 #if defined(HAS_NV21TOARGBROW_AVX2) | 
|  | 2308 // 16 pixels. | 
|  | 2309 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 
|  | 2310 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, | 
|  | 2311                                const uint8* vu_buf, | 
|  | 2312                                uint8* dst_argb, | 
|  | 2313                                struct YuvConstants* yuvconstants, | 
|  | 2314                                int width) { | 
|  | 2315   asm volatile ( | 
|  | 2316     "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n" | 
|  | 2317     LABELALIGN | 
|  | 2318   "1:                                          \n" | 
|  | 2319     READNV21_AVX2 | 
|  | 2320     YUVTORGB_AVX2(yuvconstants) | 
|  | 2321     STOREARGB_AVX2 | 
|  | 2322     "sub       $0x10,%[width]                  \n" | 
|  | 2323     "jg        1b                              \n" | 
|  | 2324     "vzeroupper                                \n" | 
|  | 2325   : [y_buf]"+r"(y_buf),    // %[y_buf] | 
|  | 2326     [vu_buf]"+r"(vu_buf),    // %[vu_buf] | 
|  | 2327     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 
|  | 2328     [width]"+rm"(width)    // %[width] | 
|  | 2329   : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 
|  | 2330     [kShuffleNV21]"m"(kShuffleNV21) | 
|  | 2331   // Does not use r14. | 
|  | 2332   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 
|  | 2333   ); | 
|  | 2334 } | 
|  | 2335 #endif  // HAS_NV21TOARGBROW_AVX2 | 
| 2256 | 2336 | 
| 2257 #if defined(HAS_YUY2TOARGBROW_AVX2) | 2337 #if defined(HAS_YUY2TOARGBROW_AVX2) | 
| 2258 // 16 pixels. | 2338 // 16 pixels. | 
| 2259 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2339 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 
| 2260 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, | 2340 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, | 
| 2261                                uint8* dst_argb, | 2341                                uint8* dst_argb, | 
| 2262                                struct YuvConstants* yuvconstants, | 2342                                struct YuvConstants* yuvconstants, | 
| 2263                                int width) { | 2343                                int width) { | 
| 2264   asm volatile ( | 2344   asm volatile ( | 
| 2265     "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n" | 2345     "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n" | 
| (...skipping 3146 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 5412   ); | 5492   ); | 
| 5413 } | 5493 } | 
| 5414 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5494 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 
| 5415 | 5495 | 
| 5416 #endif  // defined(__x86_64__) || defined(__i386__) | 5496 #endif  // defined(__x86_64__) || defined(__i386__) | 
| 5417 | 5497 | 
| 5418 #ifdef __cplusplus | 5498 #ifdef __cplusplus | 
| 5419 }  // extern "C" | 5499 }  // extern "C" | 
| 5420 }  // namespace libyuv | 5500 }  // namespace libyuv | 
| 5421 #endif | 5501 #endif | 
| OLD | NEW | 
|---|