OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1500 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1511 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1511 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1512 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1512 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
1513 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1513 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1514 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1514 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1515 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1515 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1516 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1516 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1517 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ | 1517 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ |
1518 "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ | 1518 "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ |
1519 "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" | 1519 "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" |
1520 | 1520 |
1521 // Read 2 UV from 411, upsample to 8 UV | 1521 // Read 2 UV from 411, upsample to 8 UV. |
1522 #define READYUV411 \ | 1522 // reading 4 bytes is an msan violation. |
1523 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1523 // "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" |
1524 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1524 // MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) |
| 1525 // pinsrw fails with drmemory |
| 1526 // __asm pinsrw xmm0, [esi], 0 /* U */ |
| 1527 // __asm pinsrw xmm1, [esi + edi], 0 /* V */ |
| 1528 #define READYUV411_EBX \ |
| 1529 "movzw " MEMACCESS([u_buf]) ",%%ebx \n" \ |
| 1530 "movd %%ebx,%%xmm0 \n" \ |
| 1531 MEMOPREG(movzw,0x00,[u_buf],[v_buf],1,ebx) " \n" \ |
| 1532 "movd %%ebx,%%xmm1 \n" \ |
1525 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1533 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
1526 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1534 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1527 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1535 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1528 "punpckldq %%xmm0,%%xmm0 \n" \ | 1536 "punpckldq %%xmm0,%%xmm0 \n" \ |
1529 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1537 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1530 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1538 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1531 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1539 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1532 | 1540 |
1533 // Read 4 UV from NV12, upsample to 8 UV | 1541 // Read 4 UV from NV12, upsample to 8 UV |
1534 #define READNV12 \ | 1542 #define READNV12 \ |
(...skipping 258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1793 const uint8* v_buf, | 1801 const uint8* v_buf, |
1794 uint8* dst_argb, | 1802 uint8* dst_argb, |
1795 const struct YuvConstants* yuvconstants, | 1803 const struct YuvConstants* yuvconstants, |
1796 int width) { | 1804 int width) { |
1797 asm volatile ( | 1805 asm volatile ( |
1798 YUVTORGB_SETUP(yuvconstants) | 1806 YUVTORGB_SETUP(yuvconstants) |
1799 "sub %[u_buf],%[v_buf] \n" | 1807 "sub %[u_buf],%[v_buf] \n" |
1800 "pcmpeqb %%xmm5,%%xmm5 \n" | 1808 "pcmpeqb %%xmm5,%%xmm5 \n" |
1801 LABELALIGN | 1809 LABELALIGN |
1802 "1: \n" | 1810 "1: \n" |
1803 READYUV411 | 1811 READYUV411_EBX |
1804 YUVTORGB(yuvconstants) | 1812 YUVTORGB(yuvconstants) |
1805 STOREARGB | 1813 STOREARGB |
1806 "sub $0x8,%[width] \n" | 1814 "sub $0x8,%[width] \n" |
1807 "jg 1b \n" | 1815 "jg 1b \n" |
1808 : [y_buf]"+r"(y_buf), // %[y_buf] | 1816 : [y_buf]"+r"(y_buf), // %[y_buf] |
1809 [u_buf]"+r"(u_buf), // %[u_buf] | 1817 [u_buf]"+r"(u_buf), // %[u_buf] |
1810 [v_buf]"+r"(v_buf), // %[v_buf] | 1818 [v_buf]"+r"(v_buf), // %[v_buf] |
1811 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1819 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1812 [width]"+rm"(width) // %[width] | 1820 [width]"+rm"(width) // %[width] |
1813 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1821 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1814 : "memory", "cc", NACL_R14 YUVTORGB_REGS | 1822 : "memory", "cc", "ebx", NACL_R14 YUVTORGB_REGS |
1815 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1823 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1816 ); | 1824 ); |
1817 } | 1825 } |
1818 | 1826 |
1819 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, | 1827 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
1820 const uint8* uv_buf, | 1828 const uint8* uv_buf, |
1821 uint8* dst_argb, | 1829 uint8* dst_argb, |
1822 const struct YuvConstants* yuvconstants, | 1830 const struct YuvConstants* yuvconstants, |
1823 int width) { | 1831 int width) { |
1824 asm volatile ( | 1832 asm volatile ( |
(...skipping 3653 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5478 ); | 5486 ); |
5479 } | 5487 } |
5480 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5488 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5481 | 5489 |
5482 #endif // defined(__x86_64__) || defined(__i386__) | 5490 #endif // defined(__x86_64__) || defined(__i386__) |
5483 | 5491 |
5484 #ifdef __cplusplus | 5492 #ifdef __cplusplus |
5485 } // extern "C" | 5493 } // extern "C" |
5486 } // namespace libyuv | 5494 } // namespace libyuv |
5487 #endif | 5495 #endif |
OLD | NEW |