| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 2094 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2105 YUVTORGB_SETUP_AVX2(yuvconstants) | 2105 YUVTORGB_SETUP_AVX2(yuvconstants) |
| 2106 "sub %[u_buf],%[v_buf] \n" | 2106 "sub %[u_buf],%[v_buf] \n" |
| 2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2108 LABELALIGN | 2108 LABELALIGN |
| 2109 "1: \n" | 2109 "1: \n" |
| 2110 READYUV422_AVX2 | 2110 READYUV422_AVX2 |
| 2111 YUVTORGB_AVX2(yuvconstants) | 2111 YUVTORGB_AVX2(yuvconstants) |
| 2112 STOREARGB_AVX2 | 2112 STOREARGB_AVX2 |
| 2113 "sub $0x10,%[width] \n" | 2113 "sub $0x10,%[width] \n" |
| 2114 "jg 1b \n" | 2114 "jg 1b \n" |
| 2115 |
| 2115 "vzeroupper \n" | 2116 "vzeroupper \n" |
| 2116 : [y_buf]"+r"(y_buf), // %[y_buf] | 2117 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2117 [u_buf]"+r"(u_buf), // %[u_buf] | 2118 [u_buf]"+r"(u_buf), // %[u_buf] |
| 2118 [v_buf]"+r"(v_buf), // %[v_buf] | 2119 [v_buf]"+r"(v_buf), // %[v_buf] |
| 2119 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2120 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2120 [width]"+rm"(width) // %[width] | 2121 [width]"+rm"(width) // %[width] |
| 2121 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2122 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2122 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 | 2123 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
| 2123 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2124 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2124 ); | 2125 ); |
| (...skipping 3326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5451 : "+r"(src), // %0 | 5452 : "+r"(src), // %0 |
| 5452 "+r"(dst), // %1 | 5453 "+r"(dst), // %1 |
| 5453 "+r"(width) // %2 | 5454 "+r"(width) // %2 |
| 5454 : "x"(scale * kScaleBias) // %3 | 5455 : "x"(scale * kScaleBias) // %3 |
| 5455 : "memory", "cc", | 5456 : "memory", "cc", |
| 5456 "xmm2", "xmm3", "xmm4", "xmm5" | 5457 "xmm2", "xmm3", "xmm4", "xmm5" |
| 5457 ); | 5458 ); |
| 5458 } | 5459 } |
| 5459 #endif // HAS_HALFFLOATROW_AVX2 | 5460 #endif // HAS_HALFFLOATROW_AVX2 |
| 5460 | 5461 |
| 5461 #ifdef HAS_HALFFLOATROW_F16C | 5462 #ifdef HAS_HALFFLOATROW_F16C // Same guard as HalfFloat1Row_F16C; do not compile unconditionally. |
| 5462 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { | 5463 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { |
| 5463 asm volatile ( | 5464 asm volatile ( |
| 5464 "vbroadcastss %3, %%ymm4 \n" | 5465 "vbroadcastss %3, %%ymm4 \n" // ymm4 = scale (%3) in every float lane |
| 5465 | 5466 |
| 5466 // 16 pixel loop. | 5467 // 16 pixel loop. |
| 5467 LABELALIGN | 5468 LABELALIGN |
| 5468 "1: \n" | 5469 "1: \n" |
| 5469 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints | 5470 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints |
| 5470 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5471 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" |
| 5471 "lea " MEMLEA(0x20,0) ",%0 \n" | 5472 "lea " MEMLEA(0x20,0) ",%0 \n" |
| (...skipping 11 matching lines...) Expand all Loading... |
| 5483 | 5484 |
| 5484 "vzeroupper \n" | 5485 "vzeroupper \n" |
| 5485 : "+r"(src), // %0 | 5486 : "+r"(src), // %0 |
| 5486 "+r"(dst), // %1 | 5487 "+r"(dst), // %1 |
| 5487 "+r"(width) // %2 | 5488 "+r"(width) // %2 |
| 5488 : "x"(scale) // %3 | 5489 : "x"(scale) // %3 |
| 5489 : "memory", "cc", | 5490 : "memory", "cc", |
| 5490 "xmm2", "xmm3", "xmm4" | 5491 "xmm2", "xmm3", "xmm4" |
| 5491 ); | 5492 ); |
| 5492 } | 5493 } |
| 5493 #endif // HAS_HALFFLOATROW_F16C | 5494 #endif // HAS_HALFFLOATROW_F16C |
| 5494 | 5495 |
| 5495 #ifdef HAS_HALFFLOATROW_F16C | 5496 #ifdef HAS_HALFFLOATROW_F16C |
| 5496 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { | 5497 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { |
| 5497 asm volatile ( | 5498 asm volatile ( |
| 5498 // 16 pixel loop. | 5499 // 16 pixel loop. |
| 5499 LABELALIGN | 5500 LABELALIGN |
| 5500 "1: \n" | 5501 "1: \n" |
| 5501 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints | 5502 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints |
| 5502 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5503 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" |
| 5503 "lea " MEMLEA(0x20,0) ",%0 \n" | 5504 "lea " MEMLEA(0x20,0) ",%0 \n" |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5684 ); | 5685 ); |
| 5685 } | 5686 } |
| 5686 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5687 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5687 | 5688 |
| 5688 #endif // defined(__x86_64__) || defined(__i386__) | 5689 #endif // defined(__x86_64__) || defined(__i386__) |
| 5689 | 5690 |
| 5690 #ifdef __cplusplus | 5691 #ifdef __cplusplus |
| 5691 } // extern "C" | 5692 } // extern "C" |
| 5692 } // namespace libyuv | 5693 } // namespace libyuv |
| 5693 #endif | 5694 #endif |
| OLD | NEW |