OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 2094 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2105 YUVTORGB_SETUP_AVX2(yuvconstants) | 2105 YUVTORGB_SETUP_AVX2(yuvconstants) |
2106 "sub %[u_buf],%[v_buf] \n" | 2106 "sub %[u_buf],%[v_buf] \n" |
2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2108 LABELALIGN | 2108 LABELALIGN |
2109 "1: \n" | 2109 "1: \n" |
2110 READYUV422_AVX2 | 2110 READYUV422_AVX2 |
2111 YUVTORGB_AVX2(yuvconstants) | 2111 YUVTORGB_AVX2(yuvconstants) |
2112 STOREARGB_AVX2 | 2112 STOREARGB_AVX2 |
2113 "sub $0x10,%[width] \n" | 2113 "sub $0x10,%[width] \n" |
2114 "jg 1b \n" | 2114 "jg 1b \n" |
| 2115 |
2115 "vzeroupper \n" | 2116 "vzeroupper \n" |
2116 : [y_buf]"+r"(y_buf), // %[y_buf] | 2117 : [y_buf]"+r"(y_buf), // %[y_buf] |
2117 [u_buf]"+r"(u_buf), // %[u_buf] | 2118 [u_buf]"+r"(u_buf), // %[u_buf] |
2118 [v_buf]"+r"(v_buf), // %[v_buf] | 2119 [v_buf]"+r"(v_buf), // %[v_buf] |
2119 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2120 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
2120 [width]"+rm"(width) // %[width] | 2121 [width]"+rm"(width) // %[width] |
2121 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2122 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2122 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 | 2123 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 |
2123 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2124 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2124 ); | 2125 ); |
(...skipping 3326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5451 : "+r"(src), // %0 | 5452 : "+r"(src), // %0 |
5452 "+r"(dst), // %1 | 5453 "+r"(dst), // %1 |
5453 "+r"(width) // %2 | 5454 "+r"(width) // %2 |
5454 : "x"(scale * kScaleBias) // %3 | 5455 : "x"(scale * kScaleBias) // %3 |
5455 : "memory", "cc", | 5456 : "memory", "cc", |
5456 "xmm2", "xmm3", "xmm4", "xmm5" | 5457 "xmm2", "xmm3", "xmm4", "xmm5" |
5457 ); | 5458 ); |
5458 } | 5459 } |
5459 #endif // HAS_HALFFLOATROW_AVX2 | 5460 #endif // HAS_HALFFLOATROW_AVX2 |
5460 | 5461 |
5461 #ifdef HAS_HALFFLOATROW_F16C | 5462 //#ifdef HAS_HALFFLOATROW_F16C |
5462 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { | 5463 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { |
5463 asm volatile ( | 5464 asm volatile ( |
5464 "vbroadcastss %3, %%ymm4 \n" | 5465 "vbroadcastss %3, %%ymm4 \n" |
5465 | 5466 |
5466 // 16 pixel loop. | 5467 // 16 pixel loop. |
5467 LABELALIGN | 5468 LABELALIGN |
5468 "1: \n" | 5469 "1: \n" |
5469 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints | 5470 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints |
5470 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5471 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" |
5471 "lea " MEMLEA(0x20,0) ",%0 \n" | 5472 "lea " MEMLEA(0x20,0) ",%0 \n" |
(...skipping 11 matching lines...) Expand all Loading... |
5483 | 5484 |
5484 "vzeroupper \n" | 5485 "vzeroupper \n" |
5485 : "+r"(src), // %0 | 5486 : "+r"(src), // %0 |
5486 "+r"(dst), // %1 | 5487 "+r"(dst), // %1 |
5487 "+r"(width) // %2 | 5488 "+r"(width) // %2 |
5488 : "x"(scale) // %3 | 5489 : "x"(scale) // %3 |
5489 : "memory", "cc", | 5490 : "memory", "cc", |
5490 "xmm2", "xmm3", "xmm4" | 5491 "xmm2", "xmm3", "xmm4" |
5491 ); | 5492 ); |
5492 } | 5493 } |
5493 #endif // HAS_HALFFLOATROW_F16C | 5494 //#endif // HAS_HALFFLOATROW_F16C |
5494 | 5495 |
5495 #ifdef HAS_HALFFLOATROW_F16C | 5496 #ifdef HAS_HALFFLOATROW_F16C |
5496 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { | 5497 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { |
5497 asm volatile ( | 5498 asm volatile ( |
5498 // 16 pixel loop. | 5499 // 16 pixel loop. |
5499 LABELALIGN | 5500 LABELALIGN |
5500 "1: \n" | 5501 "1: \n" |
5501 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints | 5502 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints |
5502 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5503 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" |
5503 "lea " MEMLEA(0x20,0) ",%0 \n" | 5504 "lea " MEMLEA(0x20,0) ",%0 \n" |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5684 ); | 5685 ); |
5685 } | 5686 } |
5686 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5687 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5687 | 5688 |
5688 #endif // defined(__x86_64__) || defined(__i386__) | 5689 #endif // defined(__x86_64__) || defined(__i386__) |
5689 | 5690 |
5690 #ifdef __cplusplus | 5691 #ifdef __cplusplus |
5691 } // extern "C" | 5692 } // extern "C" |
5692 } // namespace libyuv | 5693 } // namespace libyuv |
5693 #endif | 5694 #endif |
OLD | NEW |