| OLD | NEW | 
|---|---|
| 1 // VERSION 2 | 1 // VERSION 2 | 
| 2 /* | 2 /* | 
| 3  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 
| 4  * | 4  * | 
| 5  *  Use of this source code is governed by a BSD-style license | 5  *  Use of this source code is governed by a BSD-style license | 
| 6  *  that can be found in the LICENSE file in the root of the source | 6  *  that can be found in the LICENSE file in the root of the source | 
| 7  *  tree. An additional intellectual property rights grant can be found | 7  *  tree. An additional intellectual property rights grant can be found | 
| 8  *  in the file PATENTS. All contributing project authors may | 8  *  in the file PATENTS. All contributing project authors may | 
| 9  *  be found in the AUTHORS file in the root of the source tree. | 9  *  be found in the AUTHORS file in the root of the source tree. | 
| 10  */ | 10  */ | 
| (...skipping 2094 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 2105     YUVTORGB_SETUP_AVX2(yuvconstants) | 2105     YUVTORGB_SETUP_AVX2(yuvconstants) | 
| 2106     "sub       %[u_buf],%[v_buf]               \n" | 2106     "sub       %[u_buf],%[v_buf]               \n" | 
| 2107     "vpcmpeqb  %%ymm5,%%ymm5,%%ymm5            \n" | 2107     "vpcmpeqb  %%ymm5,%%ymm5,%%ymm5            \n" | 
| 2108     LABELALIGN | 2108     LABELALIGN | 
| 2109   "1:                                          \n" | 2109   "1:                                          \n" | 
| 2110     READYUV422_AVX2 | 2110     READYUV422_AVX2 | 
| 2111     YUVTORGB_AVX2(yuvconstants) | 2111     YUVTORGB_AVX2(yuvconstants) | 
| 2112     STOREARGB_AVX2 | 2112     STOREARGB_AVX2 | 
| 2113     "sub       $0x10,%[width]                  \n" | 2113     "sub       $0x10,%[width]                  \n" | 
| 2114     "jg        1b                              \n" | 2114     "jg        1b                              \n" | 
|  | 2115 | 
| 2115     "vzeroupper                                \n" | 2116     "vzeroupper                                \n" | 
| 2116   : [y_buf]"+r"(y_buf),    // %[y_buf] | 2117   : [y_buf]"+r"(y_buf),    // %[y_buf] | 
| 2117     [u_buf]"+r"(u_buf),    // %[u_buf] | 2118     [u_buf]"+r"(u_buf),    // %[u_buf] | 
| 2118     [v_buf]"+r"(v_buf),    // %[v_buf] | 2119     [v_buf]"+r"(v_buf),    // %[v_buf] | 
| 2119     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 2120     [dst_argb]"+r"(dst_argb),  // %[dst_argb] | 
| 2120     [width]"+rm"(width)    // %[width] | 2121     [width]"+rm"(width)    // %[width] | 
| 2121   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 2122   : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants] | 
| 2122   : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 | 2123   : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 | 
| 2123     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2124     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 
| 2124   ); | 2125   ); | 
| (...skipping 3326 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 5451   : "+r"(src),    // %0 | 5452   : "+r"(src),    // %0 | 
| 5452     "+r"(dst),    // %1 | 5453     "+r"(dst),    // %1 | 
| 5453     "+r"(width)   // %2 | 5454     "+r"(width)   // %2 | 
| 5454   : "x"(scale * kScaleBias)   // %3 | 5455   : "x"(scale * kScaleBias)   // %3 | 
| 5455   : "memory", "cc", | 5456   : "memory", "cc", | 
| 5456     "xmm2", "xmm3", "xmm4", "xmm5" | 5457     "xmm2", "xmm3", "xmm4", "xmm5" | 
| 5457   ); | 5458   ); | 
| 5458 } | 5459 } | 
| 5459 #endif  // HAS_HALFFLOATROW_AVX2 | 5460 #endif  // HAS_HALFFLOATROW_AVX2 | 
| 5460 | 5461 | 
| 5461 #ifdef HAS_HALFFLOATROW_F16C | 5462 //#ifdef HAS_HALFFLOATROW_F16C | 
| 5462 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { | 5463 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { | 
| 5463   asm volatile ( | 5464   asm volatile ( | 
| 5464    "vbroadcastss  %3, %%ymm4                  \n" | 5465    "vbroadcastss  %3, %%ymm4                  \n" | 
| 5465 | 5466 | 
| 5466     // 16 pixel loop. | 5467     // 16 pixel loop. | 
| 5467     LABELALIGN | 5468     LABELALIGN | 
| 5468   "1:                                          \n" | 5469   "1:                                          \n" | 
| 5469     "vpmovzxwd   " MEMACCESS(0) ",%%ymm2       \n"  // 16 shorts -> 16 ints | 5470     "vpmovzxwd   " MEMACCESS(0) ",%%ymm2       \n"  // 16 shorts -> 16 ints | 
| 5470     "vpmovzxwd   " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5471     "vpmovzxwd   " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 
| 5471     "lea         " MEMLEA(0x20,0) ",%0         \n" | 5472     "lea         " MEMLEA(0x20,0) ",%0         \n" | 
| (...skipping 11 matching lines...) Expand all  Loading... | 
| 5483 | 5484 | 
| 5484     "vzeroupper                                \n" | 5485     "vzeroupper                                \n" | 
| 5485   : "+r"(src),   // %0 | 5486   : "+r"(src),   // %0 | 
| 5486     "+r"(dst),   // %1 | 5487     "+r"(dst),   // %1 | 
| 5487     "+r"(width)  // %2 | 5488     "+r"(width)  // %2 | 
| 5488   : "x"(scale)   // %3 | 5489   : "x"(scale)   // %3 | 
| 5489   : "memory", "cc", | 5490   : "memory", "cc", | 
| 5490     "xmm2", "xmm3", "xmm4" | 5491     "xmm2", "xmm3", "xmm4" | 
| 5491   ); | 5492   ); | 
| 5492 } | 5493 } | 
| 5493 #endif  // HAS_HALFFLOATROW_F16C | 5494 //#endif  // HAS_HALFFLOATROW_F16C | 
| 5494 | 5495 | 
| 5495 #ifdef HAS_HALFFLOATROW_F16C | 5496 #ifdef HAS_HALFFLOATROW_F16C | 
| 5496 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { | 5497 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { | 
| 5497   asm volatile ( | 5498   asm volatile ( | 
| 5498     // 16 pixel loop. | 5499     // 16 pixel loop. | 
| 5499     LABELALIGN | 5500     LABELALIGN | 
| 5500   "1:                                          \n" | 5501   "1:                                          \n" | 
| 5501     "vpmovzxwd   " MEMACCESS(0) ",%%ymm2       \n"  // 16 shorts -> 16 ints | 5502     "vpmovzxwd   " MEMACCESS(0) ",%%ymm2       \n"  // 16 shorts -> 16 ints | 
| 5502     "vpmovzxwd   " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 5503     "vpmovzxwd   " MEMACCESS2(0x10,0) ",%%ymm3 \n" | 
| 5503     "lea         " MEMLEA(0x20,0) ",%0         \n" | 5504     "lea         " MEMLEA(0x20,0) ",%0         \n" | 
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 5684   ); | 5685   ); | 
| 5685 } | 5686 } | 
| 5686 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5687 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 
| 5687 | 5688 | 
| 5688 #endif  // defined(__x86_64__) || defined(__i386__) | 5689 #endif  // defined(__x86_64__) || defined(__i386__) | 
| 5689 | 5690 | 
| 5690 #ifdef __cplusplus | 5691 #ifdef __cplusplus | 
| 5691 }  // extern "C" | 5692 }  // extern "C" | 
| 5692 }  // namespace libyuv | 5693 }  // namespace libyuv | 
| 5693 #endif | 5694 #endif | 
| OLD | NEW | 
|---|---|