| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 5349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5360 "+r"(dst_argb), // %1 | 5360 "+r"(dst_argb), // %1 |
| 5361 "+r"(width) // %2 | 5361 "+r"(width) // %2 |
| 5362 : "r"(poly) // %3 | 5362 : "r"(poly) // %3 |
| 5363 : "memory", "cc", | 5363 : "memory", "cc", |
| 5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
| 5365 ); | 5365 ); |
| 5366 } | 5366 } |
| 5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 |
| 5368 | 5368 |
| 5369 #ifdef HAS_HALFFLOATROW_SSE2 | 5369 #ifdef HAS_HALFFLOATROW_SSE2 |
| 5370 static float kScaleBias = 1.9259299444e-34f; |
| 5370 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { | 5371 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { |
| 5371 float mult = 1.9259299444e-34f * scale; | |
| 5372 asm volatile ( | 5372 asm volatile ( |
| 5373 "movd %3,%%xmm4 \n" | 5373 "pshufd $0x0,%3,%%xmm4 \n" |
| 5374 "pshufd $0x0,%%xmm4,%%xmm4 \n" | |
| 5375 "pxor %%xmm5,%%xmm5 \n" | 5374 "pxor %%xmm5,%%xmm5 \n" |
| 5376 | 5375 |
| 5377 // 8 pixel loop. | 5376 // 8 pixel loop. |
| 5378 LABELALIGN | 5377 LABELALIGN |
| 5379 "1: \n" | 5378 "1: \n" |
| 5380 "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 shorts | 5379 "movdqu " MEMACCESS(0) ",%%xmm2 \n" // 8 shorts |
| 5381 "lea " MEMLEA(0x10,0) ",%0 \n" | 5380 "lea " MEMLEA(0x10,0) ",%0 \n" |
| 5382 "movdqa %%xmm0,%%xmm1 \n" | 5381 "movdqa %%xmm2,%%xmm3 \n" |
| 5383 "punpcklwd %%xmm5,%%xmm0 \n" // 8 ints in xmm0/1 | 5382 "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/3 |
| 5384 "cvtdq2ps %%xmm0,%%xmm0 \n" // 8 floats | 5383 "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats |
| 5385 "punpckhwd %%xmm5,%%xmm1 \n" | 5384 "punpckhwd %%xmm5,%%xmm3 \n" |
| 5386 "cvtdq2ps %%xmm1,%%xmm1 \n" | 5385 "cvtdq2ps %%xmm3,%%xmm3 \n" |
| 5387 "mulps %%xmm4,%%xmm0 \n" | 5386 "mulps %%xmm4,%%xmm2 \n" |
| 5388 "mulps %%xmm4,%%xmm1 \n" | 5387 "mulps %%xmm4,%%xmm3 \n" |
| 5389 "psrld $0xd,%%xmm0 \n" | 5388 "psrld $0xd,%%xmm2 \n" |
| 5390 "psrld $0xd,%%xmm1 \n" | 5389 "psrld $0xd,%%xmm3 \n" |
| 5391 "packssdw %%xmm1,%%xmm0 \n" | 5390 "packssdw %%xmm3,%%xmm2 \n" |
| 5392 "movdqu %%xmm0," MEMACCESS(1) " \n" | 5391 "movdqu %%xmm2," MEMACCESS(1) " \n" |
| 5393 "lea " MEMLEA(0x10,1) ",%1 \n" | 5392 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 5394 "sub $0x8,%2 \n" | 5393 "sub $0x8,%2 \n" |
| 5395 "jg 1b \n" | 5394 "jg 1b \n" |
| 5396 : "+r"(src), // %0 | 5395 : "+r"(src), // %0 |
| 5397 "+r"(dst), // %1 | 5396 "+r"(dst), // %1 |
| 5398 "+r"(width) // %2 | 5397 "+r"(width) // %2 |
| 5399 : "rm"(mult) // %3 | 5398 : "x"(scale * kScaleBias) // %3 |
| 5400 : "memory", "cc", | 5399 : "memory", "cc", |
| 5401 "xmm0", "xmm1", "xmm4", "xmm5" | 5400 "xmm2", "xmm3", "xmm4", "xmm5" |
| 5402 ); | 5401 ); |
| 5403 } | 5402 } |
| 5404 #endif // HAS_HALFFLOATROW_SSE2 | 5403 #endif // HAS_HALFFLOATROW_SSE2 |
| 5405 | 5404 |
| 5406 #ifdef HAS_HALFFLOATROW_AVX2 | 5405 #ifdef HAS_HALFFLOATROW_AVX2 |
| 5407 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 5406 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { |
| 5408 asm volatile ( | 5407 asm volatile ( |
| 5409 "vbroadcastss %3, %%ymm4 \n" | 5408 "vbroadcastss %3, %%ymm4 \n" |
| 5410 | 5409 |
| 5411 // 16 pixel loop. | 5410 // 16 pixel loop. |
| 5412 LABELALIGN | 5411 LABELALIGN |
| 5413 "1: \n" | 5412 "1: \n" |
| 5414 "vpmovzxwd " MEMACCESS(0) ",%%ymm0 \n" // 8 shorts -> 8 ints | 5413 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts -> 8 ints |
| 5415 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm1 \n" // 8 more | 5414 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" // 8 more |
| 5416 "lea " MEMLEA(0x20,0) ",%0 \n" | 5415 "lea " MEMLEA(0x20,0) ",%0 \n" |
| 5417 "vcvtdq2ps %%ymm0,%%ymm0 \n" | 5416 "vcvtdq2ps %%ymm2,%%ymm2 \n" |
| 5418 "vcvtdq2ps %%ymm1,%%ymm1 \n" | 5417 "vcvtdq2ps %%ymm3,%%ymm3 \n" |
| 5419 "vmulps %%ymm0,%%ymm4,%%ymm0 \n" | 5418 "vmulps %%ymm2,%%ymm4,%%ymm2 \n" |
| 5420 "vmulps %%ymm1,%%ymm4,%%ymm1 \n" | 5419 "vmulps %%ymm3,%%ymm4,%%ymm3 \n" |
| 5421 "vcvtps2ph $3, %%ymm0, %%xmm0 \n" | 5420 "vcvtps2ph $3, %%ymm2, %%xmm2 \n" |
| 5422 "vcvtps2ph $3, %%ymm1, %%xmm1 \n" | 5421 "vcvtps2ph $3, %%ymm3, %%xmm3 \n" |
| 5423 "vmovdqu %%xmm0," MEMACCESS(1) " \n" | 5422 "vmovdqu %%xmm2," MEMACCESS(1) " \n" |
| 5424 "vmovdqu %%xmm1," MEMACCESS2(0x10,1) " \n" | 5423 "vmovdqu %%xmm3," MEMACCESS2(0x10,1) " \n" |
| 5425 "lea " MEMLEA(0x20,1) ",%1 \n" | 5424 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 5426 "sub $0x10,%2 \n" | 5425 "sub $0x10,%2 \n" |
| 5427 "jg 1b \n" | 5426 "jg 1b \n" |
| 5428 "vzeroupper \n" | 5427 "vzeroupper \n" |
| 5429 : "+r"(src), // %0 | 5428 : "+r"(src), // %0 |
| 5430 "+r"(dst), // %1 | 5429 "+r"(dst), // %1 |
| 5431 "+r"(width) // %2 | 5430 "+r"(width) // %2 |
| 5432 : "x"(scale) // %3 | 5431 : "x"(scale) // %3 |
| 5433 : "memory", "cc", | 5432 : "memory", "cc", |
| 5434 "xmm0", "xmm1", "xmm4" | 5433 "xmm2", "xmm3", "xmm4" |
| 5435 ); | 5434 ); |
| 5436 } | 5435 } |
| 5437 #endif // HAS_HALFFLOATROW_AVX2 | 5436 #endif // HAS_HALFFLOATROW_AVX2 |
| 5438 | 5437 |
| 5439 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 5438 #ifdef HAS_ARGBCOLORTABLEROW_X86 |
| 5440 // Transform ARGB pixels with color table. | 5439 // Transform ARGB pixels with color table. |
| 5441 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 5440 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, |
| 5442 int width) { | 5441 int width) { |
| 5443 uintptr_t pixel_temp; | 5442 uintptr_t pixel_temp; |
| 5444 asm volatile ( | 5443 asm volatile ( |
| (...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5595 ); | 5594 ); |
| 5596 } | 5595 } |
| 5597 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5596 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5598 | 5597 |
| 5599 #endif // defined(__x86_64__) || defined(__i386__) | 5598 #endif // defined(__x86_64__) || defined(__i386__) |
| 5600 | 5599 |
| 5601 #ifdef __cplusplus | 5600 #ifdef __cplusplus |
| 5602 } // extern "C" | 5601 } // extern "C" |
| 5603 } // namespace libyuv | 5602 } // namespace libyuv |
| 5604 #endif | 5603 #endif |
| OLD | NEW |