OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 5349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5360 "+r"(dst_argb), // %1 | 5360 "+r"(dst_argb), // %1 |
5361 "+r"(width) // %2 | 5361 "+r"(width) // %2 |
5362 : "r"(poly) // %3 | 5362 : "r"(poly) // %3 |
5363 : "memory", "cc", | 5363 : "memory", "cc", |
5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
5365 ); | 5365 ); |
5366 } | 5366 } |
5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 |
5368 | 5368 |
5369 #ifdef HAS_HALFFLOATROW_SSE2 | 5369 #ifdef HAS_HALFFLOATROW_SSE2 |
| 5370 static float kScaleBias = 1.9259299444e-34f; |
5370 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { | 5371 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { |
5371 float mult = 1.9259299444e-34f * scale; | |
5372 asm volatile ( | 5372 asm volatile ( |
5373 "movd %3,%%xmm4 \n" | 5373 "pshufd $0x0,%3,%%xmm4 \n" |
5374 "pshufd $0x0,%%xmm4,%%xmm4 \n" | |
5375 "pxor %%xmm5,%%xmm5 \n" | 5374 "pxor %%xmm5,%%xmm5 \n" |
5376 | 5375 |
5377 // 8 pixel loop. | 5376 // 8 pixel loop. |
5378 LABELALIGN | 5377 LABELALIGN |
5379 "1: \n" | 5378 "1: \n" |
5380 "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 shorts | 5379 "movdqu " MEMACCESS(0) ",%%xmm2 \n" // 8 shorts |
5381 "lea " MEMLEA(0x10,0) ",%0 \n" | 5380 "lea " MEMLEA(0x10,0) ",%0 \n" |
5382 "movdqa %%xmm0,%%xmm1 \n" | 5381 "movdqa %%xmm2,%%xmm3 \n" |
5383 "punpcklwd %%xmm5,%%xmm0 \n" // 8 ints in xmm0/1 | 5382 "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/3 |
5384 "cvtdq2ps %%xmm0,%%xmm0 \n" // 8 floats | 5383 "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats |
5385 "punpckhwd %%xmm5,%%xmm1 \n" | 5384 "punpckhwd %%xmm5,%%xmm3 \n" |
5386 "cvtdq2ps %%xmm1,%%xmm1 \n" | 5385 "cvtdq2ps %%xmm3,%%xmm3 \n" |
5387 "mulps %%xmm4,%%xmm0 \n" | 5386 "mulps %%xmm4,%%xmm2 \n" |
5388 "mulps %%xmm4,%%xmm1 \n" | 5387 "mulps %%xmm4,%%xmm3 \n" |
5389 "psrld $0xd,%%xmm0 \n" | 5388 "psrld $0xd,%%xmm2 \n" |
5390 "psrld $0xd,%%xmm1 \n" | 5389 "psrld $0xd,%%xmm3 \n" |
5391 "packssdw %%xmm1,%%xmm0 \n" | 5390 "packssdw %%xmm3,%%xmm2 \n" |
5392 "movdqu %%xmm0," MEMACCESS(1) " \n" | 5391 "movdqu %%xmm2," MEMACCESS(1) " \n" |
5393 "lea " MEMLEA(0x10,1) ",%1 \n" | 5392 "lea " MEMLEA(0x10,1) ",%1 \n" |
5394 "sub $0x8,%2 \n" | 5393 "sub $0x8,%2 \n" |
5395 "jg 1b \n" | 5394 "jg 1b \n" |
5396 : "+r"(src), // %0 | 5395 : "+r"(src), // %0 |
5397 "+r"(dst), // %1 | 5396 "+r"(dst), // %1 |
5398 "+r"(width) // %2 | 5397 "+r"(width) // %2 |
5399 : "rm"(mult) // %3 | 5398 : "x"(scale * kScaleBias) // %3 |
5400 : "memory", "cc", | 5399 : "memory", "cc", |
5401 "xmm0", "xmm1", "xmm4", "xmm5" | 5400 "xmm2", "xmm3", "xmm4", "xmm5" |
5402 ); | 5401 ); |
5403 } | 5402 } |
5404 #endif // HAS_HALFFLOATROW_SSE2 | 5403 #endif // HAS_HALFFLOATROW_SSE2 |
5405 | 5404 |
5406 #ifdef HAS_HALFFLOATROW_AVX2 | 5405 #ifdef HAS_HALFFLOATROW_AVX2 |
5407 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 5406 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { |
5408 asm volatile ( | 5407 asm volatile ( |
5409 "vbroadcastss %3, %%ymm4 \n" | 5408 "vbroadcastss %3, %%ymm4 \n" |
5410 | 5409 |
5411 // 16 pixel loop. | 5410 // 16 pixel loop. |
5412 LABELALIGN | 5411 LABELALIGN |
5413 "1: \n" | 5412 "1: \n" |
5414 "vpmovzxwd " MEMACCESS(0) ",%%ymm0 \n" // 8 shorts -> 8 ints | 5413 "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts -> 8 ints |
5415 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm1 \n" // 8 more | 5414 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" // 8 more |
5416 "lea " MEMLEA(0x20,0) ",%0 \n" | 5415 "lea " MEMLEA(0x20,0) ",%0 \n" |
5417 "vcvtdq2ps %%ymm0,%%ymm0 \n" | 5416 "vcvtdq2ps %%ymm2,%%ymm2 \n" |
5418 "vcvtdq2ps %%ymm1,%%ymm1 \n" | 5417 "vcvtdq2ps %%ymm3,%%ymm3 \n" |
5419 "vmulps %%ymm0,%%ymm4,%%ymm0 \n" | 5418 "vmulps %%ymm2,%%ymm4,%%ymm2 \n" |
5420 "vmulps %%ymm1,%%ymm4,%%ymm1 \n" | 5419 "vmulps %%ymm3,%%ymm4,%%ymm3 \n" |
5421 "vcvtps2ph $3, %%ymm0, %%xmm0 \n" | 5420 "vcvtps2ph $3, %%ymm2, %%xmm2 \n" |
5422 "vcvtps2ph $3, %%ymm1, %%xmm1 \n" | 5421 "vcvtps2ph $3, %%ymm3, %%xmm3 \n" |
5423 "vmovdqu %%xmm0," MEMACCESS(1) " \n" | 5422 "vmovdqu %%xmm2," MEMACCESS(1) " \n" |
5424 "vmovdqu %%xmm1," MEMACCESS2(0x10,1) " \n" | 5423 "vmovdqu %%xmm3," MEMACCESS2(0x10,1) " \n" |
5425 "lea " MEMLEA(0x20,1) ",%1 \n" | 5424 "lea " MEMLEA(0x20,1) ",%1 \n" |
5426 "sub $0x10,%2 \n" | 5425 "sub $0x10,%2 \n" |
5427 "jg 1b \n" | 5426 "jg 1b \n" |
5428 "vzeroupper \n" | 5427 "vzeroupper \n" |
5429 : "+r"(src), // %0 | 5428 : "+r"(src), // %0 |
5430 "+r"(dst), // %1 | 5429 "+r"(dst), // %1 |
5431 "+r"(width) // %2 | 5430 "+r"(width) // %2 |
5432 : "x"(scale) // %3 | 5431 : "x"(scale) // %3 |
5433 : "memory", "cc", | 5432 : "memory", "cc", |
5434 "xmm0", "xmm1", "xmm4" | 5433 "xmm2", "xmm3", "xmm4" |
5435 ); | 5434 ); |
5436 } | 5435 } |
5437 #endif // HAS_HALFFLOATROW_AVX2 | 5436 #endif // HAS_HALFFLOATROW_AVX2 |
5438 | 5437 |
5439 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 5438 #ifdef HAS_ARGBCOLORTABLEROW_X86 |
5440 // Transform ARGB pixels with color table. | 5439 // Transform ARGB pixels with color table. |
5441 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 5440 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, |
5442 int width) { | 5441 int width) { |
5443 uintptr_t pixel_temp; | 5442 uintptr_t pixel_temp; |
5444 asm volatile ( | 5443 asm volatile ( |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5595 ); | 5594 ); |
5596 } | 5595 } |
5597 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5596 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5598 | 5597 |
5599 #endif // defined(__x86_64__) || defined(__i386__) | 5598 #endif // defined(__x86_64__) || defined(__i386__) |
5600 | 5599 |
5601 #ifdef __cplusplus | 5600 #ifdef __cplusplus |
5602 } // extern "C" | 5601 } // extern "C" |
5603 } // namespace libyuv | 5602 } // namespace libyuv |
5604 #endif | 5603 #endif |
OLD | NEW |