OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 5348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5359 : "+r"(src_argb), // %0 | 5359 : "+r"(src_argb), // %0 |
5360 "+r"(dst_argb), // %1 | 5360 "+r"(dst_argb), // %1 |
5361 "+r"(width) // %2 | 5361 "+r"(width) // %2 |
5362 : "r"(poly) // %3 | 5362 : "r"(poly) // %3 |
5363 : "memory", "cc", | 5363 : "memory", "cc", |
5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 5364 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
5365 ); | 5365 ); |
5366 } | 5366 } |
5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 5367 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 |
5368 | 5368 |
| 5369 #ifdef HAS_HALFFLOATROW_SSE2 |
// HalfFloatRow_SSE2: converts a row of uint16 samples in src to 16-bit values
// in dst, 8 samples per loop iteration.
//   src   - input samples; 16 bytes are read per iteration (unaligned load).
//   dst   - output; 16 bytes are written per iteration (unaligned store).
//   scale - multiplier applied to each sample, folded into `mult` below.
//   width - sample count; decremented by 8 per iteration.
// Each sample is zero-extended to 32 bits, converted to float, multiplied by
// mult, then the 32-bit float bit pattern is shifted right by 13 and packed
// to 16 bits. The right shift discards the low bits, so there is no rounding.
// The loop body runs before the count is tested, so 8 samples are always
// processed at least once.
// NOTE(review): callers presumably pass a positive width that is a multiple
// of 8 - confirm.
| 5370 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { |
// 1.9259299444e-34f is approximately 2^-112.
// NOTE(review): presumably this rebiases the float exponent (127 - 15 = 112)
// so that the shift by 13 below leaves a half-float bit pattern - confirm.
| 5371 float mult = 1.9259299444e-34f * scale; |
| 5372 asm volatile ( |
// Broadcast mult into all four lanes of xmm4; zero xmm5 so the unpack
// instructions below zero-extend 16-bit words to 32-bit ints.
| 5373 "movd %3,%%xmm4 \n" |
| 5374 "pshufd $0x0,%%xmm4,%%xmm4 \n" |
| 5375 "pxor %%xmm5,%%xmm5 \n" |
| 5376 |
| 5377 // 8 pixel loop. |
| 5378 LABELALIGN |
| 5379 "1: \n" |
| 5380 "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 shorts |
| 5381 "lea " MEMLEA(0x10,0) ",%0 \n" |
| 5382 "movdqa %%xmm0,%%xmm1 \n" |
| 5383 "punpcklwd %%xmm5,%%xmm0 \n" // 8 ints in xmm0/1 |
| 5384 "cvtdq2ps %%xmm0,%%xmm0 \n" // 8 floats |
| 5385 "punpckhwd %%xmm5,%%xmm1 \n" |
| 5386 "cvtdq2ps %%xmm1,%%xmm1 \n" |
| 5387 "mulps %%xmm4,%%xmm0 \n" |
| 5388 "mulps %%xmm4,%%xmm1 \n" |
// Shift each 32-bit float pattern right by 13 bits, then narrow to 16 bits.
// packssdw packs with signed saturation.
// NOTE(review): shifted values above 0x7fff would saturate - confirm that
// the expected input range keeps results below that.
| 5389 "psrld $0xd,%%xmm0 \n" |
| 5390 "psrld $0xd,%%xmm1 \n" |
| 5391 "packssdw %%xmm1,%%xmm0 \n" |
| 5392 "movdqu %%xmm0," MEMACCESS(1) " \n" |
| 5393 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 5394 "sub $0x8,%2 \n" |
| 5395 "jg 1b \n" |
| 5396 : "+r"(src), // %0 |
| 5397 "+r"(dst), // %1 |
| 5398 "+r"(width) // %2 |
| 5399 : "rm"(mult) // %3 |
| 5400 : "memory", "cc", |
| 5401 "xmm0", "xmm1", "xmm4", "xmm5" |
| 5402 ); |
| 5403 } |
| 5404 #endif // HAS_HALFFLOATROW_SSE2 |
| 5405 |
5369 #ifdef HAS_HALFFLOATROW_AVX2 | 5406 #ifdef HAS_HALFFLOATROW_AVX2 |
// HalfFloatRow_AVX2: converts a row of uint16 samples in src to half floats
// in dst, 16 samples per loop iteration.
//   src   - input samples; 32 bytes are consumed per iteration.
//   dst   - output; two 16-byte unaligned stores per iteration.
//   scale - multiplier applied to every sample before conversion.
//   width - sample count; decremented by 16 per iteration.
// Each sample is zero-extended to 32 bits, converted to float, multiplied by
// scale, and converted to a 16-bit half float by vcvtps2ph with immediate 3.
// NOTE(review): immediate 3 should select truncation per the Intel
// instruction reference - confirm.
// The loop body runs before the count is tested, so 16 samples are always
// processed at least once.
// NOTE(review): callers presumably pass a positive width that is a multiple
// of 16 - confirm.
5370 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 5407 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { |
5371 asm volatile ( | 5408 asm volatile ( |
// Replicate scale across all eight float lanes of ymm4.
5372 "vbroadcastss %3, %%ymm4 \n" | 5409 "vbroadcastss %3, %%ymm4 \n" |
5373 | 5410 |
5374 // 16 pixel loop. | 5411 // 16 pixel loop. |
5375 LABELALIGN | 5412 LABELALIGN |
5376 "1: \n" | 5413 "1: \n" |
5377 "vpmovzxwd " MEMACCESS(0) ",%%ymm0 \n" // 8 shorts -> 8 ints | 5414 "vpmovzxwd " MEMACCESS(0) ",%%ymm0 \n" // 8 shorts -> 8 ints |
5378 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm1 \n" // 8 more | 5415 "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm1 \n" // 8 more |
5379 "lea " MEMLEA(0x20,0) ",%0 \n" | 5416 "lea " MEMLEA(0x20,0) ",%0 \n" |
5380 "vcvtdq2ps %%ymm0,%%ymm0 \n" | 5417 "vcvtdq2ps %%ymm0,%%ymm0 \n" |
5381 "vcvtdq2ps %%ymm1,%%ymm1 \n" | 5418 "vcvtdq2ps %%ymm1,%%ymm1 \n" |
5382 "vmulps %%ymm0,%%ymm4,%%ymm0 \n" | 5419 "vmulps %%ymm0,%%ymm4,%%ymm0 \n" |
5383 "vmulps %%ymm1,%%ymm4,%%ymm1 \n" | 5420 "vmulps %%ymm1,%%ymm4,%%ymm1 \n" |
// Narrow 8 floats in each ymm register to 8 half floats in the matching xmm.
5384 "vcvtps2ph $3, %%ymm0, %%xmm0 \n" | 5421 "vcvtps2ph $3, %%ymm0, %%xmm0 \n" |
5385 "vcvtps2ph $3, %%ymm1, %%xmm1 \n" | 5422 "vcvtps2ph $3, %%ymm1, %%xmm1 \n" |
5386 "vmovdqu %%xmm0," MEMACCESS(1) " \n" | 5423 "vmovdqu %%xmm0," MEMACCESS(1) " \n" |
5387 "vmovdqu %%xmm1," MEMACCESS2(0x10,1) " \n" | 5424 "vmovdqu %%xmm1," MEMACCESS2(0x10,1) " \n" |
5388 "lea " MEMLEA(0x20,1) ",%1 \n" | 5425 "lea " MEMLEA(0x20,1) ",%1 \n" |
5389 "sub $0x10,%2 \n" | 5426 "sub $0x10,%2 \n" |
5390 "jg 1b \n" | 5427 "jg 1b \n" |
// Clear the upper ymm halves once the loop is done.
5391 "vzeroupper \n" | 5428 "vzeroupper \n" |
5392 : "+r"(src), // %0 | 5429 : "+r"(src), // %0 |
5393 "+r"(dst), // %1 | 5430 "+r"(dst), // %1 |
5394 "+r"(width) // %2 | 5431 "+r"(width) // %2 |
5395 : "x"(scale) // %3 | 5432 : "x"(scale) // %3 |
5396 : "memory", "cc", | 5433 : "memory", "cc", |
// The NEW column adds "xmm1" to the clobber list; the asm writes ymm1/xmm1
// in the loop above.
5397 "xmm0", "xmm4" | 5434 "xmm0", "xmm1", "xmm4" |
5398 ); | 5435 ); |
5399 } | 5436 } |
5400 #endif // HAS_HALFFLOATROW_AVX2 | 5437 #endif // HAS_HALFFLOATROW_AVX2 |
5401 | 5438 |
5402 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 5439 #ifdef HAS_ARGBCOLORTABLEROW_X86 |
5403 // Transform ARGB pixels with color table. | 5440 // Transform ARGB pixels with color table. |
5404 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 5441 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, |
5405 int width) { | 5442 int width) { |
5406 uintptr_t pixel_temp; | 5443 uintptr_t pixel_temp; |
5407 asm volatile ( | 5444 asm volatile ( |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5558 ); | 5595 ); |
5559 } | 5596 } |
5560 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5597 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5561 | 5598 |
5562 #endif // defined(__x86_64__) || defined(__i386__) | 5599 #endif // defined(__x86_64__) || defined(__i386__) |
5563 | 5600 |
5564 #ifdef __cplusplus | 5601 #ifdef __cplusplus |
5565 } // extern "C" | 5602 } // extern "C" |
5566 } // namespace libyuv | 5603 } // namespace libyuv |
5567 #endif | 5604 #endif |
OLD | NEW |