OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6088 vmovq qword ptr [edx], xmm0 | 6088 vmovq qword ptr [edx], xmm0 |
6089 lea edx, [edx + 8] | 6089 lea edx, [edx + 8] |
6090 sub ecx, 2 | 6090 sub ecx, 2 |
6091 jg convertloop | 6091 jg convertloop |
6092 vzeroupper | 6092 vzeroupper |
6093 ret | 6093 ret |
6094 } | 6094 } |
6095 } | 6095 } |
6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 |
6097 | 6097 |
6098 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor | 6098 #ifdef HAS_HALFFLOATROW_AVX2 |
6099 // adjust the sample range to 0 to 1 using a float multiply. | |
6100 // e.g. 9 bit scale is 1.0f / 512.0f | |
6101 // e.g. 10 bit scale is 1.0f / 1024.0f | |
6102 #ifdef HAS_SHORTTOHALFFLOAT_AVX2 | |
6103 __declspec(naked) | 6099 __declspec(naked) |
6104 void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width) { | 6100 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { |
6105 __asm { | 6101 __asm { |
6106 mov eax, [esp + 4] /* src */ | 6102 mov eax, [esp + 4] /* src */ |
6107 mov edx, [esp + 8] /* dst */ | 6103 mov edx, [esp + 8] /* dst */ |
6108 vbroadcastss ymm4, [esp + 12] /* scale */ | 6104 vbroadcastss ymm4, [esp + 12] /* scale */ |
6109 mov ecx, [esp + 16] /* width */ | 6105 mov ecx, [esp + 16] /* width */ |
6110 | 6106 |
6111 // 8 pixel loop. | 6107 // 16 pixel loop. |
6112 convertloop: | 6108 convertloop: |
6113 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints | 6109 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints |
6114 lea eax, [eax + 16] | 6110 vpmovzxwd ymm1, xmmword ptr [eax + 16] // 8 more shorts |
| 6111 lea eax, [eax + 32] |
6115 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats | 6112 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats |
| 6113 vcvtdq2ps ymm1, ymm1 |
6116 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1 | 6114 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1 |
6117 vcvtps2ph xmm0, ymm0, 0 // float conver to 8 half floats round even | 6115 vmulps ymm1, ymm1, ymm4 |
| 6116 vcvtps2ph xmm0, ymm0, 3 // float convert to 8 half floats truncate |
| 6117 vcvtps2ph xmm1, ymm1, 3 |
6118 vmovdqu [edx], xmm0 | 6118 vmovdqu [edx], xmm0 |
6119 lea edx, [edx + 16] | 6119 vmovdqu [edx + 16], xmm1 |
6120 sub ecx, 8 | 6120 lea edx, [edx + 32] |
| 6121 sub ecx, 16 |
6121 jg convertloop | 6122 jg convertloop |
6122 vzeroupper | 6123 vzeroupper |
6123 ret | 6124 ret |
6124 } | 6125 } |
6125 } | 6126 } |
6126 #endif // HAS_SHORTTOHALFFLOAT_AVX2 | 6127 #endif // HAS_HALFFLOATROW_AVX2 |
6127 | 6128 |
6128 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 6129 #ifdef HAS_ARGBCOLORTABLEROW_X86 |
6129 // Transform ARGB pixels with color table. | 6130 // Transform ARGB pixels with color table. |
6130 __declspec(naked) | 6131 __declspec(naked) |
6131 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 6132 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, |
6132 int width) { | 6133 int width) { |
6133 __asm { | 6134 __asm { |
6134 push esi | 6135 push esi |
6135 mov eax, [esp + 4 + 4] /* dst_argb */ | 6136 mov eax, [esp + 4 + 4] /* dst_argb */ |
6136 mov esi, [esp + 4 + 8] /* table_argb */ | 6137 mov esi, [esp + 4 + 8] /* table_argb */ |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6290 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6291 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6291 | 6292 |
6292 #endif // defined(_M_X64) | 6293 #endif // defined(_M_X64) |
6293 | 6294 |
6294 #ifdef __cplusplus | 6295 #ifdef __cplusplus |
6295 } // extern "C" | 6296 } // extern "C" |
6296 } // namespace libyuv | 6297 } // namespace libyuv |
6297 #endif | 6298 #endif |
6298 | 6299 |
6299 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6300 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
OLD | NEW |