OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6088 vmovq qword ptr [edx], xmm0 | 6088 vmovq qword ptr [edx], xmm0 |
6089 lea edx, [edx + 8] | 6089 lea edx, [edx + 8] |
6090 sub ecx, 2 | 6090 sub ecx, 2 |
6091 jg convertloop | 6091 jg convertloop |
6092 vzeroupper | 6092 vzeroupper |
6093 ret | 6093 ret |
6094 } | 6094 } |
6095 } | 6095 } |
6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 |
6097 | 6097 |
| 6098 #ifdef HAS_HALFFLOATROW_SSE2 |
// kExpBias is approximately 2^-112 (1.9259299444e-34). HalfFloatRow_SSE2
// multiplies the caller's scale by it once, up front. 112 is the difference
// between the IEEE single-precision exponent bias (127) and the half-precision
// bias (15), so this appears to pre-adjust the exponent so that a plain right
// shift of the float bits by 13 (23 - 10 mantissa bits) yields the half-float
// bit pattern.
| 6099 static float kExpBias = 1.9259299444e-34f; |
// HalfFloatRow_SSE2: reads uint16 values from src, multiplies each by scale,
// and stores the results to dst as 16-bit half-float bit patterns.
//
// Naked function: there is no compiler-generated prologue/epilogue, and the
// arguments are read directly from the stack at [esp + 4] .. [esp + 16].
// Register use: eax = src, edx = dst, ecx = remaining width,
// xmm4 = scale * kExpBias in all 4 lanes, xmm5 = zero, xmm2/xmm3 = scratch.
//
// Each iteration loads and stores a full 16 bytes (8 values), and the loop
// body runs at least once before width is tested, so width is effectively
// rounded up to a multiple of 8.
// NOTE(review): presumably callers guarantee width > 0 and a multiple of 8 (or
// padded buffers) -- confirm against the call sites.
// The low 13 mantissa bits are shifted out, so the conversion truncates
// rather than rounds.
| 6100 __declspec(naked) |
| 6101 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { |
| 6102 __asm { |
| 6103 mov eax, [esp + 4] /* src */ |
| 6104 mov edx, [esp + 8] /* dst */ |
| 6105 movd xmm4, dword ptr [esp + 12] /* scale */ |
| 6106 mov ecx, [esp + 16] /* width */ |
// xmm4 = scale * kExpBias, then broadcast to all 4 float lanes.
| 6107 mulss xmm4, kExpBias |
| 6108 pshufd xmm4, xmm4, 0 |
// xmm5 = 0, used as the high half when widening 16-bit values to 32-bit.
| 6109 pxor xmm5, xmm5 |
| 6110 |
| 6111 // 8 pixel loop. |
| 6112 convertloop: |
| 6113 movdqu xmm2, xmmword ptr [eax] // 8 shorts |
| 6114 lea eax, [eax + 16] |
| 6115 movdqa xmm3, xmm2 |
// Zero-extend the low 4 shorts (xmm2) and the high 4 shorts (xmm3) to dwords.
| 6116 punpcklwd xmm2, xmm5 |
| 6117 cvtdq2ps xmm2, xmm2 // convert low 4 ints to floats |
| 6118 punpckhwd xmm3, xmm5 |
| 6119 cvtdq2ps xmm3, xmm3 |
// Apply scale * kExpBias to all 8 floats.
| 6120 mulps xmm2, xmm4 |
| 6121 mulps xmm3, xmm4 |
// Shift the raw float bits right by 13; the low mantissa bits are discarded.
| 6122 psrld xmm2, 13 |
| 6123 psrld xmm3, 13 |
// Narrow the 8 dwords to 8 words (signed saturation) and store 16 bytes.
| 6124 packssdw xmm2, xmm3 |
| 6125 movdqu [edx], xmm2 |
| 6126 lea edx, [edx + 16] |
// lea does not modify flags, so jg tests the result of this sub.
| 6127 sub ecx, 8 |
| 6128 jg convertloop |
| 6129 ret |
| 6130 } |
| 6131 } |
| 6132 #endif // HAS_HALFFLOATROW_SSE2 |
| 6133 |
6098 #ifdef HAS_HALFFLOATROW_AVX2 | 6134 #ifdef HAS_HALFFLOATROW_AVX2 |
// HalfFloatRow_AVX2: reads uint16 values from src, multiplies each by scale,
// and stores the results to dst as 16-bit half floats using the hardware
// float-to-half conversion (vcvtps2ph).
//
// Naked function: there is no compiler-generated prologue/epilogue, and the
// arguments are read directly from the stack at [esp + 4] .. [esp + 16].
// Register use: eax = src, edx = dst, ecx = remaining width,
// ymm4 = scale in all 8 lanes, ymm2/ymm3 = scratch (ymm0/ymm1 in the OLD
// column; the register rename is the only change in this function).
//
// Each iteration loads and stores 32 bytes (16 values), and the loop body runs
// at least once before width is tested, so width is effectively rounded up to
// a multiple of 16.
// NOTE(review): presumably callers guarantee width > 0 and a multiple of 16
// (or padded buffers) -- confirm against the call sites.
// NOTE(review): vcvtps2ph belongs to the F16C extension rather than AVX2
// itself -- confirm that whatever enables HAS_HALFFLOATROW_AVX2 also checks
// for F16C support.
6099 __declspec(naked) | 6135 __declspec(naked) |
6100 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 6136 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { |
6101 __asm { | 6137 __asm { |
6102 mov eax, [esp + 4] /* src */ | 6138 mov eax, [esp + 4] /* src */ |
6103 mov edx, [esp + 8] /* dst */ | 6139 mov edx, [esp + 8] /* dst */ |
6104 vbroadcastss ymm4, [esp + 12] /* scale */ | 6140 vbroadcastss ymm4, [esp + 12] /* scale */ |
6105 mov ecx, [esp + 16] /* width */ | 6141 mov ecx, [esp + 16] /* width */ |
6106 | 6142 |
6107 // 8 pixel loop. | 6143 // 16 pixel loop (two vectors of 8 per iteration). |
6108 convertloop: | 6144 convertloop: |
6109 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints | 6145 vpmovzxwd ymm2, xmmword ptr [eax] // 8 shorts -> 8 ints |
6110 vpmovzxwd ymm1, xmmword ptr [eax + 16] // 8 more shorts | 6146 vpmovzxwd ymm3, xmmword ptr [eax + 16] // 8 more shorts |
6111 lea eax, [eax + 32] | 6147 lea eax, [eax + 32] |
6112 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats | 6148 vcvtdq2ps ymm2, ymm2 // convert 8 ints to floats |
6113 vcvtdq2ps ymm1, ymm1 | 6149 vcvtdq2ps ymm3, ymm3 |
6114 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1 | 6150 vmulps ymm2, ymm2, ymm4 // scale to normalized range 0 to 1 |
6115 vmulps ymm1, ymm1, ymm4 | 6151 vmulps ymm3, ymm3, ymm4 |
// Immediate 3 selects round-toward-zero (truncate) for the conversion.
6116 vcvtps2ph xmm0, ymm0, 3 // float convert to 8 half floats truncate | 6152 vcvtps2ph xmm2, ymm2, 3 // float convert to 8 half floats truncate |
6117 vcvtps2ph xmm1, ymm1, 3 | 6153 vcvtps2ph xmm3, ymm3, 3 |
6118 vmovdqu [edx], xmm0 | 6154 vmovdqu [edx], xmm2 |
6119 vmovdqu [edx + 16], xmm1 | 6155 vmovdqu [edx + 16], xmm3 |
6120 lea edx, [edx + 32] | 6156 lea edx, [edx + 32] |
// lea does not modify flags, so jg tests the result of this sub.
6121 sub ecx, 16 | 6157 sub ecx, 16 |
6122 jg convertloop | 6158 jg convertloop |
// Clear the upper halves of the ymm registers before returning to the caller.
6123 vzeroupper | 6159 vzeroupper |
6124 ret | 6160 ret |
6125 } | 6161 } |
6126 } | 6162 } |
6127 #endif // HAS_HALFFLOATROW_AVX2 | 6163 #endif // HAS_HALFFLOATROW_AVX2 |
6128 | 6164 |
6129 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 6165 #ifdef HAS_ARGBCOLORTABLEROW_X86 |
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6291 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6327 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6292 | 6328 |
6293 #endif // defined(_M_X64) | 6329 #endif // defined(_M_X64) |
6294 | 6330 |
6295 #ifdef __cplusplus | 6331 #ifdef __cplusplus |
6296 } // extern "C" | 6332 } // extern "C" |
6297 } // namespace libyuv | 6333 } // namespace libyuv |
6298 #endif | 6334 #endif |
6299 | 6335 |
6300 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6336 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
OLD | NEW |