Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(55)

Side by Side Diff: source/row_win.cc

Issue 2387713002: HalfFloat_SSE2 for Visual C (Closed)
Patch Set: planar functions sse2 only Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after
6088 vmovq qword ptr [edx], xmm0 6088 vmovq qword ptr [edx], xmm0
6089 lea edx, [edx + 8] 6089 lea edx, [edx + 8]
6090 sub ecx, 2 6090 sub ecx, 2
6091 jg convertloop 6091 jg convertloop
6092 vzeroupper 6092 vzeroupper
6093 ret 6093 ret
6094 } 6094 }
6095 } 6095 }
6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2
6097 6097
6098 #ifdef HAS_HALFFLOATROW_SSE2
// kExpBias is 2^-112. Multiplying a float by it lowers the float's biased
// exponent by 112 (= 127 - 15, the difference between the single-precision
// and half-precision exponent biases), so that the bit pattern shifted right
// by 13 (= 23 - 10 mantissa bits) has the layout of a half float.
6099 static float kExpBias = 1.9259299444e-34f;
// HalfFloatRow_SSE2: converts a row of 16-bit unsigned values to 16-bit
// half-float bit patterns, multiplying each value by `scale` on the way.
//
// Naked function (no compiler prologue/epilogue); arguments are read
// directly from the stack:
//   [esp + 4]  src   - pointer to the input uint16 values
//   [esp + 8]  dst   - pointer to the output uint16 values
//   [esp + 12] scale - float multiplier applied to every value
//   [esp + 16] width - number of values to convert
// Registers written: eax, edx, ecx, xmm2, xmm3, xmm4, xmm5.
//
// The low mantissa bits are dropped by the shift, so results are truncated,
// not rounded.
//
// NOTE(review): the loop body runs before width is tested and always
// consumes 8 values, so a width that is not a positive multiple of 8 reads
// and writes a full final group of 8. Presumably callers handle remainders
// or pad their buffers - confirm against the call sites.
6100 __declspec(naked)
6101 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
6102 __asm {
6103 mov eax, [esp + 4] /* src */
6104 mov edx, [esp + 8] /* dst */
6105 movd xmm4, dword ptr [esp + 12] /* scale */
6106 mov ecx, [esp + 16] /* width */
6107 mulss xmm4, kExpBias // fold the exponent rebias into the scale factor
6108 pshufd xmm4, xmm4, 0 // broadcast scale * kExpBias to all 4 lanes
6109 pxor xmm5, xmm5 // xmm5 = 0, used to zero-extend words to dwords
6110
6111 // 8 pixel loop.
6112 convertloop:
6113 movdqu xmm2, xmmword ptr [eax] // 8 shorts
6114 lea eax, [eax + 16]
6115 movdqa xmm3, xmm2
6116 punpcklwd xmm2, xmm5 // low 4 shorts -> 4 zero-extended ints
6117 cvtdq2ps xmm2, xmm2 // convert low 4 ints to floats
6118 punpckhwd xmm3, xmm5 // high 4 shorts -> 4 zero-extended ints
6119 cvtdq2ps xmm3, xmm3 // convert high 4 ints to floats
6120 mulps xmm2, xmm4 // apply scale and exponent rebias
6121 mulps xmm3, xmm4
6122 psrld xmm2, 13 // drop 13 low mantissa bits: half-float bit layout
6123 psrld xmm3, 13
6124 packssdw xmm2, xmm3 // pack 8 dwords to 8 words (signed saturation)
6125 movdqu [edx], xmm2 // store 8 half floats
6126 lea edx, [edx + 16]
6127 sub ecx, 8
6128 jg convertloop // loop while values remain (signed compare)
6129 ret
6130 }
6131 }
6132 #endif // HAS_HALFFLOATROW_SSE2
6133
6098 #ifdef HAS_HALFFLOATROW_AVX2 6134 #ifdef HAS_HALFFLOATROW_AVX2
// HalfFloatRow_AVX2: converts a row of 16-bit unsigned values to 16-bit
// half floats, multiplying each value by `scale` before the conversion.
//
// Naked function (no compiler prologue/epilogue); arguments are read
// directly from the stack:
//   [esp + 4]  src   - pointer to the input uint16 values
//   [esp + 8]  dst   - pointer to the output uint16 values
//   [esp + 12] scale - float multiplier, broadcast to all 8 lanes of ymm4
//   [esp + 16] width - number of values to convert
//
// Each iteration zero-extends two groups of 8 shorts to ints, converts them
// to floats, multiplies by scale, converts to half floats with vcvtps2ph and
// stores 32 bytes. vzeroupper is issued before returning.
//
// NOTE(review): the loop body runs before width is tested and always
// consumes 16 values, so a width that is not a positive multiple of 16 reads
// and writes a full final group of 16. Presumably callers handle remainders
// or pad their buffers - confirm against the call sites.
6099 __declspec(naked) 6135 __declspec(naked)
6100 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { 6136 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
6101 __asm { 6137 __asm {
6102 mov eax, [esp + 4] /* src */ 6138 mov eax, [esp + 4] /* src */
6103 mov edx, [esp + 8] /* dst */ 6139 mov edx, [esp + 8] /* dst */
6104 vbroadcastss ymm4, [esp + 12] /* scale */ 6140 vbroadcastss ymm4, [esp + 12] /* scale */
6105 mov ecx, [esp + 16] /* width */ 6141 mov ecx, [esp + 16] /* width */
6106 6142
6107 // 16 pixel loop. 6143 // 16 pixel loop.
6108 convertloop: 6144 convertloop:
6109 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints 6145 vpmovzxwd ymm2, xmmword ptr [eax] // 8 shorts -> 8 ints
6110 vpmovzxwd ymm1, xmmword ptr [eax + 16] // 8 more shorts 6146 vpmovzxwd ymm3, xmmword ptr [eax + 16] // 8 more shorts
6111 lea eax, [eax + 32] 6147 lea eax, [eax + 32]
6112 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats 6148 vcvtdq2ps ymm2, ymm2 // convert 8 ints to floats
6113 vcvtdq2ps ymm1, ymm1 6149 vcvtdq2ps ymm3, ymm3
6114 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1 6150 vmulps ymm2, ymm2, ymm4 // scale to normalized range 0 to 1
6115 vmulps ymm1, ymm1, ymm4 6151 vmulps ymm3, ymm3, ymm4
6116 vcvtps2ph xmm0, ymm0, 3 // float convert to 8 half floats truncate 6152 vcvtps2ph xmm2, ymm2, 3 // float convert to 8 half floats truncate
6117 vcvtps2ph xmm1, ymm1, 3 6153 vcvtps2ph xmm3, ymm3, 3
6118 vmovdqu [edx], xmm0 6154 vmovdqu [edx], xmm2
6119 vmovdqu [edx + 16], xmm1 6155 vmovdqu [edx + 16], xmm3
6120 lea edx, [edx + 32] 6156 lea edx, [edx + 32]
6121 sub ecx, 16 6157 sub ecx, 16
6122 jg convertloop 6158 jg convertloop
6123 vzeroupper 6159 vzeroupper
6124 ret 6160 ret
6125 } 6161 }
6126 } 6162 }
6127 #endif // HAS_HALFFLOATROW_AVX2 6163 #endif // HAS_HALFFLOATROW_AVX2
6128 6164
6129 #ifdef HAS_ARGBCOLORTABLEROW_X86 6165 #ifdef HAS_ARGBCOLORTABLEROW_X86
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after
6291 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6327 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6292 6328
6293 #endif // defined(_M_X64) 6329 #endif // defined(_M_X64)
6294 6330
6295 #ifdef __cplusplus 6331 #ifdef __cplusplus
6296 } // extern "C" 6332 } // extern "C"
6297 } // namespace libyuv 6333 } // namespace libyuv
6298 #endif 6334 #endif
6299 6335
6300 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6336 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
OLDNEW
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698