source/row_win.cc - Issue 2371293002: Add low level support for 12 bit 420, 422 and 444 YUV video frame conversion.

Side by Side Diff: source/row_win.cc

Issue 2371293002: Add low level support for 12 bit 420, 422 and 444 YUV video frame conversion. (Closed)

Patch Set: cast to uint16 Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.	2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6088 vmovq qword ptr [edx], xmm0	6088 vmovq qword ptr [edx], xmm0

6089 lea edx, [edx + 8]	6089 lea edx, [edx + 8]

6090 sub ecx, 2	6090 sub ecx, 2

6091 jg convertloop	6091 jg convertloop

6092 vzeroupper	6092 vzeroupper

6093 ret	6093 ret

6094 }	6094 }

6095 }	6095 }

6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2	6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2

6097	6097

6098 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor	6098 #ifdef HAS_HALFFLOATROW_AVX2

6099 // adjust the sample range to 0 to 1 using a float multiply.

6100 // e.g. 9 bit scale is 1.0f / 512.0f

6101 // e.g. 10 bit scale is 1.0f / 1024.0f

6102 #ifdef HAS_SHORTTOHALFFLOAT_AVX2

6103 __declspec(naked)	6099 __declspec(naked)

6104 void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width) {	6100 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {

6105 __asm {	6101 __asm {

6106 mov eax, [esp + 4] /* src */	6102 mov eax, [esp + 4] /* src */

6107 mov edx, [esp + 8] /* dst */	6103 mov edx, [esp + 8] /* dst */

6108 vbroadcastss ymm4, [esp + 12] /* scale */	6104 vbroadcastss ymm4, [esp + 12] /* scale */

6109 mov ecx, [esp + 16] /* width */	6105 mov ecx, [esp + 16] /* width */

6110	6106

6111 // 8 pixel loop.	6107 // 8 pixel loop.

6112 convertloop:	6108 convertloop:

6113 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints	6109 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints

6114 lea eax, [eax + 16]	6110 vpmovzxwd ymm1, xmmword ptr [eax + 16] // 8 more shorts

	6111 lea eax, [eax + 32]

6115 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats	6112 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats

	6113 vcvtdq2ps ymm1, ymm1

6116 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1	6114 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1

6117 vcvtps2ph xmm0, ymm0, 0 // float conver to 8 half floats round even	6115 vmulps ymm1, ymm1, ymm4

	6116 vcvtps2ph xmm0, ymm0, 3 // float convert to 8 half floats truncate

	6117 vcvtps2ph xmm1, ymm1, 3

6118 vmovdqu [edx], xmm0	6118 vmovdqu [edx], xmm0

6119 lea edx, [edx + 16]	6119 vmovdqu [edx + 16], xmm1

6120 sub ecx, 8	6120 lea edx, [edx + 32]

	6121 sub ecx, 16

6121 jg convertloop	6122 jg convertloop

6122 vzeroupper	6123 vzeroupper

6123 ret	6124 ret

6124 }	6125 }

6125 }	6126 }

6126 #endif // HAS_SHORTTOHALFFLOAT_AVX2	6127 #endif // HAS_HALFFLOATROW_AVX2

6127	6128

6128 #ifdef HAS_ARGBCOLORTABLEROW_X86	6129 #ifdef HAS_ARGBCOLORTABLEROW_X86

6129 // Transform ARGB pixels with color table.	6130 // Transform ARGB pixels with color table.

6130 __declspec(naked)	6131 __declspec(naked)

6131 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,	6132 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,

6132 int width) {	6133 int width) {

6133 __asm {	6134 __asm {

6134 push esi	6135 push esi

6135 mov eax, [esp + 4 + 4] /* dst_argb */	6136 mov eax, [esp + 4 + 4] /* dst_argb */

6136 mov esi, [esp + 4 + 8] /* table_argb */	6137 mov esi, [esp + 4 + 8] /* table_argb */

(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6290 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3	6291 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3

6291	6292

6292 #endif // defined(_M_X64)	6293 #endif // defined(_M_X64)

6293	6294

6294 #ifdef __cplusplus	6295 #ifdef __cplusplus

6295 } // extern "C"	6296 } // extern "C"

6296 } // namespace libyuv	6297 } // namespace libyuv

6297 #endif	6298 #endif

6298	6299

6299 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))	6300 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))

OLD	NEW

« no previous file with comments | « source/row_gcc.cc ('k') | unit_test/planar_test.cc » ('j') | no next file with comments »