Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(263)

Side by Side Diff: source/row_win.cc

Issue 2371293002: Add low level support for 12 bit 420, 422 and 444 YUV video frame conversion. (Closed)
Patch Set: cast to uint16 Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | unit_test/planar_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after
6088 vmovq qword ptr [edx], xmm0 6088 vmovq qword ptr [edx], xmm0
6089 lea edx, [edx + 8] 6089 lea edx, [edx + 8]
6090 sub ecx, 2 6090 sub ecx, 2
6091 jg convertloop 6091 jg convertloop
6092 vzeroupper 6092 vzeroupper
6093 ret 6093 ret
6094 } 6094 }
6095 } 6095 }
6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2
6097 6097
6098 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor 6098 #ifdef HAS_HALFFLOATROW_AVX2
6099 // adjusts the sample range to 0 to 1 using a float multiply.
6100 // e.g. 9 bit scale is 1.0f / 512.0f
6101 // e.g. 10 bit scale is 1.0f / 1024.0f
6102 #ifdef HAS_SHORTTOHALFFLOAT_AVX2
6103 __declspec(naked) 6099 __declspec(naked)
6104 void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width) { 6100 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
6105 __asm { 6101 __asm {
6106 mov eax, [esp + 4] /* src */ 6102 mov eax, [esp + 4] /* src */
6107 mov edx, [esp + 8] /* dst */ 6103 mov edx, [esp + 8] /* dst */
6108 vbroadcastss ymm4, [esp + 12] /* scale */ 6104 vbroadcastss ymm4, [esp + 12] /* scale */
6109 mov ecx, [esp + 16] /* width */ 6105 mov ecx, [esp + 16] /* width */
6110 6106
6111 // 8 pixel loop. 6107 // 8 pixel loop.
6112 convertloop: 6108 convertloop:
6113 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints 6109 vpmovzxwd ymm0, xmmword ptr [eax] // 8 shorts -> 8 ints
6114 lea eax, [eax + 16] 6110 vpmovzxwd ymm1, xmmword ptr [eax + 16] // 8 more shorts
6111 lea eax, [eax + 32]
6115 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats 6112 vcvtdq2ps ymm0, ymm0 // convert 8 ints to floats
6113 vcvtdq2ps ymm1, ymm1
6116 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1 6114 vmulps ymm0, ymm0, ymm4 // scale to normalized range 0 to 1
6117 vcvtps2ph xmm0, ymm0, 0 // float convert to 8 half floats round even 6115 vmulps ymm1, ymm1, ymm4
6116 vcvtps2ph xmm0, ymm0, 3 // float convert to 8 half floats truncate
6117 vcvtps2ph xmm1, ymm1, 3
6118 vmovdqu [edx], xmm0 6118 vmovdqu [edx], xmm0
6119 lea edx, [edx + 16] 6119 vmovdqu [edx + 16], xmm1
6120 sub ecx, 8 6120 lea edx, [edx + 32]
6121 sub ecx, 16
6121 jg convertloop 6122 jg convertloop
6122 vzeroupper 6123 vzeroupper
6123 ret 6124 ret
6124 } 6125 }
6125 } 6126 }
6126 #endif // HAS_SHORTTOHALFFLOAT_AVX2 6127 #endif // HAS_HALFFLOATROW_AVX2
6127 6128
6128 #ifdef HAS_ARGBCOLORTABLEROW_X86 6129 #ifdef HAS_ARGBCOLORTABLEROW_X86
6129 // Transform ARGB pixels with color table. 6130 // Transform ARGB pixels with color table.
6130 __declspec(naked) 6131 __declspec(naked)
6131 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, 6132 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
6132 int width) { 6133 int width) {
6133 __asm { 6134 __asm {
6134 push esi 6135 push esi
6135 mov eax, [esp + 4 + 4] /* dst_argb */ 6136 mov eax, [esp + 4 + 4] /* dst_argb */
6136 mov esi, [esp + 4 + 8] /* table_argb */ 6137 mov esi, [esp + 4 + 8] /* table_argb */
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
6290 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6291 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6291 6292
6292 #endif // defined(_M_X64) 6293 #endif // defined(_M_X64)
6293 6294
6294 #ifdef __cplusplus 6295 #ifdef __cplusplus
6295 } // extern "C" 6296 } // extern "C"
6296 } // namespace libyuv 6297 } // namespace libyuv
6297 #endif 6298 #endif
6298 6299
6299 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6300 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
OLDNEW
« no previous file with comments | « source/row_gcc.cc ('k') | unit_test/planar_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698