 Chromium Code Reviews
 Chromium Code Reviews Issue 2364293002:
  ShortToHalfFloat_AVX2 function  (Closed)
    
  
    Issue 2364293002:
  ShortToHalfFloat_AVX2 function  (Closed) 
  | OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 
| 3 * | 3 * | 
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license | 
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source | 
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found | 
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may | 
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. | 
| 9 */ | 9 */ | 
| 10 | 10 | 
| (...skipping 6077 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6088 vmovq qword ptr [edx], xmm0 | 6088 vmovq qword ptr [edx], xmm0 | 
| 6089 lea edx, [edx + 8] | 6089 lea edx, [edx + 8] | 
| 6090 sub ecx, 2 | 6090 sub ecx, 2 | 
| 6091 jg convertloop | 6091 jg convertloop | 
| 6092 vzeroupper | 6092 vzeroupper | 
| 6093 ret | 6093 ret | 
| 6094 } | 6094 } | 
| 6095 } | 6095 } | 
| 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 6096 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 | 
| 6097 | 6097 | 
| 6098 // Samples are assumed to be unsigned in the low 9, 10 or 12 bits. The scale | |
| 6099 // factor maps the sample range to 0 to 1 using a float multiply, and the | |
| 6100 // result is stored as half floats, 8 samples per loop iteration. | |
| 6101 // e.g. 9 bit scale is 1.0f / 512.0f and 10 bit scale is 1.0f / 1024.0f | |
| 6102 #ifdef HAS_SHORTTOHALFFLOAT_AVX2 | |
| 6103 __declspec(naked) | |
| 6104 void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width) { | |
| 6105 __asm { | |
| 6106 mov eax, [esp + 4] /* src: read pointer, 16 bytes consumed per iteration */ | |
| 6107 mov edx, [esp + 8] /* dst: write pointer, 16 bytes produced per iteration */ | |
| 6108 vbroadcastss ymm4, [esp + 12] /* scale, replicated into all 8 float lanes */ | |
| 6109 mov ecx, [esp + 16] /* width: remaining sample count */ | |
| 6110 | |
| 6111 // Main loop: 8 samples per iteration. | |
| 6112 convertloop: | |
| 6113 vpmovzxwd ymm0, xmmword ptr [eax] // zero-extend 8 uint16 samples to 8 int32

fbarchard1
2016/09/27 22:14:19
note vpmovzxwd is avx2 but other instructions are
 | |
| 6114 lea eax, [eax + 16] /* advance src by 8 uint16 (16 bytes) */ | |
| 6115 vcvtdq2ps ymm0, ymm0 // convert the 8 int32 values to floats | |
| 6116 vmulps ymm0, ymm0, ymm4 // multiply by scale to normalize to the range 0 to 1 | |
| 6117 vcvtps2ph xmm0, ymm0, 0 // convert 8 floats to 8 half floats; imm8 0 = round to nearest even | |
| 6118 vmovdqu [edx], xmm0 /* unaligned store of 8 half floats (16 bytes) */ | |
| 6119 lea edx, [edx + 16] /* advance dst by 8 half floats (16 bytes) */ | |
| 6120 sub ecx, 8 /* 8 samples done; a full group of 8 is always processed */ | |
| 6121 jg convertloop /* signed test: repeat while the remaining count is > 0 */ | |
| 6122 vzeroupper /* clear the upper halves of the ymm registers before returning */ | |
| 6123 ret | |
| 6124 } | |
| 6125 } | |
| 6126 #endif // HAS_SHORTTOHALFFLOAT_AVX2 | |
| 6127 | |
| 6098 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 6128 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 
| 6099 // Transform ARGB pixels with color table. | 6129 // Transform ARGB pixels with color table. | 
| 6100 __declspec(naked) | 6130 __declspec(naked) | 
| 6101 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 6131 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 
| 6102 int width) { | 6132 int width) { | 
| 6103 __asm { | 6133 __asm { | 
| 6104 push esi | 6134 push esi | 
| 6105 mov eax, [esp + 4 + 4] /* dst_argb */ | 6135 mov eax, [esp + 4 + 4] /* dst_argb */ | 
| 6106 mov esi, [esp + 4 + 8] /* table_argb */ | 6136 mov esi, [esp + 4 + 8] /* table_argb */ | 
| 6107 mov ecx, [esp + 4 + 12] /* width */ | 6137 mov ecx, [esp + 4 + 12] /* width */ | 
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6260 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6290 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 
| 6261 | 6291 | 
| 6262 #endif // defined(_M_X64) | 6292 #endif // defined(_M_X64) | 
| 6263 | 6293 | 
| 6264 #ifdef __cplusplus | 6294 #ifdef __cplusplus | 
| 6265 } // extern "C" | 6295 } // extern "C" | 
| 6266 } // namespace libyuv | 6296 } // namespace libyuv | 
| 6267 #endif | 6297 #endif | 
| 6268 | 6298 | 
| 6269 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6299 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 
| OLD | NEW |