| OLD | NEW | 
|---|
| 1 /* | 1 /* | 
| 2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS. All contributing project authors may | 7  *  in the file PATENTS. All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| (...skipping 6041 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 6052   } | 6052   } | 
| 6053 } | 6053 } | 
| 6054 #endif  // HAS_HALFFLOATROW_SSE2 | 6054 #endif  // HAS_HALFFLOATROW_SSE2 | 
| 6055 | 6055 | 
| 6056 #ifdef HAS_HALFFLOATROW_AVX2 | 6056 #ifdef HAS_HALFFLOATROW_AVX2 | 
| 6057 __declspec(naked) | 6057 __declspec(naked) | 
| 6058 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 6058 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { | 
| 6059   __asm { | 6059   __asm { | 
| 6060     mov        eax, [esp + 4]      /* src */ | 6060     mov        eax, [esp + 4]      /* src */ | 
| 6061     mov        edx, [esp + 8]      /* dst */ | 6061     mov        edx, [esp + 8]      /* dst */ | 
|  | 6062     movd       xmm4, dword ptr [esp + 12]  /* scale, loaded into the low element of xmm4 */ | 
|  | 6063     mov        ecx, [esp + 16]     /* width, used as the loop counter (reduced by 16 per pass) */ | 
|  | 6064     /* Setup: build the per-lane multiplier in ymm4 and a zero register in ymm5. */ | 
|  | 6065     vmulss     xmm4, xmm4, kExpBias  /* fold kExpBias (defined outside this view) into scale once, before the loop */ | 
|  | 6066     vbroadcastss ymm4, xmm4  /* replicate the biased scale to all 8 float lanes */ | 
|  | 6067     vpxor      ymm5, ymm5, ymm5  /* ymm5 = 0, supplies the upper word when widening shorts to ints */ | 
|  | 6068 | 
|  | 6069     // 16 pixel loop. Each pass reads 32 bytes from src and writes 32 bytes to dst. | 
|  | 6070  convertloop: | 
|  | 6071     vmovdqu     ymm2, [eax]  // 16 shorts (unaligned load) | 
|  | 6072     lea         eax, [eax + 32]  /* advance src by 16 shorts */ | 
|  | 6073     vpunpckhwd  ymm3, ymm2, ymm5 // convert 16 shorts to 16 ints, ymm3 gets the upper 4 shorts of each 128 bit lane | 
|  | 6074     vpunpcklwd  ymm2, ymm2, ymm5  /* ymm2 gets the lower 4 shorts of each 128 bit lane, zero extended */ | 
|  | 6075     vcvtdq2ps   ymm3, ymm3  // convert 16 ints to floats | 
|  | 6076     vcvtdq2ps   ymm2, ymm2 | 
|  | 6077     vmulps      ymm3, ymm3, ymm4  // scale (with kExpBias folded in) to adjust exponent for 5 bit range. | 
|  | 6078     vmulps      ymm2, ymm2, ymm4 | 
|  | 6079     vpsrld      ymm3, ymm3, 13  // float convert to 8 half floats by shifting out the low 13 bits (truncate) | 
|  | 6080     vpsrld      ymm2, ymm2, 13 | 
|  | 6081     vpackssdw   ymm2, ymm2, ymm3  /* narrow ints to words with signed saturation. The pack is per 128 bit lane, which restores the source order split by the unpacks */ | 
|  | 6082     vmovdqu     [edx], ymm2  /* store 16 half floats (unaligned store) */ | 
|  | 6083     lea         edx, [edx + 32]  /* advance dst by 16 half floats */ | 
|  | 6084     sub         ecx, 16  /* 16 values handled this pass */ | 
|  | 6085     jg          convertloop  /* repeat while the remaining count is above zero. NOTE(review): every pass handles a full 16 values and the body runs at least once, so this presumably relies on width being a positive multiple of 16 or on padded buffers - confirm with callers */ | 
|  | 6086     vzeroupper  /* clear the upper halves of the ymm registers before returning */ | 
|  | 6087     ret  /* naked function, so the return is written by hand */ | 
|  | 6088   } | 
|  | 6089 } | 
|  | 6090 #endif  // HAS_HALFFLOATROW_AVX2 | 
|  | 6091 | 
|  | 6092 #ifdef HAS_HALFFLOATROW_F16C | 
|  | 6093 __declspec(naked)  /* Converts a row of uint16 values to half floats with vcvtps2ph, 16 values per loop pass. Arguments are read directly from the stack. */ | 
|  | 6094 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { | 
|  | 6095   __asm { | 
|  | 6096     mov        eax, [esp + 4]      /* src */ | 
|  | 6097     mov        edx, [esp + 8]      /* dst */ | 
| 6062     vbroadcastss ymm4, [esp + 12]  /* scale, broadcast to all 8 float lanes of ymm4 */ | 6098     vbroadcastss ymm4, [esp + 12]  /* scale, broadcast to all 8 float lanes of ymm4 */ | 
| 6063     mov        ecx, [esp + 16]     /* width, used as the loop counter */ | 6099     mov        ecx, [esp + 16]     /* width, used as the loop counter */ | 
| 6064 | 6100 | 
| 6065     // 8 pixel loop. | 6101     // 16 pixel loop. | 
| 6066  convertloop: | 6102  convertloop: | 
| 6067     vpmovzxwd   ymm2, xmmword ptr [eax]  // 8 shorts -> 8 ints (zero extended) | 6103     vpmovzxwd   ymm2, xmmword ptr [eax]  // 8 shorts -> 8 ints (zero extended) | 
| 6068     vpmovzxwd   ymm3, xmmword ptr [eax + 16]  // 8 more shorts | 6104     vpmovzxwd   ymm3, xmmword ptr [eax + 16]  // 8 more shorts | 
| 6069     lea         eax, [eax + 32]  /* advance src by 16 shorts */ | 6105     lea         eax, [eax + 32]  /* advance src by 16 shorts */ | 
| 6070     vcvtdq2ps   ymm2, ymm2        // convert 8 ints to floats | 6106     vcvtdq2ps   ymm2, ymm2        // convert 8 ints to floats | 
| 6071     vcvtdq2ps   ymm3, ymm3 | 6107     vcvtdq2ps   ymm3, ymm3 | 
| 6072     vmulps      ymm2, ymm2, ymm4  // multiply by scale, presumably to a normalized range 0 to 1 (depends on the scale the caller passes) | 6108     vmulps      ymm2, ymm2, ymm4  // multiply by scale, presumably to a normalized range 0 to 1 (depends on the scale the caller passes) | 
| 6073     vmulps      ymm3, ymm3, ymm4 | 6109     vmulps      ymm3, ymm3, ymm4 | 
| 6074     vcvtps2ph   xmm2, ymm2, 3     // float convert to 8 half floats, immediate 3 selects truncate rounding | 6110     vcvtps2ph   xmm2, ymm2, 3     // float convert to 8 half floats, immediate 3 selects truncate rounding | 
| 6075     vcvtps2ph   xmm3, ymm3, 3 | 6111     vcvtps2ph   xmm3, ymm3, 3 | 
| 6076     vmovdqu     [edx], xmm2  /* store the first 8 half floats */ | 6112     vmovdqu     [edx], xmm2  /* store the first 8 half floats */ | 
| 6077     vmovdqu     [edx + 16], xmm3  /* store the next 8 half floats */ | 6113     vmovdqu     [edx + 16], xmm3  /* store the next 8 half floats */ | 
| 6078     lea         edx, [edx + 32]  /* advance dst by 16 half floats */ | 6114     lea         edx, [edx + 32]  /* advance dst by 16 half floats */ | 
| 6079     sub         ecx, 16  /* 16 values handled this pass */ | 6115     sub         ecx, 16  /* 16 values handled this pass */ | 
| 6080     jg          convertloop  /* repeat while the remaining count is above zero. NOTE(review): every pass handles a full 16 values, confirm callers handle widths that are not a multiple of 16 */ | 6116     jg          convertloop  /* repeat while the remaining count is above zero. NOTE(review): every pass handles a full 16 values, confirm callers handle widths that are not a multiple of 16 */ | 
| 6081     vzeroupper  /* clear the upper halves of the ymm registers before returning */ | 6117     vzeroupper  /* clear the upper halves of the ymm registers before returning */ | 
| 6082     ret | 6118     ret | 
| 6083   } | 6119   } | 
| 6084 } | 6120 } | 
| 6085 #endif  // HAS_HALFFLOATROW_AVX2 | 6121 #endif  // HAS_HALFFLOATROW_F16C | 
| 6086 | 6122 | 
| 6087 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 6123 #ifdef HAS_ARGBCOLORTABLEROW_X86 | 
| 6088 // Transform ARGB pixels with color table. | 6124 // Transform ARGB pixels with color table. | 
| 6089 __declspec(naked) | 6125 __declspec(naked) | 
| 6090 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 6126 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, | 
| 6091                            int width) { | 6127                            int width) { | 
| 6092   __asm { | 6128   __asm { | 
| 6093     push       esi | 6129     push       esi | 
| 6094     mov        eax, [esp + 4 + 4]   /* dst_argb */ | 6130     mov        eax, [esp + 4 + 4]   /* dst_argb */ | 
| 6095     mov        esi, [esp + 4 + 8]   /* table_argb */ | 6131     mov        esi, [esp + 4 + 8]   /* table_argb */ | 
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 6249 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6285 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 
| 6250 | 6286 | 
| 6251 #endif  // defined(_M_X64) | 6287 #endif  // defined(_M_X64) | 
| 6252 | 6288 | 
| 6253 #ifdef __cplusplus | 6289 #ifdef __cplusplus | 
| 6254 }  // extern "C" | 6290 }  // extern "C" | 
| 6255 }  // namespace libyuv | 6291 }  // namespace libyuv | 
| 6256 #endif | 6292 #endif | 
| 6257 | 6293 | 
| 6258 #endif  // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6294 #endif  // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 
| OLD | NEW | 
|---|