| OLD | NEW |
| (Empty) |
| 1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 ; Use of this source code is governed by a BSD-style license that can be | |
| 3 ; found in the LICENSE file. | |
| 4 | |
| 5 %include "media/base/simd/media_export.asm" | |
| 6 %include "third_party/x86inc/x86inc.asm" | |
| 7 | |
| 8 ; | |
| 9 ; This file uses SSE and SSE2 instructions on XMM registers. | |
| 10 ; | |
| 11 SECTION_TEXT | |
| 12 CPU SSE2 | |
| 13 | |
| 14 ; void ScaleYUVToRGB32Row_SSE2_X64(const uint8_t* y_buf, | |
| 15 ; const uint8_t* u_buf, | |
| 16 ; const uint8_t* v_buf, | |
| 17 ; uint8_t* rgb_buf, | |
| 18 ; ptrdiff_t width, | |
| 19 ; ptrdiff_t source_dx); NOTE(review): the PROLOGUE below also loads a 7th argument (the convert table) that this prototype omits - confirm against the C declaration. | |
| 20 %define SYMBOL ScaleYUVToRGB32Row_SSE2_X64 | |
| 21 EXPORT SYMBOL | |
| 22 align function_align | |
| 23 | |
| 24 mangle(SYMBOL): | |
| 25 %assign stack_offset 0 | |
| 26 | |
| 27 ; Parameters, in the order they are named in the PROLOGUE below: | |
| 28 ; 1. Y plane (Yq): one byte is read per output pixel, at index X >> 16. | |
| 29 ; 2. U plane (Uq): one byte is read per pixel pair, at index X >> 17. | |
| 30 ; 3. V plane (Vq): indexed the same way as the U plane. | |
| 31 ; 4. ARGB frame (ARGBq): output pointer; 8 bytes are stored per pixel pair, 4 bytes for an odd trailing pixel. | |
| 32 ; 5. Width (WIDTHq): number of output pixels; consumed two at a time by the loop. | |
| 33 ; 6. Source dx (SOURCE_DXq): amount added to X for each output pixel (X carries 16 fractional bits). | |
| 34 ; 7. Convert table (R1q, copied to TABLEq): 8-byte entries; Y entries start at +0, U at +2048, V at +4096. | |
| 35 | |
| 36 PROLOGUE 7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1 | |
| 37 | |
| 38 %define TABLEq r10 | |
| 39 %define Xq r11 | |
| 40 %define INDEXq r12 | |
| 41 %define COMPq R1q | |
| 42 %define COMPd R1d | |
| 43 | |
| 44 PUSH r10 | |
| 45 PUSH r11 | |
| 46 PUSH r12 | |
| 47 | |
| 48 mov TABLEq, R1q | |
| 49 | |
| 50 ; Xq is the fixed-point source position (16 fractional bits). Start it at 0, then enter at the loop test so that a width below 2 skips the pair loop. | |
| 51 xor Xq, Xq | |
| 52 jmp .scaleend | |
| 53 | |
| 54 .scaleloop: | |
| 55 ; Read the U and V bytes shared by this pixel pair (index = X >> 17) and load their table entries into xmm0 / xmm1. | |
| 56 mov INDEXq, Xq | |
| 57 sar INDEXq, 17 | |
| 58 movzx COMPd, BYTE [Uq + INDEXq] | |
| 59 movq xmm0, [TABLEq + 2048 + 8 * COMPq] | |
| 60 movzx COMPd, BYTE [Vq + INDEXq] | |
| 61 movq xmm1, [TABLEq + 4096 + 8 * COMPq] | |
| 62 | |
| 63 ; Read the first Y byte of the pair (index = X >> 16) and load its table entry. | |
| 64 lea INDEXq, [Xq + SOURCE_DXq] ; INDEXq now holds the position of the second pixel of the pair. | |
| 65 sar Xq, 16 | |
| 66 movzx COMPd, BYTE [Yq + Xq] | |
| 67 paddsw xmm0, xmm1 ; xmm0 = U entry + V entry (saturating); the ADD is hidden behind the memory load. | |
| 68 movq xmm1, [TABLEq + 8 * COMPq] | |
| 69 | |
| 70 ; Read the second Y byte of the pair and load its table entry. | |
| 71 lea Xq, [INDEXq + SOURCE_DXq] ; Xq was shifted above; rebuild it as the position of the first pixel of the next pair. | |
| 72 sar INDEXq, 16 | |
| 73 movzx COMPd, BYTE [Yq + INDEXq] | |
| 74 movq xmm2, [TABLEq + 8 * COMPq] | |
| 75 paddsw xmm1, xmm0 | |
| 76 paddsw xmm2, xmm0 | |
| 77 shufps xmm1, xmm2, 0x44 ; Join the two pixels: low 64 bits of xmm1 stay, low 64 bits of xmm2 become the high half. | |
| 78 psraw xmm1, 6 | |
| 79 packuswb xmm1, xmm1 | |
| 80 movq QWORD [ARGBq], xmm1 | |
| 81 add ARGBq, 8 | |
| 82 | |
| 83 .scaleend: | |
| 84 sub WIDTHq, 2 | |
| 85 jns .scaleloop | |
| 86 | |
| 87 and WIDTHq, 1 ; For a non-negative width WIDTHq is -2 or -1 here; bit 0 set means one pixel is left over. | |
| 88 jz .scaledone | |
| 89 | |
| 90 ; Odd trailing pixel: read its U and V bytes (index = X >> 17) and sum their table entries into xmm0. | |
| 91 mov INDEXq, Xq | |
| 92 sar INDEXq, 17 | |
| 93 movzx COMPd, BYTE [Uq + INDEXq] | |
| 94 movq xmm0, [TABLEq + 2048 + 8 * COMPq] | |
| 95 movzx COMPd, BYTE [Vq + INDEXq] | |
| 96 movq xmm1, [TABLEq + 4096 + 8 * COMPq] | |
| 97 paddsw xmm0, xmm1 | |
| 98 | |
| 99 ; Read the single Y byte (index = X >> 16), add the U+V sum, shift and pack, then store one 4-byte pixel. | |
| 100 mov INDEXq, Xq | |
| 101 sar INDEXq, 16 | |
| 102 movzx COMPd, BYTE [Yq + INDEXq] | |
| 103 movq xmm1, [TABLEq + 8 * COMPq] | |
| 104 paddsw xmm1, xmm0 | |
| 105 psraw xmm1, 6 | |
| 106 packuswb xmm1, xmm1 | |
| 107 movd DWORD [ARGBq], xmm1 | |
| 108 | |
| 109 .scaledone: | |
| 110 POP r12 | |
| 111 POP r11 | |
| 112 POP r10 | |
| 113 RET | |
| OLD | NEW |