; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses MMX instructions.
;
  SECTION_TEXT
  CPU       MMX

;-----------------------------------------------------------------------------
; void LinearScaleYUVToRGB32Row_MMX_X64(const uint8_t* y_buf,
;                                       const uint8_t* u_buf,
;                                       const uint8_t* v_buf,
;                                       uint8_t* rgb_buf,
;                                       ptrdiff_t width,
;                                       ptrdiff_t source_dx,
;                                       const int16_t* convert_table);
;
; NOTE(review): the seventh parameter was missing from the original prototype
; comment although PROLOGUE loads seven arguments.  Its name and element type
; above are inferred from how the table is used below (8-byte entries combined
; with 16-bit saturating adds) -- confirm against the C declaration.
;
; Horizontally scales one row of planar Y/U/V samples with linear
; interpolation and converts it to 32-bit pixels through a lookup table.
;
; Fixed point:
;   x and source_dx are 16.16 fixed point.  Luma is sampled at index x >> 16
;   with fraction x & 0xffff; chroma is sampled at index x >> 17 with fraction
;   x & 0x1fffe (i.e. chroma has half the horizontal resolution of luma).
;   The two interpolation weights are frac and (frac ^ mask), so they sum to
;   the mask rather than to a full 1.0; interpolating two equal non-zero
;   samples therefore yields sample - 1.  This is intentional to keep; any
;   change would alter the output.
;
; Lookup table layout (as indexed by this routine):
;   table + 8 * y           luma contribution
;   table + 2048 + 8 * u    U contribution
;   table + 4096 + 8 * v    V contribution
;   Each entry is 8 bytes, treated as four signed 16-bit lanes.  The three
;   contributions are added with signed saturation, arithmetically shifted
;   right by 6 and packed to bytes with unsigned saturation.
;
; Output:
;   Pixels are produced in pairs (8 bytes per store).  If the row ends on an
;   odd pixel, the final pixel is written alone as 4 bytes.
;
; Caveats visible in the code:
;   * Every interpolation reads the sample at index + 1 as well as index, even
;     when the fraction is zero, so one byte past the last sampled index must
;     be readable in each source plane.
;   * Pixel pairs are written with movntq (a non-temporal store; the
;     instruction belongs to the SSE integer extensions to MMX).  No SFENCE
;     and no EMMS is issued here; MMX state is left live on return.
;
; Register roles inside the loop:
;   Yq, Uq, Vq   source plane pointers
;   ARGBq        output pointer, advanced by 8 per pixel pair
;   WIDTHq       width * source_dx (exclusive end position, 16.16)
;   SOURCE_DXq   step per output pixel (16.16)
;   TABLEq       lookup table base (copied out of R1 so R1 can be scratch)
;   Xq           current source position (16.16)
;   INDEXq       scratch: chroma index, then position of the second pixel
;   FRACTIONq    scratch: current interpolation weight
;   COMPL/COMPR  scratch: left / right neighbour sample, then the result
;   mm0          U + V contribution shared by both pixels of a pair
;   mm1, mm2     first / second pixel
;-----------------------------------------------------------------------------
%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

  ; Parameters are in the following order:
  ; 1. Y plane
  ; 2. U plane
  ; 3. V plane
  ; 4. ARGB frame
  ; 5. Width
  ; 6. Source dx
  ; 7. Conversion lookup table

  PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

  ; Scratch registers are hard-coded rather than allocated through x86inc.
  ; NOTE(review): this assumes PROLOGUE does not map any of the seven
  ; arguments onto r10-r14 -- verify against the x86inc register mapping for
  ; every ABI this file is built for.
%define     TABLEq     r10
%define     Xq         r11
%define     INDEXq     r12
%define     COMPRd     r13d
%define     COMPRq     r13
%define     FRACTIONq  r14
%define     COMPL      R1
%define     COMPLq     R1q
%define     COMPLd     R1d

  ; Byte offsets of the U and V sub-tables from the table base.
%define     U_TABLE_OFFSET    2048
%define     V_TABLE_OFFSET    4096

  ; Fraction masks: 16 fractional bits for luma; for chroma the position is
  ; taken at half resolution, so the fraction spans bits 1..16 of x.
%define     Y_FRACTION_MASK   0xffff
%define     UV_FRACTION_MASK  0x1fffe

  ; Preserve every hard-coded scratch register.  r12-r14 are callee-saved in
  ; the x86-64 ABIs; r10/r11 are caller-saved there but are saved as well to
  ; keep the original register contract of this routine.
  PUSH      TABLEq
  PUSH      Xq
  PUSH      INDEXq
  PUSH      COMPRq
  PUSH      FRACTIONq

  ; Restores the registers saved above, in reverse order.
%macro EPILOGUE 0
  POP       FRACTIONq
  POP       COMPRq
  POP       INDEXq
  POP       Xq
  POP       TABLEq
%endmacro

  ; Free R1 for use as COMPL by moving the table pointer aside.
  mov       TABLEq, R1q

  imul      WIDTHq, SOURCE_DXq            ; source_width = width * source_dx
  xor       Xq, Xq                        ; x = 0
  cmp       SOURCE_DXq, 0x20000           ; 2.0 in 16.16
  jl        .lscaleend
  mov       Xq, 0x8000                    ; x = 0.5 for 1/2 or less
  jmp       .lscaleend

.lscaleloop:
  ; Interpolate U.
  ;   u = (U[i] * (frac ^ mask) + U[i + 1] * frac) >> 17, with i = x >> 17.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPLd, BYTE [Uq + INDEXq]
  movzx     COMPRd, BYTE [Uq + INDEXq + 1]
  mov       FRACTIONq, Xq
  and       FRACTIONq, UV_FRACTION_MASK   ; right weight
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, UV_FRACTION_MASK   ; left weight = mask - right weight
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  movq      mm0, [TABLEq + U_TABLE_OFFSET + 8 * COMPLq]

  ; Interpolate V at the same chroma index.
  ; FRACTIONq still holds the left weight from the U step, so the left sample
  ; is multiplied first and the weight is flipped only once; this avoids
  ; recomputing the fraction from Xq.
  movzx     COMPLd, BYTE [Vq + INDEXq]
  movzx     COMPRd, BYTE [Vq + INDEXq + 1]
  imul      COMPLq, FRACTIONq
  xor       FRACTIONq, UV_FRACTION_MASK   ; back to the right weight
  imul      COMPRq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  paddsw    mm0, [TABLEq + V_TABLE_OFFSET + 8 * COMPLq]

  ; Interpolate the first luma sample, Y1.
  ; INDEXq becomes the position of the next pixel before Xq is destroyed by
  ; the shift that turns it into a byte index.
  lea       INDEXq, [Xq + SOURCE_DXq]     ; INDEXq now points to next pixel.
                                          ; Xq points to current pixel.
  mov       FRACTIONq, Xq
  sar       Xq, 16
  movzx     COMPLd, BYTE [Yq + Xq]
  movzx     COMPRd, BYTE [Yq + Xq + 1]
  and       FRACTIONq, Y_FRACTION_MASK    ; right weight
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, Y_FRACTION_MASK    ; left weight
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm1, [TABLEq + 8 * COMPLq]

  ; Interpolate Y2 if available; otherwise finish with a single pixel.
  cmp       INDEXq, WIDTHq
  jge       .lscalelastpixel

  lea       Xq, [INDEXq + SOURCE_DXq]     ; Xq points to next pixel.
                                          ; INDEXq points to current pixel.
  mov       FRACTIONq, INDEXq
  sar       INDEXq, 16
  movzx     COMPLd, BYTE [Yq + INDEXq]
  movzx     COMPRd, BYTE [Yq + INDEXq + 1]
  and       FRACTIONq, Y_FRACTION_MASK    ; right weight
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, Y_FRACTION_MASK    ; left weight
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm2, [TABLEq + 8 * COMPLq]

  ; Combine luma with the shared chroma term, scale down, clamp to bytes and
  ; store both pixels (first pixel in the low four bytes).
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 6
  psraw     mm2, 6
  packuswb  mm1, mm2
  movntq    [ARGBq], mm1
  add       ARGBq, 8

.lscaleend:
  ; Loop while the source position is still inside the row.
  cmp       Xq, WIDTHq
  jl        .lscaleloop
  jmp       .epilogue

.lscalelastpixel:
  ; Odd trailing pixel: same conversion, but only four bytes are written.
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.epilogue:
  EPILOGUE
  RET