; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses SSE and SSE2 instructions.
;
  SECTION_TEXT
  CPU       SSE2

; Byte offsets of the U and V sub-tables inside the convert table. The Y
; sub-table starts at offset 0. Every lookup below indexes a sub-table with a
; zero-extended byte scaled by 8, so each sub-table spans 256 * 8 = 2048 bytes.
%define U_TABLE_OFFSET 2048
%define V_TABLE_OFFSET 4096

;-----------------------------------------------------------------------------
; void ScaleYUVToRGB32Row_SSE2_X64(const uint8_t* y_buf,
;                                  const uint8_t* u_buf,
;                                  const uint8_t* v_buf,
;                                  uint8_t* rgb_buf,
;                                  ptrdiff_t width,
;                                  ptrdiff_t source_dx,
;                                  const int16_t* convert_table);
;
; NOTE(review): the seventh parameter's C type is assumed (the table is read
; as packed signed 16-bit words below); confirm against the C declaration.
;
; Converts one row of horizontally scaled YUV samples into 32-bit pixels by
; table lookup. The source position X starts at 0 and advances by source_dx
; per output pixel. X is treated as 16.16 fixed point: X >> 16 indexes the Y
; row and X >> 17 indexes the U and V rows. For every output pixel the three
; 8-byte table entries selected by the Y, U and V bytes are added with signed
; saturation, shifted right arithmetically by 6 and packed to unsigned bytes.
;
; Register roles:
;   Yq, Uq, Vq   source row pointers
;   ARGBq        output pointer, advanced by 8 bytes per pixel pair
;   WIDTHq       output pixels still to be written
;   SOURCE_DXq   fixed-point step per output pixel
;   TABLEq       convert table base (copied out of R1q)
;   Xq           fixed-point source position
;   INDEXq       scratch index / position of the second pixel of a pair
;   COMPq/COMPd  scratch for the sample byte just loaded (reuses R1)
;   xmm0-xmm2    working vectors
;-----------------------------------------------------------------------------
%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Convert table

  PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; NOTE(review): r10-r12 are hard-coded here instead of being allocated through
; PROLOGUE. This assumes the bundled x86inc does not map any of the seven
; PROLOGUE registers onto r10-r12; confirm against the x86inc version in use.
%define     TABLEq     r10
%define     Xq         r11
%define     INDEXq     r12
%define     COMPq      R1q
%define     COMPd      R1d

; r12 is callee-saved and must be preserved.
; NOTE(review): r10 and r11 are caller-saved in both the System V and the
; Microsoft x64 conventions, so saving them looks unnecessary. Kept as-is.
  PUSH      r10
  PUSH      r11
  PUSH      r12

; Move the table pointer out of R1 so R1 can serve as the COMP scratch.
  mov       TABLEq, R1q

; Start at source position 0 and enter the loop through its width check.
  xor       Xq, Xq
  jmp       .scaleend

.scaleloop:
; Each iteration writes two output pixels. Both share the single U/V pair
; sampled at the first pixel's position.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + U_TABLE_OFFSET + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + V_TABLE_OFFSET + 8 * COMPq]

; First Y sample of the pair.
  lea       INDEXq, [Xq + SOURCE_DXq]   ; INDEXq now points to the next pixel.
  sar       Xq, 16
  movzx     COMPd, BYTE [Yq + Xq]
  paddsw    xmm0, xmm1                  ; xmm0 = U + V; hides an add after the
                                        ; memory load.
  movq      xmm1, [TABLEq + 8 * COMPq]

; Second Y sample of the pair.
  lea       Xq, [INDEXq + SOURCE_DXq]   ; Xq now points to the pixel after it.
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm2, [TABLEq + 8 * COMPq]

; Add the shared U + V term to both Y entries and emit 8 output bytes.
  paddsw    xmm1, xmm0
  paddsw    xmm2, xmm0
  shufps    xmm1, xmm2, 0x44            ; Low halves of xmm1 and xmm2 side by
                                        ; side: two pixels in one register.
  psraw     xmm1, 6
  packuswb  xmm1, xmm1
  movq      QWORD [ARGBq], xmm1
  add       ARGBq, 8

.scaleend:
; Keep looping while at least two pixels remain.
  sub       WIDTHq, 2
  jns       .scaleloop

; WIDTHq is now -2 (nothing left) or -1 (one pixel left); bit 0 tells which.
  and       WIDTHq, 1                   ; odd number of pixels?
  jz        .scaledone

; Trailing pixel: U + V term.
  mov       INDEXq, Xq
  sar       INDEXq, 17
  movzx     COMPd, BYTE [Uq + INDEXq]
  movq      xmm0, [TABLEq + U_TABLE_OFFSET + 8 * COMPq]
  movzx     COMPd, BYTE [Vq + INDEXq]
  movq      xmm1, [TABLEq + V_TABLE_OFFSET + 8 * COMPq]
  paddsw    xmm0, xmm1

; Trailing pixel: Y term, then store a single 4-byte pixel.
  mov       INDEXq, Xq
  sar       INDEXq, 16
  movzx     COMPd, BYTE [Yq + INDEXq]
  movq      xmm1, [TABLEq + 8 * COMPq]
  paddsw    xmm1, xmm0
  psraw     xmm1, 6
  packuswb  xmm1, xmm1
  movd      DWORD [ARGBq], xmm1

.scaledone:
; Restore in reverse order of the saves above.
  POP       r12
  POP       r11
  POP       r10
  RET