OLD | NEW |
| (Empty) |
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 ; Use of this source code is governed by a BSD-style license that can be | |
3 ; found in the LICENSE file. | |
4 | |
5 ; | |
6 ; void SYMBOL(const uint8_t* argb, uint8_t* y, uint8_t* u, uint8_t* v, int width); | |
7 ; | |
8 ; The main code that converts RGB pixels to YUV pixels. This function roughly | |
9 ; consists of three parts: converting one ARGB pixel to YUV pixels, converting | |
10 ; two ARGB pixels to YUV pixels, and converting four ARGB pixels to YUV pixels. | |
11 ; To write the structure of this function in C, it becomes the snippet listed | |
12 ; below. | |
13 ; | |
14 ; if (width & 1) { | |
15 ; --width; | |
16 ; // Convert one ARGB pixel to one Y pixel, one U pixel, and one V pixel. | |
17 ; } | |
18 ; | |
19 ; if (width & 2) { | |
20 ; width -= 2; | |
21 ; // Convert two ARGB pixels to two Y pixels, one U pixel, and one V pixel. | |
22 ; } | |
23 ; | |
24 ; while (width) { | |
25 ; width -= 4; | |
26 ; // Convert four ARGB pixels to four Y pixels, two U pixels, and two V | |
27 ; // pixels. | |
28 ; } | |
29 ; | |
; Function entry: export the symbol, name the registers used by the body, and
; load the constants used by the conversion code.
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign stack_offset 0

  ; ARGB, Y, U, V and WIDTH name the five arguments of the C prototype; TEMP is
  ; an additional scratch register used to move results out of XMM registers.
  ; NOTE(review): the leading "5, 6, 8" are presumably the x86inc-style counts
  ; (arguments, general-purpose registers, XMM registers) -- confirm against
  ; the PROLOGUE macro definition.
  PROLOGUE  5, 6, 8, ARGB, Y, U, V, WIDTH, TEMP

  ; The constants are built from immediates rather than loaded from memory so
  ; that this code does not depend on the GOT.
  LOAD_XMM  XMM_CONST_Y0, 0x00420219
  LOAD_XMM  XMM_CONST_Y1, 0x00007F00
  LOAD_XMM  XMM_CONST_U, 0x00DAB670
  LOAD_XMM  XMM_CONST_V, 0x0070A2EE
  LOAD_XMM  XMM_CONST_128, 0x00800080
44 | |
.convert_one_pixel:
  ; Halve WIDTH so that from here on it is directly the offset into u[] and
  ; v[]; y[] is addressed with WIDTH * 2. The bit shifted out ends up in CF:
  ; if it is clear the width was even and there is no single trailing pixel.
  ; Otherwise the rightmost ARGB pixel is converted on its own, producing one
  ; Y pixel, one U pixel, and one V pixel.
  sar       WIDTHq, 1
  jnc       .convert_two_pixels

  ; Read one ARGB (or RGB) pixel.
  READ_ARGB xmm0, 1

  ; Calculate y[0] from the pixel read above and store it as a single byte.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       BYTE [Yq + WIDTHq * 2], TEMPb

  ; Skip calculating u and v if the output buffer is NULL, exactly as
  ; .convert_two_pixels does; without this check the stores below would write
  ; through a NULL-based address.
  test      Uq, Uq
  jz        .convert_two_pixels

  ; Calculate u[0] from the pixel read above. If this is an odd line, the
  ; output pixel already contains the U value calculated in the previous call;
  ; that value is read back and averaged with the new one.
  ; NOTE(review): the size argument is 4 here although only one pixel was read
  ; (the two-pixel path passes 2) -- confirm against the INIT_UV definition.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from the same pixel. As for u[0], the result of the previous
  ; call is read back and averaged -- it must come from the V buffer, not from
  ; the U buffer.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_two_pixels:
  ; WIDTH was halved in .convert_one_pixel, so its bit 0 now tells whether the
  ; width is not a multiple of four. If so, the rightmost two ARGB pixels are
  ; converted here, producing two Y pixels, one U pixel, and one V pixel.
  test      WIDTHb, 2 / 2
  jz        .convert_four_pixels
  sub       WIDTHb, 2 / 2

  ; Read two ARGB (or RGB) pixels.
  READ_ARGB xmm0, 2

  ; Calculate y[0] and y[1] from the two pixels and store them as one word.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       WORD [Yq + WIDTHq * 2], TEMPw

  ; A NULL U buffer means no chroma output is wanted: go straight to the
  ; four-pixel stage.
  test      Uq, Uq
  jz        .convert_four_pixels

  ; Calculate u[0] from the two pixels read above. (See the comments in
  ; .convert_one_pixel for how the previous call's value is averaged in.)
  INIT_UV   TEMPd, Uq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from the same two pixels.
  INIT_UV   TEMPd, Vq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_four_pixels:
  ; Main stage: each iteration converts four ARGB pixels into four Y pixels,
  ; two U pixels, and two V pixels. Nothing to do when no pixels remain.
  test      WIDTHq, WIDTHq
  jz        .convert_finish

%if PIXELSIZE == 4
  ; Pick the aligned loop (movdqa loads) only when the input pointer sits on a
  ; 16-byte boundary; otherwise use the unaligned loop.
  test      ARGBw, 15
  jnz       .convert_four_pixels_unaligned

.convert_four_pixels_aligned:
  ; WIDTH is decremented first and then used as the index, so the row is
  ; processed from its right end towards its left end.
  sub       WIDTHq, 4 / 2

  ; Load four ARGB pixels. movdqa is allowed here because the source address
  ; was checked for alignment above.
  movdqa    xmm0, DQWORD [ARGBq + WIDTHq * 4 * 2]

  ; Calculate y[0], y[1], y[2], and y[3] and store them as one dword.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Without subsampling, a NULL U buffer marks an odd line for which u and v
  ; are not calculated. (With subsampling enabled the buffers hold the u and v
  ; values of the previous call and therefore must not be NULL.)
  test      Uq, Uq
  jz        .convert_four_pixels_aligned_next
%endif

  ; Calculate u[0] and u[1] from the four pixels loaded above.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from the same four pixels.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_aligned_next:
%endif

  ; Keep going until WIDTH reaches zero.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_aligned

  ; Done: jump over the unaligned loop.
  jmp       .convert_finish
%endif
162 | |
.convert_four_pixels_unaligned:
  ; Same loop as the aligned variant, except that the pixels are fetched with
  ; READ_ARGB. WIDTH is decremented before it is used as the index, so the row
  ; is processed from right to left.
  sub       WIDTHq, 4 / 2

  ; Read four ARGB (or RGB) pixels.
  READ_ARGB xmm0, 4

  ; Calculate y[0], y[1], y[2], and y[3] and store them as one dword.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels_unaligned_next
%endif

  ; Calculate u[0] and u[1] from the four pixels read above.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from the same four pixels.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_unaligned_next:
%endif

  ; Keep going until WIDTH reaches zero.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_unaligned

.convert_finish:
  ; This is a void function, so there is no return value to set up.
  RET
OLD | NEW |