Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(323)

Side by Side Diff: media/base/yuv_row_win.cc

Issue 174442: mmx for linux yuv convert function.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/yuv_row_mac.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/base/yuv_row.h" 5 #include "media/base/yuv_row.h"
6 6
7 // Enable bilinear filtering by turning on the following macro. 7 extern "C" {
8 // #define MEDIA_BILINEAR_FILTER 1
9
10 namespace media {
11
12 #define RGBY(i) { \ 8 #define RGBY(i) { \
13 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ 9 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
14 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ 10 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
15 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ 11 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
16 0 \ 12 0 \
17 } 13 }
18 14
19 #define RGBU(i) { \ 15 #define RGBU(i) { \
20 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ 16 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
21 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ 17 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
22 0, \ 18 0, \
23 static_cast<int16>(256 * 64 - 1) \ 19 static_cast<int16>(256 * 64 - 1) \
24 } 20 }
25 21
26 #define RGBV(i) { \ 22 #define RGBV(i) { \
27 0, \ 23 0, \
28 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ 24 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
29 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ 25 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
30 0 \ 26 0 \
31 } 27 }
32 28
33 #define MMX_ALIGNED(var) __declspec(align(16)) var 29 #define MMX_ALIGNED(var) __declspec(align(16)) var
34 30
35 extern "C" { 31 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = {
36 MMX_ALIGNED(int16 coefficients_RGB_Y[256][4]) = {
37 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), 32 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
38 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), 33 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
39 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), 34 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
40 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), 35 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
41 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), 36 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
42 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), 37 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
43 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), 38 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
44 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), 39 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
45 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), 40 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
46 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), 41 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), 88 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
94 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), 89 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
95 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), 90 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
96 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), 91 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
97 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), 92 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
98 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), 93 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
99 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), 94 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
100 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), 95 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
101 }; 96 };
102 97
103 MMX_ALIGNED(int16 coefficients_RGB_U[256][4]) = { 98 MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = {
104 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), 99 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
105 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), 100 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
106 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), 101 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
107 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), 102 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
108 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), 103 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
109 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), 104 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
110 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), 105 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
111 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), 106 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
112 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), 107 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
113 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), 108 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
160 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), 155 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
161 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), 156 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
162 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), 157 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
163 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), 158 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
164 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), 159 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
165 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), 160 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
166 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), 161 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
167 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), 162 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
168 }; 163 };
169 164
170 MMX_ALIGNED(int16 coefficients_RGB_V[256][4]) = { 165 MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = {
171 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), 166 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
172 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), 167 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
173 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), 168 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
174 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), 169 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
175 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), 170 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
176 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), 171 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
177 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), 172 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
178 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), 173 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
179 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), 174 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
180 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), 175 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), 221 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
227 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), 222 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
228 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), 223 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
229 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), 224 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
230 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), 225 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
231 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), 226 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
232 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), 227 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
233 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), 228 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
234 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), 229 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
235 }; 230 };
236 } // extern "C"
237 231
238 #undef RGBHY 232 #undef RGBHY
239 #undef RGBY 233 #undef RGBY
240 #undef RGBU 234 #undef RGBU
241 #undef RGBV 235 #undef RGBV
242 #undef MMX_ALIGNED 236 #undef MMX_ALIGNED
243 237
244 // Warning C4799: function has no EMMS instruction. 238 // Warning C4799: function has no EMMS instruction.
245 // EMMS() is slow and should be called by the calling function once per image. 239 // EMMS() is slow and should be called by the calling function once per image.
246 #pragma warning(disable: 4799) 240 #pragma warning(disable: 4799)
247 241
248 __declspec(naked) 242 __declspec(naked)
249 void FastConvertYUVToRGB32Row(const uint8* y_buf, 243 void FastConvertYUVToRGB32Row(const uint8* y_buf,
250 const uint8* u_buf, 244 const uint8* u_buf,
251 const uint8* v_buf, 245 const uint8* v_buf,
252 uint8* rgb_buf, 246 uint8* rgb_buf,
253 int width) { 247 int width) {
254 __asm { 248 __asm {
255 pushad 249 pushad
256 mov edx, [esp + 32 + 4] // Y 250 mov edx, [esp + 32 + 4] // Y
257 mov edi, [esp + 32 + 8] // U 251 mov edi, [esp + 32 + 8] // U
258 mov esi, [esp + 32 + 12] // V 252 mov esi, [esp + 32 + 12] // V
259 mov ebp, [esp + 32 + 16] // rgb 253 mov ebp, [esp + 32 + 16] // rgb
260 mov ecx, [esp + 32 + 20] // width 254 mov ecx, [esp + 32 + 20] // width
261 jmp wend 255 jmp convertend
262 256
263 wloop : 257 convertloop :
264 movzx eax, byte ptr [edi] 258 movzx eax, byte ptr [edi]
265 add edi, 1 259 add edi, 1
266 movzx ebx, byte ptr [esi] 260 movzx ebx, byte ptr [esi]
267 add esi, 1 261 add esi, 1
268 movq mm0, [coefficients_RGB_U + 8 * eax] 262 movq mm0, [kCoefficientsRgbU + 8 * eax]
269 movzx eax, byte ptr [edx] 263 movzx eax, byte ptr [edx]
270 paddsw mm0, [coefficients_RGB_V + 8 * ebx] 264 paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
271 movzx ebx, byte ptr [edx + 1] 265 movzx ebx, byte ptr [edx + 1]
272 movq mm1, [coefficients_RGB_Y + 8 * eax] 266 movq mm1, [kCoefficientsRgbY + 8 * eax]
273 add edx, 2 267 add edx, 2
274 movq mm2, [coefficients_RGB_Y + 8 * ebx] 268 movq mm2, [kCoefficientsRgbY + 8 * ebx]
275 paddsw mm1, mm0 269 paddsw mm1, mm0
276 paddsw mm2, mm0 270 paddsw mm2, mm0
277 psraw mm1, 6 271 psraw mm1, 6
278 psraw mm2, 6 272 psraw mm2, 6
279 packuswb mm1, mm2 273 packuswb mm1, mm2
280 movntq [ebp], mm1 274 movntq [ebp], mm1
281 add ebp, 8 275 add ebp, 8
282 wend : 276 convertend :
283 sub ecx, 2 277 sub ecx, 2
284 jns wloop 278 jns convertloop
285 279
286 and ecx, 1 // odd number of pixels? 280 and ecx, 1 // odd number of pixels?
287 jz wdone 281 jz convertdone
288 282
289 movzx eax, byte ptr [edi] 283 movzx eax, byte ptr [edi]
290 movq mm0, [coefficients_RGB_U + 8 * eax] 284 movq mm0, [kCoefficientsRgbU + 8 * eax]
291 movzx eax, byte ptr [esi] 285 movzx eax, byte ptr [esi]
292 paddsw mm0, [coefficients_RGB_V + 8 * eax] 286 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
293 movzx eax, byte ptr [edx] 287 movzx eax, byte ptr [edx]
294 movq mm1, [coefficients_RGB_Y + 8 * eax] 288 movq mm1, [kCoefficientsRgbY + 8 * eax]
295 paddsw mm1, mm0 289 paddsw mm1, mm0
296 psraw mm1, 6 290 psraw mm1, 6
297 packuswb mm1, mm1 291 packuswb mm1, mm1
298 movd [ebp], mm1 292 movd [ebp], mm1
299 wdone : 293 convertdone :
300 294
301 popad 295 popad
302 ret 296 ret
303 } 297 }
304 } 298 }
305 299
306 __declspec(naked) 300 __declspec(naked)
307 void ConvertYUVToRGB32Row(const uint8* y_buf, 301 void ConvertYUVToRGB32Row(const uint8* y_buf,
308 const uint8* u_buf, 302 const uint8* u_buf,
309 const uint8* v_buf, 303 const uint8* v_buf,
310 uint8* rgb_buf, 304 uint8* rgb_buf,
311 int width, 305 int width,
312 int step) { 306 int step) {
313 __asm { 307 __asm {
314 pushad 308 pushad
315 mov edx, [esp + 32 + 4] // Y 309 mov edx, [esp + 32 + 4] // Y
316 mov edi, [esp + 32 + 8] // U 310 mov edi, [esp + 32 + 8] // U
317 mov esi, [esp + 32 + 12] // V 311 mov esi, [esp + 32 + 12] // V
318 mov ebp, [esp + 32 + 16] // rgb 312 mov ebp, [esp + 32 + 16] // rgb
319 mov ecx, [esp + 32 + 20] // width 313 mov ecx, [esp + 32 + 20] // width
320 mov ebx, [esp + 32 + 24] // step 314 mov ebx, [esp + 32 + 24] // step
321 jmp wend 315 jmp wend
322 316
323 wloop : 317 wloop :
324 movzx eax, byte ptr [edi] 318 movzx eax, byte ptr [edi]
325 add edi, ebx 319 add edi, ebx
326 movq mm0, [coefficients_RGB_U + 8 * eax] 320 movq mm0, [kCoefficientsRgbU + 8 * eax]
327 movzx eax, byte ptr [esi] 321 movzx eax, byte ptr [esi]
328 add esi, ebx 322 add esi, ebx
329 paddsw mm0, [coefficients_RGB_V + 8 * eax] 323 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
330 movzx eax, byte ptr [edx] 324 movzx eax, byte ptr [edx]
331 add edx, ebx 325 add edx, ebx
332 movq mm1, [coefficients_RGB_Y + 8 * eax] 326 movq mm1, [kCoefficientsRgbY + 8 * eax]
333 movzx eax, byte ptr [edx] 327 movzx eax, byte ptr [edx]
334 add edx, ebx 328 add edx, ebx
335 movq mm2, [coefficients_RGB_Y + 8 * eax] 329 movq mm2, [kCoefficientsRgbY + 8 * eax]
336 paddsw mm1, mm0 330 paddsw mm1, mm0
337 paddsw mm2, mm0 331 paddsw mm2, mm0
338 psraw mm1, 6 332 psraw mm1, 6
339 psraw mm2, 6 333 psraw mm2, 6
340 packuswb mm1, mm2 334 packuswb mm1, mm2
341 movntq [ebp], mm1 335 movntq [ebp], mm1
342 add ebp, 8 336 add ebp, 8
343 wend : 337 wend :
344 sub ecx, 2 338 sub ecx, 2
345 jns wloop 339 jns wloop
346 340
347 and ecx, 1 // odd number of pixels? 341 and ecx, 1 // odd number of pixels?
348 jz wdone 342 jz wdone
349 343
350 movzx eax, byte ptr [edi] 344 movzx eax, byte ptr [edi]
351 movq mm0, [coefficients_RGB_U + 8 * eax] 345 movq mm0, [kCoefficientsRgbU + 8 * eax]
352 movzx eax, byte ptr [esi] 346 movzx eax, byte ptr [esi]
353 paddsw mm0, [coefficients_RGB_V + 8 * eax] 347 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
354 movzx eax, byte ptr [edx] 348 movzx eax, byte ptr [edx]
355 movq mm1, [coefficients_RGB_Y + 8 * eax] 349 movq mm1, [kCoefficientsRgbY + 8 * eax]
356 paddsw mm1, mm0 350 paddsw mm1, mm0
357 psraw mm1, 6 351 psraw mm1, 6
358 packuswb mm1, mm1 352 packuswb mm1, mm1
359 movd [ebp], mm1 353 movd [ebp], mm1
360 wdone : 354 wdone :
361 355
362 popad 356 popad
363 ret 357 ret
364 } 358 }
365 } 359 }
(...skipping 12 matching lines...) Expand all
378 mov edi, [esp + 32 + 8] // U 372 mov edi, [esp + 32 + 8] // U
379 mov esi, [esp + 32 + 12] // V 373 mov esi, [esp + 32 + 12] // V
380 mov ebp, [esp + 32 + 16] // rgb 374 mov ebp, [esp + 32 + 16] // rgb
381 mov ecx, [esp + 32 + 20] // width 375 mov ecx, [esp + 32 + 20] // width
382 jmp wend 376 jmp wend
383 377
384 wloop : 378 wloop :
385 movzx eax, byte ptr [edi] 379 movzx eax, byte ptr [edi]
386 mov ebx, [esp + 32 + 28] // uvstep 380 mov ebx, [esp + 32 + 28] // uvstep
387 add edi, ebx 381 add edi, ebx
388 movq mm0, [coefficients_RGB_U + 8 * eax] 382 movq mm0, [kCoefficientsRgbU + 8 * eax]
389 movzx eax, byte ptr [esi] 383 movzx eax, byte ptr [esi]
390 add esi, ebx 384 add esi, ebx
391 paddsw mm0, [coefficients_RGB_V + 8 * eax] 385 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
392 movzx eax, byte ptr [edx] 386 movzx eax, byte ptr [edx]
393 mov ebx, [esp + 32 + 24] // ystep 387 mov ebx, [esp + 32 + 24] // ystep
394 add edx, ebx 388 add edx, ebx
395 movq mm1, [coefficients_RGB_Y + 8 * eax] 389 movq mm1, [kCoefficientsRgbY + 8 * eax]
396 movzx eax, byte ptr [edx] 390 movzx eax, byte ptr [edx]
397 add edx, ebx 391 add edx, ebx
398 movq mm2, [coefficients_RGB_Y + 8 * eax] 392 movq mm2, [kCoefficientsRgbY + 8 * eax]
399 paddsw mm1, mm0 393 paddsw mm1, mm0
400 paddsw mm2, mm0 394 paddsw mm2, mm0
401 psraw mm1, 6 395 psraw mm1, 6
402 psraw mm2, 6 396 psraw mm2, 6
403 packuswb mm1, mm2 397 packuswb mm1, mm2
404 movntq [ebp], mm1 398 movntq [ebp], mm1
405 add ebp, 8 399 add ebp, 8
406 wend : 400 wend :
407 sub ecx, 2 401 sub ecx, 2
408 jns wloop 402 jns wloop
409 403
410 and ecx, 1 // odd number of pixels? 404 and ecx, 1 // odd number of pixels?
411 jz wdone 405 jz wdone
412 406
413 movzx eax, byte ptr [edi] 407 movzx eax, byte ptr [edi]
414 movq mm0, [coefficients_RGB_U + 8 * eax] 408 movq mm0, [kCoefficientsRgbU + 8 * eax]
415 movzx eax, byte ptr [esi] 409 movzx eax, byte ptr [esi]
416 paddsw mm0, [coefficients_RGB_V + 8 * eax] 410 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
417 movzx eax, byte ptr [edx] 411 movzx eax, byte ptr [edx]
418 movq mm1, [coefficients_RGB_Y + 8 * eax] 412 movq mm1, [kCoefficientsRgbY + 8 * eax]
419 paddsw mm1, mm0 413 paddsw mm1, mm0
420 psraw mm1, 6 414 psraw mm1, 6
421 packuswb mm1, mm1 415 packuswb mm1, mm1
422 movd [ebp], mm1 416 movd [ebp], mm1
423 wdone : 417 wdone :
424 418
425 popad 419 popad
426 ret 420 ret
427 } 421 }
428 } 422 }
(...skipping 11 matching lines...) Expand all
440 mov esi, [esp + 32 + 12] // V 434 mov esi, [esp + 32 + 12] // V
441 mov ebp, [esp + 32 + 16] // rgb 435 mov ebp, [esp + 32 + 16] // rgb
442 mov ecx, [esp + 32 + 20] // width 436 mov ecx, [esp + 32 + 20] // width
443 jmp wend 437 jmp wend
444 438
445 wloop : 439 wloop :
446 movzx eax, byte ptr [edi] 440 movzx eax, byte ptr [edi]
447 add edi, 1 441 add edi, 1
448 movzx ebx, byte ptr [esi] 442 movzx ebx, byte ptr [esi]
449 add esi, 1 443 add esi, 1
450 movq mm0, [coefficients_RGB_U + 8 * eax] 444 movq mm0, [kCoefficientsRgbU + 8 * eax]
451 movzx eax, byte ptr [edx] 445 movzx eax, byte ptr [edx]
452 paddsw mm0, [coefficients_RGB_V + 8 * ebx] 446 paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
453 movq mm1, [coefficients_RGB_Y + 8 * eax] 447 movq mm1, [kCoefficientsRgbY + 8 * eax]
454 paddsw mm1, mm0 448 paddsw mm1, mm0
455 psraw mm1, 6 449 psraw mm1, 6
456 packuswb mm1, mm1 450 packuswb mm1, mm1
457 punpckldq mm1, mm1 451 punpckldq mm1, mm1
458 movntq [ebp], mm1 452 movntq [ebp], mm1
459 453
460 movzx ebx, byte ptr [edx + 1] 454 movzx ebx, byte ptr [edx + 1]
461 add edx, 2 455 add edx, 2
462 paddsw mm0, [coefficients_RGB_Y + 8 * ebx] 456 paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
463 psraw mm0, 6 457 psraw mm0, 6
464 packuswb mm0, mm0 458 packuswb mm0, mm0
465 punpckldq mm0, mm0 459 punpckldq mm0, mm0
466 movntq [ebp+8], mm0 460 movntq [ebp+8], mm0
467 add ebp, 16 461 add ebp, 16
468 wend : 462 wend :
469 sub ecx, 4 463 sub ecx, 4
470 jns wloop 464 jns wloop
471 465
472 add ecx, 4 466 add ecx, 4
473 jz wdone 467 jz wdone
474 468
475 movzx eax, byte ptr [edi] 469 movzx eax, byte ptr [edi]
476 movq mm0, [coefficients_RGB_U + 8 * eax] 470 movq mm0, [kCoefficientsRgbU + 8 * eax]
477 movzx eax, byte ptr [esi] 471 movzx eax, byte ptr [esi]
478 paddsw mm0, [coefficients_RGB_V + 8 * eax] 472 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
479 movzx eax, byte ptr [edx] 473 movzx eax, byte ptr [edx]
480 movq mm1, [coefficients_RGB_Y + 8 * eax] 474 movq mm1, [kCoefficientsRgbY + 8 * eax]
481 paddsw mm1, mm0 475 paddsw mm1, mm0
482 psraw mm1, 6 476 psraw mm1, 6
483 packuswb mm1, mm1 477 packuswb mm1, mm1
484 jmp wend1 478 jmp wend1
485 479
486 wloop1 : 480 wloop1 :
487 movd [ebp], mm1 481 movd [ebp], mm1
488 add ebp, 4 482 add ebp, 4
489 wend1 : 483 wend1 :
490 sub ecx, 1 484 sub ecx, 1
(...skipping 16 matching lines...) Expand all
507 int width, 501 int width,
508 int dx) { 502 int dx) {
509 __asm { 503 __asm {
510 pushad 504 pushad
511 mov edx, [esp + 32 + 4] // Y 505 mov edx, [esp + 32 + 4] // Y
512 mov edi, [esp + 32 + 8] // U 506 mov edi, [esp + 32 + 8] // U
513 mov esi, [esp + 32 + 12] // V 507 mov esi, [esp + 32 + 12] // V
514 mov ebp, [esp + 32 + 16] // rgb 508 mov ebp, [esp + 32 + 16] // rgb
515 mov ecx, [esp + 32 + 20] // width 509 mov ecx, [esp + 32 + 20] // width
516 xor ebx, ebx // x 510 xor ebx, ebx // x
517 jmp wend 511 jmp scaleend
518 512
519 wloop : 513 scaleloop :
520 mov eax, ebx 514 mov eax, ebx
521 sar eax, 5 515 sar eax, 5
522 movzx eax, byte ptr [edi + eax] 516 movzx eax, byte ptr [edi + eax]
523 movq mm0, [coefficients_RGB_U + 8 * eax] 517 movq mm0, [kCoefficientsRgbU + 8 * eax]
524 mov eax, ebx 518 mov eax, ebx
525 sar eax, 5 519 sar eax, 5
526 movzx eax, byte ptr [esi + eax] 520 movzx eax, byte ptr [esi + eax]
527 paddsw mm0, [coefficients_RGB_V + 8 * eax] 521 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
528 mov eax, ebx 522 mov eax, ebx
529 add ebx, [esp + 32 + 24] // x += dx 523 add ebx, [esp + 32 + 24] // x += dx
530 sar eax, 4 524 sar eax, 4
531 movzx eax, byte ptr [edx + eax] 525 movzx eax, byte ptr [edx + eax]
532 movq mm1, [coefficients_RGB_Y + 8 * eax] 526 movq mm1, [kCoefficientsRgbY + 8 * eax]
533 mov eax, ebx 527 mov eax, ebx
534 add ebx, [esp + 32 + 24] // x += dx 528 add ebx, [esp + 32 + 24] // x += dx
535 sar eax, 4 529 sar eax, 4
536 movzx eax, byte ptr [edx + eax] 530 movzx eax, byte ptr [edx + eax]
537 movq mm2, [coefficients_RGB_Y + 8 * eax] 531 movq mm2, [kCoefficientsRgbY + 8 * eax]
538 paddsw mm1, mm0 532 paddsw mm1, mm0
539 paddsw mm2, mm0 533 paddsw mm2, mm0
540 psraw mm1, 6 534 psraw mm1, 6
541 psraw mm2, 6 535 psraw mm2, 6
542 packuswb mm1, mm2 536 packuswb mm1, mm2
543 movntq [ebp], mm1 537 movntq [ebp], mm1
544 add ebp, 8 538 add ebp, 8
545 wend : 539 scaleend :
546 sub ecx, 2 540 sub ecx, 2
547 jns wloop 541 jns scaleloop
548 542
549 and ecx, 1 // odd number of pixels? 543 and ecx, 1 // odd number of pixels?
550 jz wdone 544 jz scaledone
551 545
552 mov eax, ebx 546 mov eax, ebx
553 sar eax, 5 547 sar eax, 5
554 movzx eax, byte ptr [edi + eax] 548 movzx eax, byte ptr [edi + eax]
555 movq mm0, [coefficients_RGB_U + 8 * eax] 549 movq mm0, [kCoefficientsRgbU + 8 * eax]
556 mov eax, ebx 550 mov eax, ebx
557 sar eax, 5 551 sar eax, 5
558 movzx eax, byte ptr [esi + eax] 552 movzx eax, byte ptr [esi + eax]
559 paddsw mm0, [coefficients_RGB_V + 8 * eax] 553 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
560 mov eax, ebx 554 mov eax, ebx
561 sar eax, 4 555 sar eax, 4
562 movzx eax, byte ptr [edx + eax] 556 movzx eax, byte ptr [edx + eax]
563 movq mm1, [coefficients_RGB_Y + 8 * eax] 557 movq mm1, [kCoefficientsRgbY + 8 * eax]
564 mov eax, ebx 558 mov eax, ebx
565 sar eax, 4 559 sar eax, 4
566 movzx eax, byte ptr [edx + eax] 560 movzx eax, byte ptr [edx + eax]
567 movq mm2, [coefficients_RGB_Y + 8 * eax] 561 movq mm2, [kCoefficientsRgbY + 8 * eax]
568 paddsw mm1, mm0 562 paddsw mm1, mm0
569 paddsw mm2, mm0 563 paddsw mm2, mm0
570 psraw mm1, 6 564 psraw mm1, 6
571 psraw mm2, 6 565 psraw mm2, 6
572 packuswb mm1, mm2 566 packuswb mm1, mm2
573 movd [ebp], mm1 567 movd [ebp], mm1
574 568
575 wdone : 569 scaledone :
576
577 popad 570 popad
578 ret 571 ret
579 } 572 }
580 } 573 }
574 } // extern "C"
581 575
582 } // namespace media
583
OLD | NEW
« no previous file with comments | « media/base/yuv_row_mac.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698