Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(193)

Side by Side Diff: media/base/yuv_row_win.cc

Issue 7942010: Replacing movntq instruction with movq for 874 branch. (Closed) Base URL: svn://svn.chromium.org/chrome/branches/874/src/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/yuv_row_posix.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/base/yuv_row.h" 5 #include "media/base/yuv_row.h"
6 6
7 #define kCoefficientsRgbU kCoefficientsRgbY + 2048 7 #define kCoefficientsRgbU kCoefficientsRgbY + 2048
8 #define kCoefficientsRgbV kCoefficientsRgbY + 4096 8 #define kCoefficientsRgbV kCoefficientsRgbY + 4096
9 9
10 extern "C" { 10 extern "C" {
11 11
12 // Branch 874-specific fix: store with movq instead of movntq. movntq was introduced with SSE, so it crashes on MMX-only CPUs such as the Pentium II.
13 #define USE_MOVNTQ 0
14
12 #if USE_MMX 15 #if USE_MMX
13 __declspec(naked) 16 __declspec(naked)
14 void FastConvertYUVToRGB32Row(const uint8* y_buf, 17 void FastConvertYUVToRGB32Row(const uint8* y_buf,
15 const uint8* u_buf, 18 const uint8* u_buf,
16 const uint8* v_buf, 19 const uint8* v_buf,
17 uint8* rgb_buf, 20 uint8* rgb_buf,
18 int width) { 21 int width) {
19 __asm { 22 __asm {
20 pushad 23 pushad
21 mov edx, [esp + 32 + 4] // Y 24 mov edx, [esp + 32 + 4] // Y
(...skipping 13 matching lines...) Expand all
35 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] 38 paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
36 movzx ebx, byte ptr [edx + 1] 39 movzx ebx, byte ptr [edx + 1]
37 movq mm1, [kCoefficientsRgbY + 8 * eax] 40 movq mm1, [kCoefficientsRgbY + 8 * eax]
38 add edx, 2 41 add edx, 2
39 movq mm2, [kCoefficientsRgbY + 8 * ebx] 42 movq mm2, [kCoefficientsRgbY + 8 * ebx]
40 paddsw mm1, mm0 43 paddsw mm1, mm0
41 paddsw mm2, mm0 44 paddsw mm2, mm0
42 psraw mm1, 6 45 psraw mm1, 6
43 psraw mm2, 6 46 psraw mm2, 6
44 packuswb mm1, mm2 47 packuswb mm1, mm2
48 #if USE_MOVNTQ
45 movntq [ebp], mm1 49 movntq [ebp], mm1
50 #else
51 movq [ebp], mm1
52 #endif
46 add ebp, 8 53 add ebp, 8
47 convertend : 54 convertend :
48 sub ecx, 2 55 sub ecx, 2
49 jns convertloop 56 jns convertloop
50 57
51 and ecx, 1 // odd number of pixels? 58 and ecx, 1 // odd number of pixels?
52 jz convertdone 59 jz convertdone
53 60
54 movzx eax, byte ptr [edi] 61 movzx eax, byte ptr [edi]
55 movq mm0, [kCoefficientsRgbU + 8 * eax] 62 movq mm0, [kCoefficientsRgbU + 8 * eax]
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 add edx, ebx 103 add edx, ebx
97 movq mm1, [kCoefficientsRgbY + 8 * eax] 104 movq mm1, [kCoefficientsRgbY + 8 * eax]
98 movzx eax, byte ptr [edx] 105 movzx eax, byte ptr [edx]
99 add edx, ebx 106 add edx, ebx
100 movq mm2, [kCoefficientsRgbY + 8 * eax] 107 movq mm2, [kCoefficientsRgbY + 8 * eax]
101 paddsw mm1, mm0 108 paddsw mm1, mm0
102 paddsw mm2, mm0 109 paddsw mm2, mm0
103 psraw mm1, 6 110 psraw mm1, 6
104 psraw mm2, 6 111 psraw mm2, 6
105 packuswb mm1, mm2 112 packuswb mm1, mm2
113 #if USE_MOVNTQ
106 movntq [ebp], mm1 114 movntq [ebp], mm1
115 #else
116 movq [ebp], mm1
117 #endif
107 add ebp, 8 118 add ebp, 8
108 wend : 119 wend :
109 sub ecx, 2 120 sub ecx, 2
110 jns wloop 121 jns wloop
111 122
112 and ecx, 1 // odd number of pixels? 123 and ecx, 1 // odd number of pixels?
113 jz wdone 124 jz wdone
114 125
115 movzx eax, byte ptr [edi] 126 movzx eax, byte ptr [edi]
116 movq mm0, [kCoefficientsRgbU + 8 * eax] 127 movq mm0, [kCoefficientsRgbU + 8 * eax]
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
159 add edx, ebx 170 add edx, ebx
160 movq mm1, [kCoefficientsRgbY + 8 * eax] 171 movq mm1, [kCoefficientsRgbY + 8 * eax]
161 movzx eax, byte ptr [edx] 172 movzx eax, byte ptr [edx]
162 add edx, ebx 173 add edx, ebx
163 movq mm2, [kCoefficientsRgbY + 8 * eax] 174 movq mm2, [kCoefficientsRgbY + 8 * eax]
164 paddsw mm1, mm0 175 paddsw mm1, mm0
165 paddsw mm2, mm0 176 paddsw mm2, mm0
166 psraw mm1, 6 177 psraw mm1, 6
167 psraw mm2, 6 178 psraw mm2, 6
168 packuswb mm1, mm2 179 packuswb mm1, mm2
180 #if USE_MOVNTQ
169 movntq [ebp], mm1 181 movntq [ebp], mm1
182 #else
183 movq [ebp], mm1
184 #endif
170 add ebp, 8 185 add ebp, 8
171 wend : 186 wend :
172 sub ecx, 2 187 sub ecx, 2
173 jns wloop 188 jns wloop
174 189
175 and ecx, 1 // odd number of pixels? 190 and ecx, 1 // odd number of pixels?
176 jz wdone 191 jz wdone
177 192
178 movzx eax, byte ptr [edi] 193 movzx eax, byte ptr [edi]
179 movq mm0, [kCoefficientsRgbU + 8 * eax] 194 movq mm0, [kCoefficientsRgbU + 8 * eax]
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 movzx ebx, byte ptr [esi] 228 movzx ebx, byte ptr [esi]
214 add esi, 1 229 add esi, 1
215 movq mm0, [kCoefficientsRgbU + 8 * eax] 230 movq mm0, [kCoefficientsRgbU + 8 * eax]
216 movzx eax, byte ptr [edx] 231 movzx eax, byte ptr [edx]
217 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] 232 paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
218 movq mm1, [kCoefficientsRgbY + 8 * eax] 233 movq mm1, [kCoefficientsRgbY + 8 * eax]
219 paddsw mm1, mm0 234 paddsw mm1, mm0
220 psraw mm1, 6 235 psraw mm1, 6
221 packuswb mm1, mm1 236 packuswb mm1, mm1
222 punpckldq mm1, mm1 237 punpckldq mm1, mm1
238 #if USE_MOVNTQ
223 movntq [ebp], mm1 239 movntq [ebp], mm1
240 #else
241 movq [ebp], mm1
242 #endif
224 243
225 movzx ebx, byte ptr [edx + 1] 244 movzx ebx, byte ptr [edx + 1]
226 add edx, 2 245 add edx, 2
227 paddsw mm0, [kCoefficientsRgbY + 8 * ebx] 246 paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
228 psraw mm0, 6 247 psraw mm0, 6
229 packuswb mm0, mm0 248 packuswb mm0, mm0
230 punpckldq mm0, mm0 249 punpckldq mm0, mm0
250 #if USE_MOVNTQ
231 movntq [ebp+8], mm0 251 movntq [ebp+8], mm0
252 #else
253 movq [ebp+8], mm0
254 #endif
232 add ebp, 16 255 add ebp, 16
233 wend : 256 wend :
234 sub ecx, 4 257 sub ecx, 4
235 jns wloop 258 jns wloop
236 259
237 add ecx, 4 260 add ecx, 4
238 jz wdone 261 jz wdone
239 262
240 movzx eax, byte ptr [edi] 263 movzx eax, byte ptr [edi]
241 movq mm0, [kCoefficientsRgbU + 8 * eax] 264 movq mm0, [kCoefficientsRgbU + 8 * eax]
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
298 mov eax, ebx 321 mov eax, ebx
299 add ebx, [esp + 32 + 24] // x += source_dx 322 add ebx, [esp + 32 + 24] // x += source_dx
300 sar eax, 16 323 sar eax, 16
301 movzx eax, byte ptr [edx + eax] 324 movzx eax, byte ptr [edx + eax]
302 movq mm2, [kCoefficientsRgbY + 8 * eax] 325 movq mm2, [kCoefficientsRgbY + 8 * eax]
303 paddsw mm1, mm0 326 paddsw mm1, mm0
304 paddsw mm2, mm0 327 paddsw mm2, mm0
305 psraw mm1, 6 328 psraw mm1, 6
306 psraw mm2, 6 329 psraw mm2, 6
307 packuswb mm1, mm2 330 packuswb mm1, mm2
331 #if USE_MOVNTQ
308 movntq [ebp], mm1 332 movntq [ebp], mm1
333 #else
334 movq [ebp], mm1
335 #endif
309 add ebp, 8 336 add ebp, 8
310 scaleend : 337 scaleend :
311 sub ecx, 2 338 sub ecx, 2
312 jns scaleloop 339 jns scaleloop
313 340
314 and ecx, 1 // odd number of pixels? 341 and ecx, 1 // odd number of pixels?
315 jz scaledone 342 jz scaledone
316 343
317 mov eax, ebx 344 mov eax, ebx
318 sar eax, 17 345 sar eax, 17
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
418 imul ecx, eax 445 imul ecx, eax
419 add ecx, esi 446 add ecx, esi
420 shr ecx, 16 447 shr ecx, 16
421 movq mm2, [kCoefficientsRgbY + 8 * ecx] 448 movq mm2, [kCoefficientsRgbY + 8 * ecx]
422 449
423 paddsw mm1, mm0 450 paddsw mm1, mm0
424 paddsw mm2, mm0 451 paddsw mm2, mm0
425 psraw mm1, 0x6 452 psraw mm1, 0x6
426 psraw mm2, 0x6 453 psraw mm2, 0x6
427 packuswb mm1, mm2 454 packuswb mm1, mm2
455 #if USE_MOVNTQ
428 movntq [ebp], mm1 456 movntq [ebp], mm1
457 #else
458 movq [ebp], mm1
459 #endif
429 add ebp, 0x8 460 add ebp, 0x8
430 461
431 lscaleend: 462 lscaleend:
432 cmp ebx, [esp + 32 + 20] 463 cmp ebx, [esp + 32 + 20]
433 jl lscaleloop 464 jl lscaleloop
434 popad 465 popad
435 ret 466 ret
436 467
437 lscalelastpixel: 468 lscalelastpixel:
438 paddsw mm1, mm0 469 paddsw mm1, mm0
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
579 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; 610 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
580 YuvPixel(y, u, v, rgb_buf+4); 611 YuvPixel(y, u, v, rgb_buf+4);
581 x += source_dx; 612 x += source_dx;
582 } 613 }
583 rgb_buf += 8; 614 rgb_buf += 8;
584 } 615 }
585 } 616 }
586 617
587 #endif // USE_MMX 618 #endif // USE_MMX
588 } // extern "C" 619 } // extern "C"
589
OLDNEW
« no previous file with comments | « media/base/yuv_row_posix.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698