| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 #define kCoefficientsRgbU kCoefficientsRgbY + 2048 /* U entries; the __asm blocks index each table as [base + 8 * byte], i.e. 256 * 8 = 2048 bytes per table */ | 7 #define kCoefficientsRgbU kCoefficientsRgbY + 2048 /* U entries; the __asm blocks index each table as [base + 8 * byte], i.e. 256 * 8 = 2048 bytes per table */ |
| 8 #define kCoefficientsRgbV kCoefficientsRgbY + 4096 /* V entries, one further 2048-byte table past the U entries; not parenthesized, so only safe inside address expressions like [kCoefficientsRgbV + 8 * ebx] */ | 8 #define kCoefficientsRgbV kCoefficientsRgbY + 4096 /* V entries, one further 2048-byte table past the U entries; not parenthesized, so only safe inside address expressions like [kCoefficientsRgbV + 8 * ebx] */ |
| 9 | 9 |
| 10 extern "C" { | 10 extern "C" { |
| 11 | 11 |
| 12 // Branch 874 specific fix: keep USE_MOVNTQ at 0 so every 8-byte pixel store in the __asm blocks below is emitted as movq instead of movntq, which prevents crashes on Pentium IIs (NOTE(review): presumably because movntq arrived with SSE, which those CPUs lack - confirm). |
| 13 #define USE_MOVNTQ 0 |
| 14 |
| 12 #if USE_MMX | 15 #if USE_MMX |
| 13 __declspec(naked) | 16 __declspec(naked) |
| 14 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 17 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 15 const uint8* u_buf, | 18 const uint8* u_buf, |
| 16 const uint8* v_buf, | 19 const uint8* v_buf, |
| 17 uint8* rgb_buf, | 20 uint8* rgb_buf, |
| 18 int width) { | 21 int width) { |
| 19 __asm { | 22 __asm { |
| 20 pushad | 23 pushad |
| 21 mov edx, [esp + 32 + 4] // Y | 24 mov edx, [esp + 32 + 4] // Y |
| (...skipping 13 matching lines...) Expand all Loading... |
| 35 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] | 38 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] |
| 36 movzx ebx, byte ptr [edx + 1] | 39 movzx ebx, byte ptr [edx + 1] |
| 37 movq mm1, [kCoefficientsRgbY + 8 * eax] | 40 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 38 add edx, 2 | 41 add edx, 2 |
| 39 movq mm2, [kCoefficientsRgbY + 8 * ebx] | 42 movq mm2, [kCoefficientsRgbY + 8 * ebx] |
| 40 paddsw mm1, mm0 | 43 paddsw mm1, mm0 |
| 41 paddsw mm2, mm0 | 44 paddsw mm2, mm0 |
| 42 psraw mm1, 6 | 45 psraw mm1, 6 |
| 43 psraw mm2, 6 | 46 psraw mm2, 6 |
| 44 packuswb mm1, mm2 | 47 packuswb mm1, mm2 |
| 48 #if USE_MOVNTQ |
| 45 movntq [ebp], mm1 | 49 movntq [ebp], mm1 |
| 50 #else |
| 51 movq [ebp], mm1 |
| 52 #endif |
| 46 add ebp, 8 | 53 add ebp, 8 |
| 47 convertend : | 54 convertend : |
| 48 sub ecx, 2 | 55 sub ecx, 2 |
| 49 jns convertloop | 56 jns convertloop |
| 50 | 57 |
| 51 and ecx, 1 // odd number of pixels? | 58 and ecx, 1 // odd number of pixels? |
| 52 jz convertdone | 59 jz convertdone |
| 53 | 60 |
| 54 movzx eax, byte ptr [edi] | 61 movzx eax, byte ptr [edi] |
| 55 movq mm0, [kCoefficientsRgbU + 8 * eax] | 62 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 96 add edx, ebx | 103 add edx, ebx |
| 97 movq mm1, [kCoefficientsRgbY + 8 * eax] | 104 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 98 movzx eax, byte ptr [edx] | 105 movzx eax, byte ptr [edx] |
| 99 add edx, ebx | 106 add edx, ebx |
| 100 movq mm2, [kCoefficientsRgbY + 8 * eax] | 107 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 101 paddsw mm1, mm0 | 108 paddsw mm1, mm0 |
| 102 paddsw mm2, mm0 | 109 paddsw mm2, mm0 |
| 103 psraw mm1, 6 | 110 psraw mm1, 6 |
| 104 psraw mm2, 6 | 111 psraw mm2, 6 |
| 105 packuswb mm1, mm2 | 112 packuswb mm1, mm2 |
| 113 #if USE_MOVNTQ |
| 106 movntq [ebp], mm1 | 114 movntq [ebp], mm1 |
| 115 #else |
| 116 movq [ebp], mm1 |
| 117 #endif |
| 107 add ebp, 8 | 118 add ebp, 8 |
| 108 wend : | 119 wend : |
| 109 sub ecx, 2 | 120 sub ecx, 2 |
| 110 jns wloop | 121 jns wloop |
| 111 | 122 |
| 112 and ecx, 1 // odd number of pixels? | 123 and ecx, 1 // odd number of pixels? |
| 113 jz wdone | 124 jz wdone |
| 114 | 125 |
| 115 movzx eax, byte ptr [edi] | 126 movzx eax, byte ptr [edi] |
| 116 movq mm0, [kCoefficientsRgbU + 8 * eax] | 127 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 159 add edx, ebx | 170 add edx, ebx |
| 160 movq mm1, [kCoefficientsRgbY + 8 * eax] | 171 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 161 movzx eax, byte ptr [edx] | 172 movzx eax, byte ptr [edx] |
| 162 add edx, ebx | 173 add edx, ebx |
| 163 movq mm2, [kCoefficientsRgbY + 8 * eax] | 174 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 164 paddsw mm1, mm0 | 175 paddsw mm1, mm0 |
| 165 paddsw mm2, mm0 | 176 paddsw mm2, mm0 |
| 166 psraw mm1, 6 | 177 psraw mm1, 6 |
| 167 psraw mm2, 6 | 178 psraw mm2, 6 |
| 168 packuswb mm1, mm2 | 179 packuswb mm1, mm2 |
| 180 #if USE_MOVNTQ |
| 169 movntq [ebp], mm1 | 181 movntq [ebp], mm1 |
| 182 #else |
| 183 movq [ebp], mm1 |
| 184 #endif |
| 170 add ebp, 8 | 185 add ebp, 8 |
| 171 wend : | 186 wend : |
| 172 sub ecx, 2 | 187 sub ecx, 2 |
| 173 jns wloop | 188 jns wloop |
| 174 | 189 |
| 175 and ecx, 1 // odd number of pixels? | 190 and ecx, 1 // odd number of pixels? |
| 176 jz wdone | 191 jz wdone |
| 177 | 192 |
| 178 movzx eax, byte ptr [edi] | 193 movzx eax, byte ptr [edi] |
| 179 movq mm0, [kCoefficientsRgbU + 8 * eax] | 194 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 213 movzx ebx, byte ptr [esi] | 228 movzx ebx, byte ptr [esi] |
| 214 add esi, 1 | 229 add esi, 1 |
| 215 movq mm0, [kCoefficientsRgbU + 8 * eax] | 230 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 216 movzx eax, byte ptr [edx] | 231 movzx eax, byte ptr [edx] |
| 217 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] | 232 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] |
| 218 movq mm1, [kCoefficientsRgbY + 8 * eax] | 233 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 219 paddsw mm1, mm0 | 234 paddsw mm1, mm0 |
| 220 psraw mm1, 6 | 235 psraw mm1, 6 |
| 221 packuswb mm1, mm1 | 236 packuswb mm1, mm1 |
| 222 punpckldq mm1, mm1 | 237 punpckldq mm1, mm1 |
| 238 #if USE_MOVNTQ |
| 223 movntq [ebp], mm1 | 239 movntq [ebp], mm1 |
| 240 #else |
| 241 movq [ebp], mm1 |
| 242 #endif |
| 224 | 243 |
| 225 movzx ebx, byte ptr [edx + 1] | 244 movzx ebx, byte ptr [edx + 1] |
| 226 add edx, 2 | 245 add edx, 2 |
| 227 paddsw mm0, [kCoefficientsRgbY + 8 * ebx] | 246 paddsw mm0, [kCoefficientsRgbY + 8 * ebx] |
| 228 psraw mm0, 6 | 247 psraw mm0, 6 |
| 229 packuswb mm0, mm0 | 248 packuswb mm0, mm0 |
| 230 punpckldq mm0, mm0 | 249 punpckldq mm0, mm0 |
| 250 #if USE_MOVNTQ |
| 231 movntq [ebp+8], mm0 | 251 movntq [ebp+8], mm0 |
| 252 #else |
| 253 movq [ebp+8], mm0 |
| 254 #endif |
| 232 add ebp, 16 | 255 add ebp, 16 |
| 233 wend : | 256 wend : |
| 234 sub ecx, 4 | 257 sub ecx, 4 |
| 235 jns wloop | 258 jns wloop |
| 236 | 259 |
| 237 add ecx, 4 | 260 add ecx, 4 |
| 238 jz wdone | 261 jz wdone |
| 239 | 262 |
| 240 movzx eax, byte ptr [edi] | 263 movzx eax, byte ptr [edi] |
| 241 movq mm0, [kCoefficientsRgbU + 8 * eax] | 264 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 298 mov eax, ebx | 321 mov eax, ebx |
| 299 add ebx, [esp + 32 + 24] // x += source_dx | 322 add ebx, [esp + 32 + 24] // x += source_dx |
| 300 sar eax, 16 | 323 sar eax, 16 |
| 301 movzx eax, byte ptr [edx + eax] | 324 movzx eax, byte ptr [edx + eax] |
| 302 movq mm2, [kCoefficientsRgbY + 8 * eax] | 325 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 303 paddsw mm1, mm0 | 326 paddsw mm1, mm0 |
| 304 paddsw mm2, mm0 | 327 paddsw mm2, mm0 |
| 305 psraw mm1, 6 | 328 psraw mm1, 6 |
| 306 psraw mm2, 6 | 329 psraw mm2, 6 |
| 307 packuswb mm1, mm2 | 330 packuswb mm1, mm2 |
| 331 #if USE_MOVNTQ |
| 308 movntq [ebp], mm1 | 332 movntq [ebp], mm1 |
| 333 #else |
| 334 movq [ebp], mm1 |
| 335 #endif |
| 309 add ebp, 8 | 336 add ebp, 8 |
| 310 scaleend : | 337 scaleend : |
| 311 sub ecx, 2 | 338 sub ecx, 2 |
| 312 jns scaleloop | 339 jns scaleloop |
| 313 | 340 |
| 314 and ecx, 1 // odd number of pixels? | 341 and ecx, 1 // odd number of pixels? |
| 315 jz scaledone | 342 jz scaledone |
| 316 | 343 |
| 317 mov eax, ebx | 344 mov eax, ebx |
| 318 sar eax, 17 | 345 sar eax, 17 |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 418 imul ecx, eax | 445 imul ecx, eax |
| 419 add ecx, esi | 446 add ecx, esi |
| 420 shr ecx, 16 | 447 shr ecx, 16 |
| 421 movq mm2, [kCoefficientsRgbY + 8 * ecx] | 448 movq mm2, [kCoefficientsRgbY + 8 * ecx] |
| 422 | 449 |
| 423 paddsw mm1, mm0 | 450 paddsw mm1, mm0 |
| 424 paddsw mm2, mm0 | 451 paddsw mm2, mm0 |
| 425 psraw mm1, 0x6 | 452 psraw mm1, 0x6 |
| 426 psraw mm2, 0x6 | 453 psraw mm2, 0x6 |
| 427 packuswb mm1, mm2 | 454 packuswb mm1, mm2 |
| 455 #if USE_MOVNTQ |
| 428 movntq [ebp], mm1 | 456 movntq [ebp], mm1 |
| 457 #else |
| 458 movq [ebp], mm1 |
| 459 #endif |
| 429 add ebp, 0x8 | 460 add ebp, 0x8 |
| 430 | 461 |
| 431 lscaleend: | 462 lscaleend: |
| 432 cmp ebx, [esp + 32 + 20] | 463 cmp ebx, [esp + 32 + 20] |
| 433 jl lscaleloop | 464 jl lscaleloop |
| 434 popad | 465 popad |
| 435 ret | 466 ret |
| 436 | 467 |
| 437 lscalelastpixel: | 468 lscalelastpixel: |
| 438 paddsw mm1, mm0 | 469 paddsw mm1, mm0 |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 579 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; | 610 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 580 YuvPixel(y, u, v, rgb_buf+4); | 611 YuvPixel(y, u, v, rgb_buf+4); |
| 581 x += source_dx; | 612 x += source_dx; |
| 582 } | 613 } |
| 583 rgb_buf += 8; | 614 rgb_buf += 8; |
| 584 } | 615 } |
| 585 } | 616 } |
| 586 | 617 |
| 587 #endif // USE_MMX | 618 #endif // USE_MMX |
| 588 } // extern "C" | 619 } // extern "C" |
| 589 | |
| OLD | NEW |