| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 // Enable bilinear filtering by turning on the following macro. | 7 extern "C" { |
| 8 // #define MEDIA_BILINEAR_FILTER 1 | |
| 9 | |
| 10 namespace media { | |
| 11 | |
// RGBY(i): initializer for one four-lane int16 row of the Y lookup table.
// Lanes 0-2 all hold 1.164 * (i - 16) scaled by 64; lane 3 is 0.
// The + 0.5 is applied before static_cast, which truncates toward zero.
// The factor 64 corresponds to the "psraw ..., 6" in the conversion routines.
| 12 #define RGBY(i) { \ | 8 #define RGBY(i) { \ |
| 13 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 9 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
| 14 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 10 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
| 15 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 11 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
| 16 0 \ | 12 0 \ |
| 17 } | 13 } |
| 18 | 14 |
// RGBU(i): initializer for one four-lane int16 row of the U lookup table.
// Lane 0 is 2.018 * (i - 128) and lane 1 is -0.391 * (i - 128), both scaled by
// 64; lane 2 is 0.
// Lane 3 is the constant 256 * 64 - 1 (16383). The Y and V rows hold 0 in that
// lane, so after the converters' "psraw ..., 6" it becomes 255.
// NOTE(review): lane 3 is presumably the alpha byte of the output pixel - confirm.
| 19 #define RGBU(i) { \ | 15 #define RGBU(i) { \ |
| 20 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ | 16 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ |
| 21 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ | 17 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ |
| 22 0, \ | 18 0, \ |
| 23 static_cast<int16>(256 * 64 - 1) \ | 19 static_cast<int16>(256 * 64 - 1) \ |
| 24 } | 20 } |
| 25 | 21 |
// RGBV(i): initializer for one four-lane int16 row of the V lookup table.
// Lane 1 is -0.813 * (i - 128) and lane 2 is 1.596 * (i - 128), both scaled by
// 64; lanes 0 and 3 are 0.
| 26 #define RGBV(i) { \ | 22 #define RGBV(i) { \ |
| 27 0, \ | 23 0, \ |
| 28 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ | 24 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ |
| 29 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ | 25 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ |
| 30 0 \ | 26 0 \ |
| 31 } | 27 } |
| 32 | 28 |
// MMX_ALIGNED(var): prefixes a declaration with the MSVC-specific
// __declspec(align(16)), requesting 16-byte alignment for the variable.
| 33 #define MMX_ALIGNED(var) __declspec(align(16)) var | 29 #define MMX_ALIGNED(var) __declspec(align(16)) var |
| 34 | 30 |
// Y lookup table: one RGBY row (four int16 values, 8 bytes) for each of the
// 256 possible sample values. The assembly routines address it as
// [table + 8 * sample]. It is named coefficients_RGB_Y in the OLD column and
// kCoefficientsRgbY in the NEW column.
| 35 extern "C" { | 31 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { |
| 36 MMX_ALIGNED(int16 coefficients_RGB_Y[256][4]) = { | |
| 37 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), | 32 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), |
| 38 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), | 33 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), |
| 39 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), | 34 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), |
| 40 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), | 35 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), |
| 41 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), | 36 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), |
| 42 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), | 37 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), |
| 43 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), | 38 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), |
| 44 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), | 39 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), |
| 45 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), | 40 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), |
| 46 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), | 41 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 93 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), | 88 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), |
| 94 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), | 89 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), |
| 95 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), | 90 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), |
| 96 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), | 91 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), |
| 97 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), | 92 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), |
| 98 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), | 93 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), |
| 99 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), | 94 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), |
| 100 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), | 95 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), |
| 101 }; | 96 }; |
| 102 | 97 |
// U lookup table: one RGBU row (four int16 values, 8 bytes) for each of the
// 256 possible sample values. The assembly routines address it as
// [table + 8 * sample]. It is named coefficients_RGB_U in the OLD column and
// kCoefficientsRgbU in the NEW column.
| 103 MMX_ALIGNED(int16 coefficients_RGB_U[256][4]) = { | 98 MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = { |
| 104 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), | 99 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), |
| 105 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), | 100 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), |
| 106 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), | 101 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), |
| 107 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), | 102 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), |
| 108 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), | 103 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), |
| 109 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), | 104 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), |
| 110 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), | 105 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), |
| 111 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), | 106 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), |
| 112 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), | 107 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), |
| 113 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), | 108 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 160 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), | 155 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), |
| 161 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), | 156 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), |
| 162 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), | 157 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), |
| 163 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), | 158 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), |
| 164 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), | 159 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), |
| 165 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), | 160 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), |
| 166 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), | 161 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), |
| 167 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), | 162 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), |
| 168 }; | 163 }; |
| 169 | 164 |
// V lookup table: one RGBV row (four int16 values, 8 bytes) for each of the
// 256 possible sample values. The assembly routines address it as
// [table + 8 * sample]. It is named coefficients_RGB_V in the OLD column and
// kCoefficientsRgbV in the NEW column.
| 170 MMX_ALIGNED(int16 coefficients_RGB_V[256][4]) = { | 165 MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = { |
| 171 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), | 166 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), |
| 172 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), | 167 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), |
| 173 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), | 168 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), |
| 174 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), | 169 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), |
| 175 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), | 170 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), |
| 176 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), | 171 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), |
| 177 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), | 172 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), |
| 178 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), | 173 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), |
| 179 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), | 174 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), |
| 180 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), | 175 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 226 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), | 221 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), |
| 227 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), | 222 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), |
| 228 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), | 223 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), |
| 229 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), | 224 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), |
| 230 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), | 225 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), |
| 231 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), | 226 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), |
| 232 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), | 227 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), |
| 233 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), | 228 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), |
| 234 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), | 229 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), |
| 235 }; | 230 }; |
| 236 } // extern "C" | |
| 237 | 231 |
// The table-building macros are no longer needed once the three tables are
// defined, so they are removed from the preprocessor namespace.
// NOTE(review): RGBHY is never #defined in the visible part of this file, so
// its #undef has no effect - confirm it can be dropped.
| 238 #undef RGBHY | 232 #undef RGBHY |
| 239 #undef RGBY | 233 #undef RGBY |
| 240 #undef RGBU | 234 #undef RGBU |
| 241 #undef RGBV | 235 #undef RGBV |
| 242 #undef MMX_ALIGNED | 236 #undef MMX_ALIGNED |
| 243 | 237 |
// None of the __asm routines visible below execute EMMS after using the MMX
// registers, which is what triggers the warning suppressed here.
| 244 // Warning C4799: function has no EMMS instruction. | 238 // Warning C4799: function has no EMMS instruction. |
| 245 // EMMS() is slow and should be called by the calling function once per image. | 239 // EMMS() is slow and should be called by the calling function once per image. |
| 246 #pragma warning(disable: 4799) | 240 #pragma warning(disable: 4799) |
| 247 | 241 |
// Converts one row of Y, U and V bytes to 4-byte pixels using the three
// coefficient lookup tables.
// Each pass of the main loop reads one U byte, one V byte and two Y bytes and
// stores two pixels (8 bytes), so each U/V sample is shared by two adjacent
// pixels. A single trailing pixel is converted separately when width is odd.
//   y_buf   - source Y bytes; one is read per output pixel.
//   u_buf   - source U bytes; one is read per two output pixels.
//   v_buf   - source V bytes; one is read per two output pixels.
//   rgb_buf - destination; 4 bytes are written per pixel.
//   width   - number of pixels to convert.
// The function is naked: it saves and restores the general registers itself
// with PUSHAD/POPAD and returns with RET. It uses MMX registers and does not
// execute EMMS.
// NOTE(review): width is assumed to be non-negative. A negative odd width
// would skip the loop but still take the odd-pixel path below - confirm
// callers never pass one.
| 248 __declspec(naked) | 242 __declspec(naked) |
| 249 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 243 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 250 const uint8* u_buf, | 244 const uint8* u_buf, |
| 251 const uint8* v_buf, | 245 const uint8* v_buf, |
| 252 uint8* rgb_buf, | 246 uint8* rgb_buf, |
| 253 int width) { | 247 int width) { |
| 254 __asm { | 248 __asm {
// PUSHAD pushes eight 32-bit registers (32 bytes), so the arguments are found
// at esp + 32 + 4 onward.
| 255 pushad | 249 pushad |
| 256 mov edx, [esp + 32 + 4] // Y | 250 mov edx, [esp + 32 + 4] // Y |
| 257 mov edi, [esp + 32 + 8] // U | 251 mov edi, [esp + 32 + 8] // U |
| 258 mov esi, [esp + 32 + 12] // V | 252 mov esi, [esp + 32 + 12] // V |
| 259 mov ebp, [esp + 32 + 16] // rgb | 253 mov ebp, [esp + 32 + 16] // rgb |
| 260 mov ecx, [esp + 32 + 20] // width |
// Enter at the loop test so that a width below 2 skips the two-pixel loop.
| 261 jmp wend | 255 jmp convertend |
| 262 | 256 |
| 263 wloop : | 257 convertloop : |
// mm0 = U row + V row (saturating add): the chroma term shared by both pixels.
| 264 movzx eax, byte ptr [edi] | 258 movzx eax, byte ptr [edi] |
| 265 add edi, 1 | 259 add edi, 1 |
| 266 movzx ebx, byte ptr [esi] | 260 movzx ebx, byte ptr [esi] |
| 267 add esi, 1 | 261 add esi, 1 |
| 268 movq mm0, [coefficients_RGB_U + 8 * eax] | 262 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 269 movzx eax, byte ptr [edx] | 263 movzx eax, byte ptr [edx] |
| 270 paddsw mm0, [coefficients_RGB_V + 8 * ebx] | 264 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] |
| 271 movzx ebx, byte ptr [edx + 1] | 265 movzx ebx, byte ptr [edx + 1] |
// mm1 and mm2 = Y rows of the first and second pixel of the pair.
| 272 movq mm1, [coefficients_RGB_Y + 8 * eax] | 266 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 273 add edx, 2 | 267 add edx, 2 |
| 274 movq mm2, [coefficients_RGB_Y + 8 * ebx] | 268 movq mm2, [kCoefficientsRgbY + 8 * ebx] |
// Add chroma to each Y row, shift out the 6 fractional bits, then pack both
// pixels into 8 unsigned saturated bytes and store them non-temporally.
| 275 paddsw mm1, mm0 | 269 paddsw mm1, mm0 |
| 276 paddsw mm2, mm0 | 270 paddsw mm2, mm0 |
| 277 psraw mm1, 6 | 271 psraw mm1, 6 |
| 278 psraw mm2, 6 | 272 psraw mm2, 6 |
| 279 packuswb mm1, mm2 | 273 packuswb mm1, mm2 |
| 280 movntq [ebp], mm1 | 274 movntq [ebp], mm1 |
| 281 add ebp, 8 | 275 add ebp, 8 |
| 282 wend : | 276 convertend : |
| 283 sub ecx, 2 | 277 sub ecx, 2 |
| 284 jns wloop | 278 jns convertloop |
| 285 | 279 |
// For a non-negative width, ecx is now -1 (odd width) or -2 (even width);
// bit 0 tells the two cases apart.
| 286 and ecx, 1 // odd number of pixels? | 280 and ecx, 1 // odd number of pixels? |
| 287 jz wdone | 281 jz convertdone |
| 288 | 282 |
// Last single pixel: same computation, but only the low 4 bytes are stored.
| 289 movzx eax, byte ptr [edi] | 283 movzx eax, byte ptr [edi] |
| 290 movq mm0, [coefficients_RGB_U + 8 * eax] | 284 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 291 movzx eax, byte ptr [esi] | 285 movzx eax, byte ptr [esi] |
| 292 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 286 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 293 movzx eax, byte ptr [edx] | 287 movzx eax, byte ptr [edx] |
| 294 movq mm1, [coefficients_RGB_Y + 8 * eax] | 288 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 295 paddsw mm1, mm0 | 289 paddsw mm1, mm0 |
| 296 psraw mm1, 6 | 290 psraw mm1, 6 |
| 297 packuswb mm1, mm1 | 291 packuswb mm1, mm1 |
| 298 movd [ebp], mm1 | 292 movd [ebp], mm1 |
| 299 wdone : | 293 convertdone : |
| 300 | 294 |
| 301 popad | 295 popad |
| 302 ret | 296 ret |
| 303 } | 297 } |
| 304 } | 298 } |
| 305 | 299 |
// Strided variant of the row converter: turns Y, U and V bytes into 4-byte
// pixels using the three coefficient lookup tables, advancing the source
// pointers by a caller-supplied step instead of a fixed 1.
// Each pass of the main loop reads one U byte and one V byte (each pointer
// then advances by step once) and two Y bytes (the Y pointer advances by step
// after each), and stores two pixels (8 bytes). A single trailing pixel is
// converted separately when width is odd.
//   y_buf   - source Y bytes; advanced by step per output pixel.
//   u_buf   - source U bytes; advanced by step per two output pixels.
//   v_buf   - source V bytes; advanced by step per two output pixels.
//   rgb_buf - destination; 4 bytes are written per pixel, contiguously.
//   width   - number of pixels to convert.
//   step    - byte increment applied to the source pointers as described above.
// The function is naked: it saves and restores the general registers itself
// with PUSHAD/POPAD and returns with RET. It uses MMX registers and does not
// execute EMMS.
// NOTE(review): width is assumed to be non-negative. A negative odd width
// would skip the loop but still take the odd-pixel path below - confirm
// callers never pass one.
| 306 __declspec(naked) | 300 __declspec(naked) |
| 307 void ConvertYUVToRGB32Row(const uint8* y_buf, | 301 void ConvertYUVToRGB32Row(const uint8* y_buf, |
| 308 const uint8* u_buf, | 302 const uint8* u_buf, |
| 309 const uint8* v_buf, | 303 const uint8* v_buf, |
| 310 uint8* rgb_buf, | 304 uint8* rgb_buf, |
| 311 int width, | 305 int width, |
| 312 int step) { | 306 int step) { |
| 313 __asm { | 307 __asm { |
// PUSHAD pushes eight 32-bit registers (32 bytes), so the arguments are found
// at esp + 32 + 4 onward.
| 314 pushad | 308 pushad |
| 315 mov edx, [esp + 32 + 4] // Y | 309 mov edx, [esp + 32 + 4] // Y |
| 316 mov edi, [esp + 32 + 8] // U | 310 mov edi, [esp + 32 + 8] // U |
| 317 mov esi, [esp + 32 + 12] // V | 311 mov esi, [esp + 32 + 12] // V |
| 318 mov ebp, [esp + 32 + 16] // rgb | 312 mov ebp, [esp + 32 + 16] // rgb |
| 319 mov ecx, [esp + 32 + 20] // width | 313 mov ecx, [esp + 32 + 20] // width |
| 320 mov ebx, [esp + 32 + 24] // step | 314 mov ebx, [esp + 32 + 24] // step |
// Enter at the loop test so that a width below 2 skips the two-pixel loop.
| 321 jmp wend | 315 jmp wend |
| 322 | 316 |
| 323 wloop : | 317 wloop : |
// mm0 = U row + V row (saturating add): the chroma term shared by both pixels.
| 324 movzx eax, byte ptr [edi] | 318 movzx eax, byte ptr [edi] |
| 325 add edi, ebx | 319 add edi, ebx |
| 326 movq mm0, [coefficients_RGB_U + 8 * eax] | 320 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 327 movzx eax, byte ptr [esi] | 321 movzx eax, byte ptr [esi] |
| 328 add esi, ebx | 322 add esi, ebx |
| 329 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 323 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
// mm1 and mm2 = Y rows of the two pixels; the Y pointer steps after each read.
| 330 movzx eax, byte ptr [edx] | 324 movzx eax, byte ptr [edx] |
| 331 add edx, ebx | 325 add edx, ebx |
| 332 movq mm1, [coefficients_RGB_Y + 8 * eax] | 326 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 333 movzx eax, byte ptr [edx] | 327 movzx eax, byte ptr [edx] |
| 334 add edx, ebx | 328 add edx, ebx |
| 335 movq mm2, [coefficients_RGB_Y + 8 * eax] | 329 movq mm2, [kCoefficientsRgbY + 8 * eax] |
// Add chroma to each Y row, shift out the 6 fractional bits, then pack both
// pixels into 8 unsigned saturated bytes and store them non-temporally.
| 336 paddsw mm1, mm0 | 330 paddsw mm1, mm0 |
| 337 paddsw mm2, mm0 | 331 paddsw mm2, mm0 |
| 338 psraw mm1, 6 | 332 psraw mm1, 6 |
| 339 psraw mm2, 6 | 333 psraw mm2, 6 |
| 340 packuswb mm1, mm2 | 334 packuswb mm1, mm2 |
| 341 movntq [ebp], mm1 | 335 movntq [ebp], mm1 |
| 342 add ebp, 8 | 336 add ebp, 8 |
| 343 wend : | 337 wend : |
| 344 sub ecx, 2 | 338 sub ecx, 2 |
| 345 jns wloop | 339 jns wloop |
| 346 | 340 |
// For a non-negative width, ecx is now -1 (odd width) or -2 (even width);
// bit 0 tells the two cases apart.
| 347 and ecx, 1 // odd number of pixels? | 341 and ecx, 1 // odd number of pixels? |
| 348 jz wdone | 342 jz wdone |
| 349 | 343 |
// Last single pixel: same computation, but only the low 4 bytes are stored.
| 350 movzx eax, byte ptr [edi] | 344 movzx eax, byte ptr [edi] |
| 351 movq mm0, [coefficients_RGB_U + 8 * eax] | 345 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 352 movzx eax, byte ptr [esi] | 346 movzx eax, byte ptr [esi] |
| 353 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 347 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 354 movzx eax, byte ptr [edx] | 348 movzx eax, byte ptr [edx] |
| 355 movq mm1, [coefficients_RGB_Y + 8 * eax] | 349 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 356 paddsw mm1, mm0 | 350 paddsw mm1, mm0 |
| 357 psraw mm1, 6 | 351 psraw mm1, 6 |
| 358 packuswb mm1, mm1 | 352 packuswb mm1, mm1 |
| 359 movd [ebp], mm1 | 353 movd [ebp], mm1 |
| 360 wdone : | 354 wdone : |
| 361 | 355 |
| 362 popad | 356 popad |
| 363 ret | 357 ret |
| 364 } | 358 } |
| 365 } | 359 } |
| (...skipping 12 matching lines...) Expand all Loading... |
| 378 mov edi, [esp + 32 + 8] // U | 372 mov edi, [esp + 32 + 8] // U |
| 379 mov esi, [esp + 32 + 12] // V | 373 mov esi, [esp + 32 + 12] // V |
| 380 mov ebp, [esp + 32 + 16] // rgb | 374 mov ebp, [esp + 32 + 16] // rgb |
| 381 mov ecx, [esp + 32 + 20] // width | 375 mov ecx, [esp + 32 + 20] // width |
| 382 jmp wend | 376 jmp wend |
| 383 | 377 |
| 384 wloop : | 378 wloop : |
| 385 movzx eax, byte ptr [edi] | 379 movzx eax, byte ptr [edi] |
| 386 mov ebx, [esp + 32 + 28] // uvstep | 380 mov ebx, [esp + 32 + 28] // uvstep |
| 387 add edi, ebx | 381 add edi, ebx |
| 388 movq mm0, [coefficients_RGB_U + 8 * eax] | 382 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 389 movzx eax, byte ptr [esi] | 383 movzx eax, byte ptr [esi] |
| 390 add esi, ebx | 384 add esi, ebx |
| 391 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 385 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 392 movzx eax, byte ptr [edx] | 386 movzx eax, byte ptr [edx] |
| 393 mov ebx, [esp + 32 + 24] // ystep | 387 mov ebx, [esp + 32 + 24] // ystep |
| 394 add edx, ebx | 388 add edx, ebx |
| 395 movq mm1, [coefficients_RGB_Y + 8 * eax] | 389 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 396 movzx eax, byte ptr [edx] | 390 movzx eax, byte ptr [edx] |
| 397 add edx, ebx | 391 add edx, ebx |
| 398 movq mm2, [coefficients_RGB_Y + 8 * eax] | 392 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 399 paddsw mm1, mm0 | 393 paddsw mm1, mm0 |
| 400 paddsw mm2, mm0 | 394 paddsw mm2, mm0 |
| 401 psraw mm1, 6 | 395 psraw mm1, 6 |
| 402 psraw mm2, 6 | 396 psraw mm2, 6 |
| 403 packuswb mm1, mm2 | 397 packuswb mm1, mm2 |
| 404 movntq [ebp], mm1 | 398 movntq [ebp], mm1 |
| 405 add ebp, 8 | 399 add ebp, 8 |
| 406 wend : | 400 wend : |
| 407 sub ecx, 2 | 401 sub ecx, 2 |
| 408 jns wloop | 402 jns wloop |
| 409 | 403 |
| 410 and ecx, 1 // odd number of pixels? | 404 and ecx, 1 // odd number of pixels? |
| 411 jz wdone | 405 jz wdone |
| 412 | 406 |
| 413 movzx eax, byte ptr [edi] | 407 movzx eax, byte ptr [edi] |
| 414 movq mm0, [coefficients_RGB_U + 8 * eax] | 408 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 415 movzx eax, byte ptr [esi] | 409 movzx eax, byte ptr [esi] |
| 416 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 410 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 417 movzx eax, byte ptr [edx] | 411 movzx eax, byte ptr [edx] |
| 418 movq mm1, [coefficients_RGB_Y + 8 * eax] | 412 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 419 paddsw mm1, mm0 | 413 paddsw mm1, mm0 |
| 420 psraw mm1, 6 | 414 psraw mm1, 6 |
| 421 packuswb mm1, mm1 | 415 packuswb mm1, mm1 |
| 422 movd [ebp], mm1 | 416 movd [ebp], mm1 |
| 423 wdone : | 417 wdone : |
| 424 | 418 |
| 425 popad | 419 popad |
| 426 ret | 420 ret |
| 427 } | 421 } |
| 428 } | 422 } |
| (...skipping 11 matching lines...) Expand all Loading... |
| 440 mov esi, [esp + 32 + 12] // V | 434 mov esi, [esp + 32 + 12] // V |
| 441 mov ebp, [esp + 32 + 16] // rgb | 435 mov ebp, [esp + 32 + 16] // rgb |
| 442 mov ecx, [esp + 32 + 20] // width | 436 mov ecx, [esp + 32 + 20] // width |
| 443 jmp wend | 437 jmp wend |
| 444 | 438 |
| 445 wloop : | 439 wloop : |
| 446 movzx eax, byte ptr [edi] | 440 movzx eax, byte ptr [edi] |
| 447 add edi, 1 | 441 add edi, 1 |
| 448 movzx ebx, byte ptr [esi] | 442 movzx ebx, byte ptr [esi] |
| 449 add esi, 1 | 443 add esi, 1 |
| 450 movq mm0, [coefficients_RGB_U + 8 * eax] | 444 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 451 movzx eax, byte ptr [edx] | 445 movzx eax, byte ptr [edx] |
| 452 paddsw mm0, [coefficients_RGB_V + 8 * ebx] | 446 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] |
| 453 movq mm1, [coefficients_RGB_Y + 8 * eax] | 447 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 454 paddsw mm1, mm0 | 448 paddsw mm1, mm0 |
| 455 psraw mm1, 6 | 449 psraw mm1, 6 |
| 456 packuswb mm1, mm1 | 450 packuswb mm1, mm1 |
| 457 punpckldq mm1, mm1 | 451 punpckldq mm1, mm1 |
| 458 movntq [ebp], mm1 | 452 movntq [ebp], mm1 |
| 459 | 453 |
| 460 movzx ebx, byte ptr [edx + 1] | 454 movzx ebx, byte ptr [edx + 1] |
| 461 add edx, 2 | 455 add edx, 2 |
| 462 paddsw mm0, [coefficients_RGB_Y + 8 * ebx] | 456 paddsw mm0, [kCoefficientsRgbY + 8 * ebx] |
| 463 psraw mm0, 6 | 457 psraw mm0, 6 |
| 464 packuswb mm0, mm0 | 458 packuswb mm0, mm0 |
| 465 punpckldq mm0, mm0 | 459 punpckldq mm0, mm0 |
| 466 movntq [ebp+8], mm0 | 460 movntq [ebp+8], mm0 |
| 467 add ebp, 16 | 461 add ebp, 16 |
| 468 wend : | 462 wend : |
| 469 sub ecx, 4 | 463 sub ecx, 4 |
| 470 jns wloop | 464 jns wloop |
| 471 | 465 |
| 472 add ecx, 4 | 466 add ecx, 4 |
| 473 jz wdone | 467 jz wdone |
| 474 | 468 |
| 475 movzx eax, byte ptr [edi] | 469 movzx eax, byte ptr [edi] |
| 476 movq mm0, [coefficients_RGB_U + 8 * eax] | 470 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 477 movzx eax, byte ptr [esi] | 471 movzx eax, byte ptr [esi] |
| 478 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 472 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 479 movzx eax, byte ptr [edx] | 473 movzx eax, byte ptr [edx] |
| 480 movq mm1, [coefficients_RGB_Y + 8 * eax] | 474 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 481 paddsw mm1, mm0 | 475 paddsw mm1, mm0 |
| 482 psraw mm1, 6 | 476 psraw mm1, 6 |
| 483 packuswb mm1, mm1 | 477 packuswb mm1, mm1 |
| 484 jmp wend1 | 478 jmp wend1 |
| 485 | 479 |
| 486 wloop1 : | 480 wloop1 : |
| 487 movd [ebp], mm1 | 481 movd [ebp], mm1 |
| 488 add ebp, 4 | 482 add ebp, 4 |
| 489 wend1 : | 483 wend1 : |
| 490 sub ecx, 1 | 484 sub ecx, 1 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 507 int width, | 501 int width, |
| 508 int dx) { | 502 int dx) { |
| 509 __asm { | 503 __asm { |
| 510 pushad | 504 pushad |
| 511 mov edx, [esp + 32 + 4] // Y | 505 mov edx, [esp + 32 + 4] // Y |
| 512 mov edi, [esp + 32 + 8] // U | 506 mov edi, [esp + 32 + 8] // U |
| 513 mov esi, [esp + 32 + 12] // V | 507 mov esi, [esp + 32 + 12] // V |
| 514 mov ebp, [esp + 32 + 16] // rgb | 508 mov ebp, [esp + 32 + 16] // rgb |
| 515 mov ecx, [esp + 32 + 20] // width | 509 mov ecx, [esp + 32 + 20] // width |
| 516 xor ebx, ebx // x | 510 xor ebx, ebx // x |
| 517 jmp wend | 511 jmp scaleend |
| 518 | 512 |
| 519 wloop : | 513 scaleloop : |
| 520 mov eax, ebx | 514 mov eax, ebx |
| 521 sar eax, 5 | 515 sar eax, 5 |
| 522 movzx eax, byte ptr [edi + eax] | 516 movzx eax, byte ptr [edi + eax] |
| 523 movq mm0, [coefficients_RGB_U + 8 * eax] | 517 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 524 mov eax, ebx | 518 mov eax, ebx |
| 525 sar eax, 5 | 519 sar eax, 5 |
| 526 movzx eax, byte ptr [esi + eax] | 520 movzx eax, byte ptr [esi + eax] |
| 527 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 521 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 528 mov eax, ebx | 522 mov eax, ebx |
| 529 add ebx, [esp + 32 + 24] // x += dx | 523 add ebx, [esp + 32 + 24] // x += dx |
| 530 sar eax, 4 | 524 sar eax, 4 |
| 531 movzx eax, byte ptr [edx + eax] | 525 movzx eax, byte ptr [edx + eax] |
| 532 movq mm1, [coefficients_RGB_Y + 8 * eax] | 526 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 533 mov eax, ebx | 527 mov eax, ebx |
| 534 add ebx, [esp + 32 + 24] // x += dx | 528 add ebx, [esp + 32 + 24] // x += dx |
| 535 sar eax, 4 | 529 sar eax, 4 |
| 536 movzx eax, byte ptr [edx + eax] | 530 movzx eax, byte ptr [edx + eax] |
| 537 movq mm2, [coefficients_RGB_Y + 8 * eax] | 531 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 538 paddsw mm1, mm0 | 532 paddsw mm1, mm0 |
| 539 paddsw mm2, mm0 | 533 paddsw mm2, mm0 |
| 540 psraw mm1, 6 | 534 psraw mm1, 6 |
| 541 psraw mm2, 6 | 535 psraw mm2, 6 |
| 542 packuswb mm1, mm2 | 536 packuswb mm1, mm2 |
| 543 movntq [ebp], mm1 | 537 movntq [ebp], mm1 |
| 544 add ebp, 8 | 538 add ebp, 8 |
| 545 wend : | 539 scaleend : |
| 546 sub ecx, 2 | 540 sub ecx, 2 |
| 547 jns wloop | 541 jns scaleloop |
| 548 | 542 |
| 549 and ecx, 1 // odd number of pixels? | 543 and ecx, 1 // odd number of pixels? |
| 550 jz wdone | 544 jz scaledone |
| 551 | 545 |
| 552 mov eax, ebx | 546 mov eax, ebx |
| 553 sar eax, 5 | 547 sar eax, 5 |
| 554 movzx eax, byte ptr [edi + eax] | 548 movzx eax, byte ptr [edi + eax] |
| 555 movq mm0, [coefficients_RGB_U + 8 * eax] | 549 movq mm0, [kCoefficientsRgbU + 8 * eax] |
| 556 mov eax, ebx | 550 mov eax, ebx |
| 557 sar eax, 5 | 551 sar eax, 5 |
| 558 movzx eax, byte ptr [esi + eax] | 552 movzx eax, byte ptr [esi + eax] |
| 559 paddsw mm0, [coefficients_RGB_V + 8 * eax] | 553 paddsw mm0, [kCoefficientsRgbV + 8 * eax] |
| 560 mov eax, ebx | 554 mov eax, ebx |
| 561 sar eax, 4 | 555 sar eax, 4 |
| 562 movzx eax, byte ptr [edx + eax] | 556 movzx eax, byte ptr [edx + eax] |
| 563 movq mm1, [coefficients_RGB_Y + 8 * eax] | 557 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 564 mov eax, ebx | 558 mov eax, ebx |
| 565 sar eax, 4 | 559 sar eax, 4 |
| 566 movzx eax, byte ptr [edx + eax] | 560 movzx eax, byte ptr [edx + eax] |
| 567 movq mm2, [coefficients_RGB_Y + 8 * eax] | 561 movq mm2, [kCoefficientsRgbY + 8 * eax] |
| 568 paddsw mm1, mm0 | 562 paddsw mm1, mm0 |
| 569 paddsw mm2, mm0 | 563 paddsw mm2, mm0 |
| 570 psraw mm1, 6 | 564 psraw mm1, 6 |
| 571 psraw mm2, 6 | 565 psraw mm2, 6 |
| 572 packuswb mm1, mm2 | 566 packuswb mm1, mm2 |
| 573 movd [ebp], mm1 | 567 movd [ebp], mm1 |
| 574 | 568 |
| 575 wdone : | 569 scaledone : |
| 576 | |
| 577 popad | 570 popad |
| 578 ret | 571 ret |
| 579 } | 572 } |
| 580 } | 573 } |
| 574 } // extern "C" |
| 581 | 575 |
| 582 } // namespace media | |
| 583 | |
| OLD | NEW |