| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 | |
| 12 /**************************************************************************** | |
| 13 * | |
| 14 * Module Title : scaleopt.cpp | |
| 15 * | |
| 16 * Description : Optimized scaling functions | |
| 17 * | |
| 18 ****************************************************************************/ | |
| 19 #include "pragmas.h" | |
| 20 | |
| 21 /**************************************************************************** | |
| 22 * Module Statics | |
| 23 ****************************************************************************/ | |
| 24 __declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 1
28, 128 }; | |
| 25 | |
| 26 #include "vpx_scale/vpx_scale.h" | |
| 27 #include "vpx_mem/vpx_mem.h" | |
| 28 | |
| 29 __declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128,
192 }; | |
| 30 __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,
64 }; | |
| 31 | |
| 32 | |
| 33 /**************************************************************************** | |
| 34 * | |
| 35 * ROUTINE : horizontal_line_5_4_scale_mmx | |
| 36 * | |
| 37 * INPUTS : const unsigned char *source : Pointer to source data. | |
| 38 * unsigned int source_width : Stride of source. | |
| 39 * unsigned char *dest : Pointer to destination data. | |
| 40 * unsigned int dest_width : Stride of destination (NOT US
ED). | |
| 41 * | |
| 42 * OUTPUTS : None. | |
| 43 * | |
| 44 * RETURNS : void | |
| 45 * | |
| 46 * FUNCTION : Copies horizontal line of pixels from source to | |
| 47 * destination scaling up by 4 to 5. | |
| 48 * | |
| 49 * SPECIAL NOTES : None. | |
| 50 * | |
| 51 ****************************************************************************/ | |
| 52 static | |
| 53 void horizontal_line_5_4_scale_mmx | |
| 54 ( | |
| 55 const unsigned char *source, | |
| 56 unsigned int source_width, | |
| 57 unsigned char *dest, | |
| 58 unsigned int dest_width | |
| 59 ) { | |
| 60 /* | |
| 61 unsigned i; | |
| 62 unsigned int a, b, c, d, e; | |
| 63 unsigned char *des = dest; | |
| 64 const unsigned char *src = source; | |
| 65 | |
| 66 (void) dest_width; | |
| 67 | |
| 68 for ( i=0; i<source_width; i+=5 ) | |
| 69 { | |
| 70 a = src[0]; | |
| 71 b = src[1]; | |
| 72 c = src[2]; | |
| 73 d = src[3]; | |
| 74 e = src[4]; | |
| 75 | |
| 76 des[0] = a; | |
| 77 des[1] = ((b*192 + c* 64 + 128)>>8); | |
| 78 des[2] = ((c*128 + d*128 + 128)>>8); | |
| 79 des[3] = ((d* 64 + e*192 + 128)>>8); | |
| 80 | |
| 81 src += 5; | |
| 82 des += 4; | |
| 83 } | |
| 84 */ | |
| 85 (void) dest_width; | |
| 86 | |
| 87 __asm { | |
| 88 | |
| 89 mov esi, source; | |
| 90 mov edi, dest; | |
| 91 | |
| 92 mov ecx, source_width; | |
| 93 movq mm5, const54_1; | |
| 94 | |
| 95 pxor mm7, mm7; | |
| 96 movq mm6, const54_2; | |
| 97 | |
| 98 movq mm4, round_values; | |
| 99 lea edx, [esi+ecx]; | |
| 100 horizontal_line_5_4_loop: | |
| 101 | |
| 102 movq mm0, QWORD PTR [esi]; | |
| 103 00 01 02 03 04 05 06 07 | |
| 104 movq mm1, mm0; | |
| 105 00 01 02 03 04 05 06 07 | |
| 106 | |
| 107 psrlq mm0, 8; | |
| 108 01 02 03 04 05 06 07 xx | |
| 109 punpcklbw mm1, mm7; | |
| 110 xx 00 xx 01 xx 02 xx 03 | |
| 111 | |
| 112 punpcklbw mm0, mm7; | |
| 113 xx 01 xx 02 xx 03 xx 04 | |
| 114 pmullw mm1, mm5 | |
| 115 | |
| 116 pmullw mm0, mm6 | |
| 117 add esi, 5 | |
| 118 | |
| 119 add edi, 4 | |
| 120 paddw mm1, mm0 | |
| 121 | |
| 122 paddw mm1, mm4 | |
| 123 psrlw mm1, 8 | |
| 124 | |
| 125 cmp esi, edx | |
| 126 packuswb mm1, mm7 | |
| 127 | |
| 128 movd DWORD PTR [edi-4], mm1 | |
| 129 | |
| 130 jl horizontal_line_5_4_loop | |
| 131 | |
| 132 } | |
| 133 | |
| 134 } | |
| 135 __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64,
64, 64 }; | |
| 136 __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128,
128, 128 }; | |
| 137 __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192,
192, 192 }; | |
| 138 | |
| 139 static | |
| 140 void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch,
unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
| 141 | |
| 142 __asm { | |
| 143 push ebx | |
| 144 | |
| 145 mov esi, source // Get the source and destinat
ion pointer | |
| 146 mov ecx, src_pitch // Get the pitch size | |
| 147 | |
| 148 mov edi, dest // tow lines below | |
| 149 pxor mm7, mm7 // clear out mm7 | |
| 150 | |
| 151 mov edx, dest_pitch // Loop counter | |
| 152 mov ebx, dest_width | |
| 153 | |
| 154 vs_5_4_loop: | |
| 155 | |
| 156 movd mm0, DWORD ptr [esi] // src[0]; | |
| 157 movd mm1, DWORD ptr [esi+ecx] // src[1]; | |
| 158 | |
| 159 movd mm2, DWORD ptr [esi+ecx*2] | |
| 160 lea eax, [esi+ecx*2] // | |
| 161 | |
| 162 punpcklbw mm1, mm7 | |
| 163 punpcklbw mm2, mm7 | |
| 164 | |
| 165 movq mm3, mm2 | |
| 166 pmullw mm1, three_fourths | |
| 167 | |
| 168 pmullw mm2, one_fourths | |
| 169 movd mm4, [eax+ecx] | |
| 170 | |
| 171 pmullw mm3, two_fourths | |
| 172 punpcklbw mm4, mm7 | |
| 173 | |
| 174 movq mm5, mm4 | |
| 175 pmullw mm4, two_fourths | |
| 176 | |
| 177 paddw mm1, mm2 | |
| 178 movd mm6, [eax+ecx*2] | |
| 179 | |
| 180 pmullw mm5, one_fourths | |
| 181 paddw mm1, round_values; | |
| 182 | |
| 183 paddw mm3, mm4 | |
| 184 psrlw mm1, 8 | |
| 185 | |
| 186 punpcklbw mm6, mm7 | |
| 187 paddw mm3, round_values | |
| 188 | |
| 189 pmullw mm6, three_fourths | |
| 190 psrlw mm3, 8 | |
| 191 | |
| 192 packuswb mm1, mm7 | |
| 193 packuswb mm3, mm7 | |
| 194 | |
| 195 movd DWORD PTR [edi], mm0 | |
| 196 movd DWORD PTR [edi+edx], mm1 | |
| 197 | |
| 198 | |
| 199 paddw mm5, mm6 | |
| 200 movd DWORD PTR [edi+edx*2], mm3 | |
| 201 | |
| 202 lea eax, [edi+edx*2] | |
| 203 paddw mm5, round_values | |
| 204 | |
| 205 psrlw mm5, 8 | |
| 206 add edi, 4 | |
| 207 | |
| 208 packuswb mm5, mm7 | |
| 209 movd DWORD PTR [eax+edx], mm5 | |
| 210 | |
| 211 add esi, 4 | |
| 212 sub ebx, 4 | |
| 213 | |
| 214 jg vs_5_4_loop | |
| 215 | |
| 216 pop ebx | |
| 217 } | |
| 218 } | |
| 219 | |
| 220 | |
| 221 __declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171,
0 }; | |
| 222 __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85,
0 }; | |
| 223 | |
| 224 | |
| 225 static | |
| 226 void horizontal_line_5_3_scale_mmx | |
| 227 ( | |
| 228 const unsigned char *source, | |
| 229 unsigned int source_width, | |
| 230 unsigned char *dest, | |
| 231 unsigned int dest_width | |
| 232 ) { | |
| 233 | |
| 234 (void) dest_width; | |
| 235 __asm { | |
| 236 | |
| 237 mov esi, source; | |
| 238 mov edi, dest; | |
| 239 | |
| 240 mov ecx, source_width; | |
| 241 movq mm5, const53_1; | |
| 242 | |
| 243 pxor mm7, mm7; | |
| 244 movq mm6, const53_2; | |
| 245 | |
| 246 movq mm4, round_values; | |
| 247 lea edx, [esi+ecx-5]; | |
| 248 horizontal_line_5_3_loop: | |
| 249 | |
| 250 movq mm0, QWORD PTR [esi]; | |
| 251 00 01 02 03 04 05 06 07 | |
| 252 movq mm1, mm0; | |
| 253 00 01 02 03 04 05 06 07 | |
| 254 | |
| 255 psllw mm0, 8; | |
| 256 xx 00 xx 02 xx 04 xx 06 | |
| 257 psrlw mm1, 8; | |
| 258 01 xx 03 xx 05 xx 07 xx | |
| 259 | |
| 260 psrlw mm0, 8; | |
| 261 00 xx 02 xx 04 xx 06 xx | |
| 262 psllq mm1, 16; | |
| 263 xx xx 01 xx 03 xx 05 xx | |
| 264 | |
| 265 pmullw mm0, mm6 | |
| 266 | |
| 267 pmullw mm1, mm5 | |
| 268 add esi, 5 | |
| 269 | |
| 270 add edi, 3 | |
| 271 paddw mm1, mm0 | |
| 272 | |
| 273 paddw mm1, mm4 | |
| 274 psrlw mm1, 8 | |
| 275 | |
| 276 cmp esi, edx | |
| 277 packuswb mm1, mm7 | |
| 278 | |
| 279 movd DWORD PTR [edi-3], mm1 | |
| 280 jl horizontal_line_5_3_loop | |
| 281 | |
| 282 // exit condition | |
| 283 movq mm0, QWORD PTR [esi]; | |
| 284 00 01 02 03 04 05 06 07 | |
| 285 movq mm1, mm0; | |
| 286 00 01 02 03 04 05 06 07 | |
| 287 | |
| 288 psllw mm0, 8; | |
| 289 xx 00 xx 02 xx 04 xx 06 | |
| 290 psrlw mm1, 8; | |
| 291 01 xx 03 xx 05 xx 07 xx | |
| 292 | |
| 293 psrlw mm0, 8; | |
| 294 00 xx 02 xx 04 xx 06 xx | |
| 295 psllq mm1, 16; | |
| 296 xx xx 01 xx 03 xx 05 xx | |
| 297 | |
| 298 pmullw mm0, mm6 | |
| 299 | |
| 300 pmullw mm1, mm5 | |
| 301 paddw mm1, mm0 | |
| 302 | |
| 303 paddw mm1, mm4 | |
| 304 psrlw mm1, 8 | |
| 305 | |
| 306 packuswb mm1, mm7 | |
| 307 movd eax, mm1 | |
| 308 | |
| 309 mov edx, eax | |
| 310 shr edx, 16 | |
| 311 | |
| 312 mov WORD PTR[edi], ax | |
| 313 mov BYTE PTR[edi+2], dl | |
| 314 | |
| 315 } | |
| 316 | |
| 317 } | |
| 318 | |
| 319 __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85
, 85 }; | |
| 320 __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171
, 171 }; | |
| 321 | |
| 322 static | |
| 323 void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch,
unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
| 324 | |
| 325 __asm { | |
| 326 push ebx | |
| 327 | |
| 328 mov esi, source // Get the source and destinat
ion pointer | |
| 329 mov ecx, src_pitch // Get the pitch size | |
| 330 | |
| 331 mov edi, dest // tow lines below | |
| 332 pxor mm7, mm7 // clear out mm7 | |
| 333 | |
| 334 mov edx, dest_pitch // Loop counter | |
| 335 movq mm5, one_thirds | |
| 336 | |
| 337 movq mm6, two_thirds | |
| 338 mov ebx, dest_width; | |
| 339 | |
| 340 vs_5_3_loop: | |
| 341 | |
| 342 movd mm0, DWORD ptr [esi] // src[0]; | |
| 343 movd mm1, DWORD ptr [esi+ecx] // src[1]; | |
| 344 | |
| 345 movd mm2, DWORD ptr [esi+ecx*2] | |
| 346 lea eax, [esi+ecx*2] // | |
| 347 | |
| 348 punpcklbw mm1, mm7 | |
| 349 punpcklbw mm2, mm7 | |
| 350 | |
| 351 pmullw mm1, mm5 | |
| 352 pmullw mm2, mm6 | |
| 353 | |
| 354 movd mm3, DWORD ptr [eax+ecx] | |
| 355 movd mm4, DWORD ptr [eax+ecx*2] | |
| 356 | |
| 357 punpcklbw mm3, mm7 | |
| 358 punpcklbw mm4, mm7 | |
| 359 | |
| 360 pmullw mm3, mm6 | |
| 361 pmullw mm4, mm5 | |
| 362 | |
| 363 | |
| 364 movd DWORD PTR [edi], mm0 | |
| 365 paddw mm1, mm2 | |
| 366 | |
| 367 paddw mm1, round_values | |
| 368 psrlw mm1, 8 | |
| 369 | |
| 370 packuswb mm1, mm7 | |
| 371 paddw mm3, mm4 | |
| 372 | |
| 373 paddw mm3, round_values | |
| 374 movd DWORD PTR [edi+edx], mm1 | |
| 375 | |
| 376 psrlw mm3, 8 | |
| 377 packuswb mm3, mm7 | |
| 378 | |
| 379 movd DWORD PTR [edi+edx*2], mm3 | |
| 380 | |
| 381 | |
| 382 add edi, 4 | |
| 383 add esi, 4 | |
| 384 | |
| 385 sub ebx, 4 | |
| 386 jg vs_5_3_loop | |
| 387 | |
| 388 pop ebx | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 | |
| 393 | |
| 394 | |
| 395 /**************************************************************************** | |
| 396 * | |
| 397 * ROUTINE : horizontal_line_2_1_scale | |
| 398 * | |
| 399 * INPUTS : const unsigned char *source : | |
| 400 * unsigned int source_width : | |
| 401 * unsigned char *dest : | |
| 402 * unsigned int dest_width : | |
| 403 * | |
| 404 * OUTPUTS : None. | |
| 405 * | |
| 406 * RETURNS : void | |
| 407 * | |
| 408 * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. | |
| 409 * | |
| 410 * SPECIAL NOTES : None. | |
| 411 * | |
| 412 ****************************************************************************/ | |
| 413 static | |
| 414 void horizontal_line_2_1_scale_mmx | |
| 415 ( | |
| 416 const unsigned char *source, | |
| 417 unsigned int source_width, | |
| 418 unsigned char *dest, | |
| 419 unsigned int dest_width | |
| 420 ) { | |
| 421 (void) dest_width; | |
| 422 (void) source_width; | |
| 423 __asm { | |
| 424 mov esi, source | |
| 425 mov edi, dest | |
| 426 | |
| 427 pxor mm7, mm7 | |
| 428 mov ecx, dest_width | |
| 429 | |
| 430 xor edx, edx | |
| 431 hs_2_1_loop: | |
| 432 | |
| 433 movq mm0, [esi+edx*2] | |
| 434 psllw mm0, 8 | |
| 435 | |
| 436 psrlw mm0, 8 | |
| 437 packuswb mm0, mm7 | |
| 438 | |
| 439 movd DWORD Ptr [edi+edx], mm0; | |
| 440 add edx, 4 | |
| 441 | |
| 442 cmp edx, ecx | |
| 443 jl hs_2_1_loop | |
| 444 | |
| 445 } | |
| 446 } | |
| 447 | |
| 448 | |
| 449 | |
/*
 * 2:1 vertical band scaler without filtering: copies dest_width bytes from
 * source to dest via vpx_memcpy. Neither pitch is used.
 */
static
void vertical_band_2_1_scale_mmx(unsigned char *source,
                                 unsigned int src_pitch,
                                 unsigned char *dest,
                                 unsigned int dest_pitch,
                                 unsigned int dest_width) {
  (void) src_pitch;
  (void) dest_pitch;

  vpx_memcpy(dest, source, dest_width);
}
| 456 | |
| 457 | |
| 458 __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4
8, 48, 48 }; | |
| 459 __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 16
0, 160, 160 }; | |
| 460 | |
| 461 static | |
| 462 void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch
, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
| 463 | |
| 464 (void) dest_pitch; | |
| 465 __asm { | |
| 466 mov esi, source | |
| 467 mov edi, dest | |
| 468 | |
| 469 mov eax, src_pitch | |
| 470 mov edx, dest_width | |
| 471 | |
| 472 pxor mm7, mm7 | |
| 473 sub esi, eax // back one line | |
| 474 | |
| 475 | |
| 476 lea ecx, [esi+edx]; | |
| 477 movq mm6, round_values; | |
| 478 | |
| 479 movq mm5, three_sixteenths; | |
| 480 movq mm4, ten_sixteenths; | |
| 481 | |
| 482 vs_2_1_i_loop: | |
| 483 movd mm0, [esi] // | |
| 484 movd mm1, [esi+eax] // | |
| 485 | |
| 486 movd mm2, [esi+eax*2] // | |
| 487 punpcklbw mm0, mm7 | |
| 488 | |
| 489 pmullw mm0, mm5 | |
| 490 punpcklbw mm1, mm7 | |
| 491 | |
| 492 pmullw mm1, mm4 | |
| 493 punpcklbw mm2, mm7 | |
| 494 | |
| 495 pmullw mm2, mm5 | |
| 496 paddw mm0, round_values | |
| 497 | |
| 498 paddw mm1, mm2 | |
| 499 paddw mm0, mm1 | |
| 500 | |
| 501 psrlw mm0, 8 | |
| 502 packuswb mm0, mm7 | |
| 503 | |
| 504 movd DWORD PTR [edi], mm0 | |
| 505 add esi, 4 | |
| 506 | |
| 507 add edi, 4; | |
| 508 cmp esi, ecx | |
| 509 jl vs_2_1_i_loop | |
| 510 | |
| 511 } | |
| 512 } | |
| 513 | |
| 514 | |
| 515 | |
| 516 void | |
| 517 register_mmxscalers(void) { | |
| 518 vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; | |
| 519 vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; | |
| 520 vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; | |
| 521 vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; | |
| 522 vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; | |
| 523 vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; | |
| 524 vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; | |
| 525 } | |
| OLD | NEW |