Chromium Code Reviews

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm

Issue 958693004: libvpx: Pull from upstream (Closed)
Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 10 months ago
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 181 matching lines...)
192 %endif 192 %endif
193 %endif 193 %endif
194 194
195 ASSERT %1 <= 16 ; m6 overflows if w > 16 195 ASSERT %1 <= 16 ; m6 overflows if w > 16
196 pxor m6, m6 ; sum 196 pxor m6, m6 ; sum
197 pxor m7, m7 ; sse 197 pxor m7, m7 ; sse
198 198
199 %if %1 < 16 199 %if %1 < 16
200 sar h, 1 200 sar h, 1
201 %endif 201 %endif
202 %if %2 == 1 ; avg
203 shl sec_str, 1
204 %endif
202 205
203 ; FIXME(rbultje) replace by jumptable? 206 ; FIXME(rbultje) replace by jumptable?
204 test x_offsetd, x_offsetd 207 test x_offsetd, x_offsetd
205 jnz .x_nonzero 208 jnz .x_nonzero
206 ; x_offset == 0 209 ; x_offset == 0
207 test y_offsetd, y_offsetd 210 test y_offsetd, y_offsetd
208 jnz .x_zero_y_nonzero 211 jnz .x_zero_y_nonzero
209 212
210 ; x_offset == 0 && y_offset == 0 213 ; x_offset == 0 && y_offset == 0
211 .x_zero_y_zero_loop: 214 .x_zero_y_zero_loop:
212 %if %1 == 16 215 %if %1 == 16
213 movu m0, [srcq] 216 movu m0, [srcq]
214 movu m2, [srcq + 16] 217 movu m2, [srcq + 16]
215 mova m1, [dstq] 218 mova m1, [dstq]
216 mova m3, [dstq + 16] 219 mova m3, [dstq + 16]
217 %if %2 == 1 ; avg 220 %if %2 == 1 ; avg
218 pavgw m0, [secq] 221 pavgw m0, [secq]
219 pavgw m2, [secq+16] 222 pavgw m2, [secq+16]
220 %endif 223 %endif
221 SUM_SSE m0, m1, m2, m3, m6, m7 224 SUM_SSE m0, m1, m2, m3, m6, m7
222 225
223 lea srcq, [srcq + src_strideq*2] 226 lea srcq, [srcq + src_strideq*2]
224 lea dstq, [dstq + dst_strideq*2] 227 lea dstq, [dstq + dst_strideq*2]
225 %if %2 == 1 ; avg 228 %if %2 == 1 ; avg
226 lea secq, [secq + sec_str*2] 229 add secq, sec_str
227 %endif 230 %endif
228 %else ; %1 < 16 231 %else ; %1 < 16
229 movu m0, [srcq] 232 movu m0, [srcq]
230 movu m2, [srcq + src_strideq*2] 233 movu m2, [srcq + src_strideq*2]
231 mova m1, [dstq] 234 mova m1, [dstq]
232 mova m3, [dstq + dst_strideq*2] 235 mova m3, [dstq + dst_strideq*2]
233 %if %2 == 1 ; avg 236 %if %2 == 1 ; avg
234 pavgw m0, [secq] 237 pavgw m0, [secq]
235 pavgw m2, [secq + sec_str*2] 238 add secq, sec_str
239 pavgw m2, [secq]
236 %endif 240 %endif
237 SUM_SSE m0, m1, m2, m3, m6, m7 241 SUM_SSE m0, m1, m2, m3, m6, m7
238 242
239 lea srcq, [srcq + src_strideq*4] 243 lea srcq, [srcq + src_strideq*4]
240 lea dstq, [dstq + dst_strideq*4] 244 lea dstq, [dstq + dst_strideq*4]
241 %if %2 == 1 ; avg 245 %if %2 == 1 ; avg
242 lea secq, [secq + sec_str*4] 246 add secq, sec_str
243 %endif 247 %endif
244 %endif 248 %endif
245 dec h 249 dec h
246 jg .x_zero_y_zero_loop 250 jg .x_zero_y_zero_loop
247 STORE_AND_RET 251 STORE_AND_RET
248 252
249 .x_zero_y_nonzero: 253 .x_zero_y_nonzero:
250 cmp y_offsetd, 8 254 cmp y_offsetd, 8
251 jne .x_zero_y_nonhalf 255 jne .x_zero_y_nonhalf
252 256
(...skipping 10 matching lines...)
263 pavgw m1, m5 267 pavgw m1, m5
264 %if %2 == 1 ; avg 268 %if %2 == 1 ; avg
265 pavgw m0, [secq] 269 pavgw m0, [secq]
266 pavgw m1, [secq+16] 270 pavgw m1, [secq+16]
267 %endif 271 %endif
268 SUM_SSE m0, m2, m1, m3, m6, m7 272 SUM_SSE m0, m2, m1, m3, m6, m7
269 273
270 lea srcq, [srcq + src_strideq*2] 274 lea srcq, [srcq + src_strideq*2]
271 lea dstq, [dstq + dst_strideq*2] 275 lea dstq, [dstq + dst_strideq*2]
272 %if %2 == 1 ; avg 276 %if %2 == 1 ; avg
273 lea secq, [secq + sec_str*2] 277 add secq, sec_str
274 %endif 278 %endif
275 %else ; %1 < 16 279 %else ; %1 < 16
276 movu m0, [srcq] 280 movu m0, [srcq]
277 movu m1, [srcq+src_strideq*2] 281 movu m1, [srcq+src_strideq*2]
278 movu m5, [srcq+src_strideq*4] 282 movu m5, [srcq+src_strideq*4]
279 mova m2, [dstq] 283 mova m2, [dstq]
280 mova m3, [dstq+dst_strideq*2] 284 mova m3, [dstq+dst_strideq*2]
281 pavgw m0, m1 285 pavgw m0, m1
282 pavgw m1, m5 286 pavgw m1, m5
283 %if %2 == 1 ; avg 287 %if %2 == 1 ; avg
284 pavgw m0, [secq] 288 pavgw m0, [secq]
285 pavgw m1, [secq+sec_str*2] 289 add secq, sec_str
290 pavgw m1, [secq]
286 %endif 291 %endif
287 SUM_SSE m0, m2, m1, m3, m6, m7 292 SUM_SSE m0, m2, m1, m3, m6, m7
288 293
289 lea srcq, [srcq + src_strideq*4] 294 lea srcq, [srcq + src_strideq*4]
290 lea dstq, [dstq + dst_strideq*4] 295 lea dstq, [dstq + dst_strideq*4]
291 %if %2 == 1 ; avg 296 %if %2 == 1 ; avg
292 lea secq, [secq + sec_str*4] 297 add secq, sec_str
293 %endif 298 %endif
294 %endif 299 %endif
295 dec h 300 dec h
296 jg .x_zero_y_half_loop 301 jg .x_zero_y_half_loop
297 STORE_AND_RET 302 STORE_AND_RET
298 303
299 .x_zero_y_nonhalf: 304 .x_zero_y_nonhalf:
300 ; x_offset == 0 && y_offset == bilin interpolation 305 ; x_offset == 0 && y_offset == bilin interpolation
301 %ifdef PIC 306 %ifdef PIC
302 lea bilin_filter, [bilin_filter_m] 307 lea bilin_filter, [bilin_filter_m]
(...skipping 48 matching lines...)
351 psrlw m0, 4 356 psrlw m0, 4
352 %if %2 == 1 ; avg 357 %if %2 == 1 ; avg
353 pavgw m0, [secq] 358 pavgw m0, [secq]
354 pavgw m1, [secq+16] 359 pavgw m1, [secq+16]
355 %endif 360 %endif
356 SUM_SSE m0, m2, m1, m3, m6, m7 361 SUM_SSE m0, m2, m1, m3, m6, m7
357 362
358 lea srcq, [srcq + src_strideq*2] 363 lea srcq, [srcq + src_strideq*2]
359 lea dstq, [dstq + dst_strideq*2] 364 lea dstq, [dstq + dst_strideq*2]
360 %if %2 == 1 ; avg 365 %if %2 == 1 ; avg
361 lea secq, [secq + sec_str*2] 366 add secq, sec_str
362 %endif 367 %endif
363 %else ; %1 < 16 368 %else ; %1 < 16
364 movu m0, [srcq] 369 movu m0, [srcq]
365 movu m1, [srcq+src_strideq*2] 370 movu m1, [srcq+src_strideq*2]
366 movu m5, [srcq+src_strideq*4] 371 movu m5, [srcq+src_strideq*4]
367 mova m4, m1 372 mova m4, m1
368 mova m2, [dstq] 373 mova m2, [dstq]
369 mova m3, [dstq+dst_strideq*2] 374 mova m3, [dstq+dst_strideq*2]
370 pmullw m1, filter_y_a 375 pmullw m1, filter_y_a
371 pmullw m5, filter_y_b 376 pmullw m5, filter_y_b
372 paddw m1, filter_rnd 377 paddw m1, filter_rnd
373 pmullw m0, filter_y_a 378 pmullw m0, filter_y_a
374 pmullw m4, filter_y_b 379 pmullw m4, filter_y_b
375 paddw m0, filter_rnd 380 paddw m0, filter_rnd
376 paddw m1, m5 381 paddw m1, m5
377 paddw m0, m4 382 paddw m0, m4
378 psrlw m1, 4 383 psrlw m1, 4
379 psrlw m0, 4 384 psrlw m0, 4
380 %if %2 == 1 ; avg 385 %if %2 == 1 ; avg
381 pavgw m0, [secq] 386 pavgw m0, [secq]
382 pavgw m1, [secq+sec_str*2] 387 add secq, sec_str
388 pavgw m1, [secq]
383 %endif 389 %endif
384 SUM_SSE m0, m2, m1, m3, m6, m7 390 SUM_SSE m0, m2, m1, m3, m6, m7
385 391
386 lea srcq, [srcq + src_strideq*4] 392 lea srcq, [srcq + src_strideq*4]
387 lea dstq, [dstq + dst_strideq*4] 393 lea dstq, [dstq + dst_strideq*4]
388 %if %2 == 1 ; avg 394 %if %2 == 1 ; avg
389 lea secq, [secq + sec_str*4] 395 add secq, sec_str
390 %endif 396 %endif
391 %endif 397 %endif
392 dec h 398 dec h
393 jg .x_zero_y_other_loop 399 jg .x_zero_y_other_loop
394 %undef filter_y_a 400 %undef filter_y_a
395 %undef filter_y_b 401 %undef filter_y_b
396 %undef filter_rnd 402 %undef filter_rnd
397 STORE_AND_RET 403 STORE_AND_RET
398 404
399 .x_nonzero: 405 .x_nonzero:
(...skipping 16 matching lines...)
416 pavgw m1, m5 422 pavgw m1, m5
417 %if %2 == 1 ; avg 423 %if %2 == 1 ; avg
418 pavgw m0, [secq] 424 pavgw m0, [secq]
419 pavgw m1, [secq+16] 425 pavgw m1, [secq+16]
420 %endif 426 %endif
421 SUM_SSE m0, m2, m1, m3, m6, m7 427 SUM_SSE m0, m2, m1, m3, m6, m7
422 428
423 lea srcq, [srcq + src_strideq*2] 429 lea srcq, [srcq + src_strideq*2]
424 lea dstq, [dstq + dst_strideq*2] 430 lea dstq, [dstq + dst_strideq*2]
425 %if %2 == 1 ; avg 431 %if %2 == 1 ; avg
426 lea secq, [secq + sec_str*2] 432 add secq, sec_str
427 %endif 433 %endif
428 %else ; %1 < 16 434 %else ; %1 < 16
429 movu m0, [srcq] 435 movu m0, [srcq]
430 movu m1, [srcq + src_strideq*2] 436 movu m1, [srcq + src_strideq*2]
431 movu m4, [srcq + 2] 437 movu m4, [srcq + 2]
432 movu m5, [srcq + src_strideq*2 + 2] 438 movu m5, [srcq + src_strideq*2 + 2]
433 mova m2, [dstq] 439 mova m2, [dstq]
434 mova m3, [dstq + dst_strideq*2] 440 mova m3, [dstq + dst_strideq*2]
435 pavgw m0, m4 441 pavgw m0, m4
436 pavgw m1, m5 442 pavgw m1, m5
437 %if %2 == 1 ; avg 443 %if %2 == 1 ; avg
438 pavgw m0, [secq] 444 pavgw m0, [secq]
439 pavgw m1, [secq+sec_str*2] 445 add secq, sec_str
446 pavgw m1, [secq]
440 %endif 447 %endif
441 SUM_SSE m0, m2, m1, m3, m6, m7 448 SUM_SSE m0, m2, m1, m3, m6, m7
442 449
443 lea srcq, [srcq + src_strideq*4] 450 lea srcq, [srcq + src_strideq*4]
444 lea dstq, [dstq + dst_strideq*4] 451 lea dstq, [dstq + dst_strideq*4]
445 %if %2 == 1 ; avg 452 %if %2 == 1 ; avg
446 lea secq, [secq + sec_str*4] 453 add secq, sec_str
447 %endif 454 %endif
448 %endif 455 %endif
449 dec h 456 dec h
450 jg .x_half_y_zero_loop 457 jg .x_half_y_zero_loop
451 STORE_AND_RET 458 STORE_AND_RET
452 459
453 .x_half_y_nonzero: 460 .x_half_y_nonzero:
454 cmp y_offsetd, 8 461 cmp y_offsetd, 8
455 jne .x_half_y_nonhalf 462 jne .x_half_y_nonhalf
456 463
(...skipping 21 matching lines...)
478 pavgw m0, [secq] 485 pavgw m0, [secq]
479 pavgw m1, [secq+16] 486 pavgw m1, [secq+16]
480 %endif 487 %endif
481 SUM_SSE m0, m4, m1, m5, m6, m7 488 SUM_SSE m0, m4, m1, m5, m6, m7
482 mova m0, m2 489 mova m0, m2
483 mova m1, m3 490 mova m1, m3
484 491
485 lea srcq, [srcq + src_strideq*2] 492 lea srcq, [srcq + src_strideq*2]
486 lea dstq, [dstq + dst_strideq*2] 493 lea dstq, [dstq + dst_strideq*2]
487 %if %2 == 1 ; avg 494 %if %2 == 1 ; avg
488 lea secq, [secq + sec_str*2] 495 add secq, sec_str
489 %endif 496 %endif
490 %else ; %1 < 16 497 %else ; %1 < 16
491 movu m0, [srcq] 498 movu m0, [srcq]
492 movu m2, [srcq+2] 499 movu m2, [srcq+2]
493 lea srcq, [srcq + src_strideq*2] 500 lea srcq, [srcq + src_strideq*2]
494 pavgw m0, m2 501 pavgw m0, m2
495 .x_half_y_half_loop: 502 .x_half_y_half_loop:
496 movu m2, [srcq] 503 movu m2, [srcq]
497 movu m3, [srcq + src_strideq*2] 504 movu m3, [srcq + src_strideq*2]
498 movu m4, [srcq + 2] 505 movu m4, [srcq + 2]
499 movu m5, [srcq + src_strideq*2 + 2] 506 movu m5, [srcq + src_strideq*2 + 2]
500 pavgw m2, m4 507 pavgw m2, m4
501 pavgw m3, m5 508 pavgw m3, m5
502 pavgw m0, m2 509 pavgw m0, m2
503 pavgw m2, m3 510 pavgw m2, m3
504 mova m4, [dstq] 511 mova m4, [dstq]
505 mova m5, [dstq + dst_strideq*2] 512 mova m5, [dstq + dst_strideq*2]
506 %if %2 == 1 ; avg 513 %if %2 == 1 ; avg
507 pavgw m0, [secq] 514 pavgw m0, [secq]
508 pavgw m2, [secq+sec_str*2] 515 add secq, sec_str
516 pavgw m2, [secq]
509 %endif 517 %endif
510 SUM_SSE m0, m4, m2, m5, m6, m7 518 SUM_SSE m0, m4, m2, m5, m6, m7
511 mova m0, m3 519 mova m0, m3
512 520
513 lea srcq, [srcq + src_strideq*4] 521 lea srcq, [srcq + src_strideq*4]
514 lea dstq, [dstq + dst_strideq*4] 522 lea dstq, [dstq + dst_strideq*4]
515 %if %2 == 1 ; avg 523 %if %2 == 1 ; avg
516 lea secq, [secq + sec_str*4] 524 add secq, sec_str
517 %endif 525 %endif
518 %endif 526 %endif
519 dec h 527 dec h
520 jg .x_half_y_half_loop 528 jg .x_half_y_half_loop
521 STORE_AND_RET 529 STORE_AND_RET
522 530
523 .x_half_y_nonhalf: 531 .x_half_y_nonhalf:
524 ; x_offset == 0.5 && y_offset == bilin interpolation 532 ; x_offset == 0.5 && y_offset == bilin interpolation
525 %ifdef PIC 533 %ifdef PIC
526 lea bilin_filter, [bilin_filter_m] 534 lea bilin_filter, [bilin_filter_m]
(...skipping 56 matching lines...)
583 pavgw m0, [secq] 591 pavgw m0, [secq]
584 pavgw m1, [secq+16] 592 pavgw m1, [secq+16]
585 %endif 593 %endif
586 SUM_SSE m0, m2, m1, m3, m6, m7 594 SUM_SSE m0, m2, m1, m3, m6, m7
587 mova m0, m4 595 mova m0, m4
588 mova m1, m5 596 mova m1, m5
589 597
590 lea srcq, [srcq + src_strideq*2] 598 lea srcq, [srcq + src_strideq*2]
591 lea dstq, [dstq + dst_strideq*2] 599 lea dstq, [dstq + dst_strideq*2]
592 %if %2 == 1 ; avg 600 %if %2 == 1 ; avg
593 lea secq, [secq + sec_str*2] 601 add secq, sec_str
594 %endif 602 %endif
595 %else ; %1 < 16 603 %else ; %1 < 16
596 movu m0, [srcq] 604 movu m0, [srcq]
597 movu m2, [srcq+2] 605 movu m2, [srcq+2]
598 lea srcq, [srcq + src_strideq*2] 606 lea srcq, [srcq + src_strideq*2]
599 pavgw m0, m2 607 pavgw m0, m2
600 .x_half_y_other_loop: 608 .x_half_y_other_loop:
601 movu m2, [srcq] 609 movu m2, [srcq]
602 movu m3, [srcq+src_strideq*2] 610 movu m3, [srcq+src_strideq*2]
603 movu m4, [srcq+2] 611 movu m4, [srcq+2]
604 movu m5, [srcq+src_strideq*2+2] 612 movu m5, [srcq+src_strideq*2+2]
605 pavgw m2, m4 613 pavgw m2, m4
606 pavgw m3, m5 614 pavgw m3, m5
607 mova m4, m2 615 mova m4, m2
608 mova m5, m3 616 mova m5, m3
609 pmullw m4, filter_y_a 617 pmullw m4, filter_y_a
610 pmullw m3, filter_y_b 618 pmullw m3, filter_y_b
611 paddw m4, filter_rnd 619 paddw m4, filter_rnd
612 paddw m4, m3 620 paddw m4, m3
613 pmullw m0, filter_y_a 621 pmullw m0, filter_y_a
614 pmullw m2, filter_y_b 622 pmullw m2, filter_y_b
615 paddw m0, filter_rnd 623 paddw m0, filter_rnd
616 psrlw m4, 4 624 psrlw m4, 4
617 paddw m0, m2 625 paddw m0, m2
618 mova m2, [dstq] 626 mova m2, [dstq]
619 psrlw m0, 4 627 psrlw m0, 4
620 mova m3, [dstq+dst_strideq*2] 628 mova m3, [dstq+dst_strideq*2]
621 %if %2 == 1 ; avg 629 %if %2 == 1 ; avg
622 pavgw m0, [secq] 630 pavgw m0, [secq]
623 pavgw m4, [secq+sec_str*2] 631 add secq, sec_str
632 pavgw m4, [secq]
624 %endif 633 %endif
625 SUM_SSE m0, m2, m4, m3, m6, m7 634 SUM_SSE m0, m2, m4, m3, m6, m7
626 mova m0, m5 635 mova m0, m5
627 636
628 lea srcq, [srcq + src_strideq*4] 637 lea srcq, [srcq + src_strideq*4]
629 lea dstq, [dstq + dst_strideq*4] 638 lea dstq, [dstq + dst_strideq*4]
630 %if %2 == 1 ; avg 639 %if %2 == 1 ; avg
631 lea secq, [secq + sec_str*4] 640 add secq, sec_str
632 %endif 641 %endif
633 %endif 642 %endif
634 dec h 643 dec h
635 jg .x_half_y_other_loop 644 jg .x_half_y_other_loop
636 %undef filter_y_a 645 %undef filter_y_a
637 %undef filter_y_b 646 %undef filter_y_b
638 %undef filter_rnd 647 %undef filter_rnd
639 STORE_AND_RET 648 STORE_AND_RET
640 649
641 .x_nonhalf: 650 .x_nonhalf:
(...skipping 49 matching lines...)
691 psrlw m0, 4 700 psrlw m0, 4
692 %if %2 == 1 ; avg 701 %if %2 == 1 ; avg
693 pavgw m0, [secq] 702 pavgw m0, [secq]
694 pavgw m1, [secq+16] 703 pavgw m1, [secq+16]
695 %endif 704 %endif
696 SUM_SSE m0, m4, m1, m5, m6, m7 705 SUM_SSE m0, m4, m1, m5, m6, m7
697 706
698 lea srcq, [srcq+src_strideq*2] 707 lea srcq, [srcq+src_strideq*2]
699 lea dstq, [dstq+dst_strideq*2] 708 lea dstq, [dstq+dst_strideq*2]
700 %if %2 == 1 ; avg 709 %if %2 == 1 ; avg
701 lea secq, [secq + sec_str*2] 710 add secq, sec_str
702 %endif 711 %endif
703 %else ; %1 < 16 712 %else ; %1 < 16
704 movu m0, [srcq] 713 movu m0, [srcq]
705 movu m1, [srcq+src_strideq*2] 714 movu m1, [srcq+src_strideq*2]
706 movu m2, [srcq+2] 715 movu m2, [srcq+2]
707 movu m3, [srcq+src_strideq*2+2] 716 movu m3, [srcq+src_strideq*2+2]
708 mova m4, [dstq] 717 mova m4, [dstq]
709 mova m5, [dstq+dst_strideq*2] 718 mova m5, [dstq+dst_strideq*2]
710 pmullw m1, filter_x_a 719 pmullw m1, filter_x_a
711 pmullw m3, filter_x_b 720 pmullw m3, filter_x_b
712 paddw m1, filter_rnd 721 paddw m1, filter_rnd
713 pmullw m0, filter_x_a 722 pmullw m0, filter_x_a
714 pmullw m2, filter_x_b 723 pmullw m2, filter_x_b
715 paddw m0, filter_rnd 724 paddw m0, filter_rnd
716 paddw m1, m3 725 paddw m1, m3
717 paddw m0, m2 726 paddw m0, m2
718 psrlw m1, 4 727 psrlw m1, 4
719 psrlw m0, 4 728 psrlw m0, 4
720 %if %2 == 1 ; avg 729 %if %2 == 1 ; avg
721 pavgw m0, [secq] 730 pavgw m0, [secq]
722 pavgw m1, [secq+sec_str*2] 731 add secq, sec_str
732 pavgw m1, [secq]
723 %endif 733 %endif
724 SUM_SSE m0, m4, m1, m5, m6, m7 734 SUM_SSE m0, m4, m1, m5, m6, m7
725 735
726 lea srcq, [srcq+src_strideq*4] 736 lea srcq, [srcq+src_strideq*4]
727 lea dstq, [dstq+dst_strideq*4] 737 lea dstq, [dstq+dst_strideq*4]
728 %if %2 == 1 ; avg 738 %if %2 == 1 ; avg
729 lea secq, [secq + sec_str*4] 739 add secq, sec_str
730 %endif 740 %endif
731 %endif 741 %endif
732 dec h 742 dec h
733 jg .x_other_y_zero_loop 743 jg .x_other_y_zero_loop
734 %undef filter_x_a 744 %undef filter_x_a
735 %undef filter_x_b 745 %undef filter_x_b
736 %undef filter_rnd 746 %undef filter_rnd
737 STORE_AND_RET 747 STORE_AND_RET
738 748
739 .x_nonhalf_y_nonzero: 749 .x_nonhalf_y_nonzero:
(...skipping 68 matching lines...)
808 pavgw m0, [secq] 818 pavgw m0, [secq]
809 pavgw m1, [secq+16] 819 pavgw m1, [secq+16]
810 %endif 820 %endif
811 SUM_SSE m0, m4, m1, m5, m6, m7 821 SUM_SSE m0, m4, m1, m5, m6, m7
812 mova m0, m2 822 mova m0, m2
813 mova m1, m3 823 mova m1, m3
814 824
815 lea srcq, [srcq+src_strideq*2] 825 lea srcq, [srcq+src_strideq*2]
816 lea dstq, [dstq+dst_strideq*2] 826 lea dstq, [dstq+dst_strideq*2]
817 %if %2 == 1 ; avg 827 %if %2 == 1 ; avg
818 lea secq, [secq + sec_str*2] 828 add secq, sec_str
819 %endif 829 %endif
820 %else ; %1 < 16 830 %else ; %1 < 16
821 movu m0, [srcq] 831 movu m0, [srcq]
822 movu m2, [srcq+2] 832 movu m2, [srcq+2]
823 pmullw m0, filter_x_a 833 pmullw m0, filter_x_a
824 pmullw m2, filter_x_b 834 pmullw m2, filter_x_b
825 paddw m0, filter_rnd 835 paddw m0, filter_rnd
826 paddw m0, m2 836 paddw m0, m2
827 psrlw m0, 4 837 psrlw m0, 4
828 lea srcq, [srcq+src_strideq*2] 838 lea srcq, [srcq+src_strideq*2]
(...skipping 11 matching lines...)
840 paddw m2, m4 850 paddw m2, m4
841 paddw m3, m5 851 paddw m3, m5
842 mova m4, [dstq] 852 mova m4, [dstq]
843 mova m5, [dstq+dst_strideq*2] 853 mova m5, [dstq+dst_strideq*2]
844 psrlw m2, 4 854 psrlw m2, 4
845 psrlw m3, 4 855 psrlw m3, 4
846 pavgw m0, m2 856 pavgw m0, m2
847 pavgw m2, m3 857 pavgw m2, m3
848 %if %2 == 1 ; avg 858 %if %2 == 1 ; avg
849 pavgw m0, [secq] 859 pavgw m0, [secq]
850 pavgw m2, [secq+sec_str*2] 860 add secq, sec_str
861 pavgw m2, [secq]
851 %endif 862 %endif
852 SUM_SSE m0, m4, m2, m5, m6, m7 863 SUM_SSE m0, m4, m2, m5, m6, m7
853 mova m0, m3 864 mova m0, m3
854 865
855 lea srcq, [srcq+src_strideq*4] 866 lea srcq, [srcq+src_strideq*4]
856 lea dstq, [dstq+dst_strideq*4] 867 lea dstq, [dstq+dst_strideq*4]
857 %if %2 == 1 ; avg 868 %if %2 == 1 ; avg
858 lea secq, [secq + sec_str*4] 869 add secq, sec_str
859 %endif 870 %endif
860 %endif 871 %endif
861 dec h 872 dec h
862 jg .x_other_y_half_loop 873 jg .x_other_y_half_loop
863 %undef filter_x_a 874 %undef filter_x_a
864 %undef filter_x_b 875 %undef filter_x_b
865 %undef filter_rnd 876 %undef filter_rnd
866 STORE_AND_RET 877 STORE_AND_RET
867 878
868 .x_nonhalf_y_nonhalf: 879 .x_nonhalf_y_nonhalf:
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
962 pavgw m0, [secq] 973 pavgw m0, [secq]
963 pavgw m1, [secq+16] 974 pavgw m1, [secq+16]
964 %endif 975 %endif
965 SUM_SSE m0, m2, m1, m3, m6, m7 976 SUM_SSE m0, m2, m1, m3, m6, m7
966 mova m0, m4 977 mova m0, m4
967 mova m1, m5 978 mova m1, m5
968 979
969 INC_SRC_BY_SRC_STRIDE 980 INC_SRC_BY_SRC_STRIDE
970 lea dstq, [dstq + dst_strideq * 2] 981 lea dstq, [dstq + dst_strideq * 2]
971 %if %2 == 1 ; avg 982 %if %2 == 1 ; avg
972 lea secq, [secq + sec_str*2] 983 add secq, sec_str
973 %endif 984 %endif
974 %else ; %1 < 16 985 %else ; %1 < 16
975 movu m0, [srcq] 986 movu m0, [srcq]
976 movu m2, [srcq+2] 987 movu m2, [srcq+2]
977 pmullw m0, filter_x_a 988 pmullw m0, filter_x_a
978 pmullw m2, filter_x_b 989 pmullw m2, filter_x_b
979 paddw m0, filter_rnd 990 paddw m0, filter_rnd
980 paddw m0, m2 991 paddw m0, m2
981 psrlw m0, 4 992 psrlw m0, 4
982 993
(...skipping 23 matching lines...)
1006 pmullw m3, filter_y_b 1017 pmullw m3, filter_y_b
1007 paddw m0, m2 1018 paddw m0, m2
1008 paddw m4, filter_rnd 1019 paddw m4, filter_rnd
1009 mova m2, [dstq] 1020 mova m2, [dstq]
1010 paddw m4, m3 1021 paddw m4, m3
1011 psrlw m0, 4 1022 psrlw m0, 4
1012 psrlw m4, 4 1023 psrlw m4, 4
1013 mova m3, [dstq+dst_strideq*2] 1024 mova m3, [dstq+dst_strideq*2]
1014 %if %2 == 1 ; avg 1025 %if %2 == 1 ; avg
1015 pavgw m0, [secq] 1026 pavgw m0, [secq]
1016 pavgw m4, [secq+sec_str*2] 1027 add secq, sec_str
1028 pavgw m4, [secq]
1017 %endif 1029 %endif
1018 SUM_SSE m0, m2, m4, m3, m6, m7 1030 SUM_SSE m0, m2, m4, m3, m6, m7
1019 mova m0, m5 1031 mova m0, m5
1020 1032
1021 INC_SRC_BY_SRC_2STRIDE 1033 INC_SRC_BY_SRC_2STRIDE
1022 lea dstq, [dstq + dst_strideq * 4] 1034 lea dstq, [dstq + dst_strideq * 4]
1023 %if %2 == 1 ; avg 1035 %if %2 == 1 ; avg
1024 lea secq, [secq + sec_str*4] 1036 add secq, sec_str
1025 %endif 1037 %endif
1026 %endif 1038 %endif
1027 dec h 1039 dec h
1028 jg .x_other_y_other_loop 1040 jg .x_other_y_other_loop
1029 %undef filter_x_a 1041 %undef filter_x_a
1030 %undef filter_x_b 1042 %undef filter_x_b
1031 %undef filter_y_a 1043 %undef filter_y_a
1032 %undef filter_y_b 1044 %undef filter_y_b
1033 %undef filter_rnd 1045 %undef filter_rnd
1034 STORE_AND_RET 1046 STORE_AND_RET
1035 %endmacro 1047 %endmacro
1036 1048
1037 INIT_XMM sse2 1049 INIT_XMM sse2
1038 SUBPEL_VARIANCE 8 1050 SUBPEL_VARIANCE 8
1039 SUBPEL_VARIANCE 16 1051 SUBPEL_VARIANCE 16
1040 1052
1041 INIT_XMM sse2 1053 INIT_XMM sse2
1042 SUBPEL_VARIANCE 8, 1 1054 SUBPEL_VARIANCE 8, 1
1043 SUBPEL_VARIANCE 16, 1 1055 SUBPEL_VARIANCE 16, 1
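
The one pattern that repeats across all of the avg (%2 == 1) paths above is how the second-prediction pointer is stepped: instead of scaling sec_str by 2 or 4 in every lea/pavgw addressing expression, the new code doubles sec_str once up front (shl sec_str, 1) and then advances secq with plain adds. Below is a minimal sketch of the before/after pattern for a %1 < 16, avg iteration, using the macro's own names (secq, sec_str, pavgw, m0/m2) and 16-bit high-bit-depth samples; the old_step/new_step labels are only for illustration and do not appear in the patch.

    ; before: sec_str kept in samples, scaled by 2 (16-bit samples) at each use
    old_step:
        pavgw   m0, [secq]                ; average against first sec row
        pavgw   m2, [secq + sec_str*2]    ; second sec row, *2 for 16-bit samples
        lea     secq, [secq + sec_str*4]  ; advance past both rows

    ; after: stride doubled once, then plain adds inside the loop
        shl     sec_str, 1                ; done once, before the loops
    new_step:
        pavgw   m0, [secq]                ; first sec row
        add     secq, sec_str             ; step to the second row
        pavgw   m2, [secq]                ; second sec row
        add     secq, sec_str             ; advance past the second row

For the %1 == 16 paths, which consume one row per iteration, the same change turns each "lea secq, [secq + sec_str*2]" into a single "add secq, sec_str".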