Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm

Issue 11974002: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 pop rbx 195 pop rbx
196 196
197 ; begin epilog 197 ; begin epilog
198 pop rdi 198 pop rdi
199 pop rsi 199 pop rsi
200 RESTORE_GOT 200 RESTORE_GOT
201 UNSHADOW_ARGS 201 UNSHADOW_ARGS
202 pop rbp 202 pop rbp
203 ret 203 ret
204 204
205 ; Two-pass (horizontal, then vertical) bilinear prediction of an 8-pixel-wide, 8-row block using MMX.
206 ;void vp9_bilinear_predict8x8_mmx
207 ;(
208 ; unsigned char *src_ptr, - arg(0), first source row
209 ; int src_pixels_per_line, - arg(1), source stride in bytes
210 ; int xoffset, - arg(2), selects the horizontal tap pair
211 ; int yoffset, - arg(3), selects the vertical tap pair
212 ; unsigned char *dst_ptr, - arg(4), first destination row
213 ; int dst_pitch - arg(5), destination stride in bytes
214 ;)
215 global sym(vp9_bilinear_predict8x8_mmx) PRIVATE
216 sym(vp9_bilinear_predict8x8_mmx):
217 push rbp
218 mov rbp, rsp
219 SHADOW_ARGS_TO_STACK 6
220 GET_GOT rbx
221 push rsi
222 push rdi
223 ; end prolog
224 ; NOTE(review): no emms is issued in this routine; presumably the caller resets MMX state - confirm
225 ;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
226 ;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
227
228 movsxd rax, dword ptr arg(2) ;xoffset
229 mov rdi, arg(4) ;dst_ptr (rdi = output row pointer)
230
231 shl rax, 5 ; xoffset * 32 bytes (one table entry = 2 rows of 8 words)
232 lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] ; rcx = filter table base
233
234 add rax, rcx ; HFilter
235 mov rsi, arg(0) ;src_ptr (rsi = input row pointer)
236
237 movsxd rdx, dword ptr arg(5) ;dst_pitch
238 movq mm1, [rax] ; mm1 = 4 copies of horizontal tap 0
239
240 movq mm2, [rax+16] ; mm2 = 4 copies of horizontal tap 1
241 movsxd rax, dword ptr arg(3) ;yoffset
242
243 pxor mm0, mm0 ; mm0 = 0, used to widen bytes to words
244
245 shl rax, 5 ; yoffset * 32
246 add rax, rcx ; VFilter (rax stays live through the loop)
247
248 lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8 * dst_pitch, the loop end marker
249 movsxd rdx, dword ptr arg(1) ;src_pixels_per_line (rdx is reused as the source stride)
250
251
252
253 ; horizontal pass for the first source row; it only feeds the vertical pass
254 movq mm3, [rsi] ; load source bytes 0..7 of this row
255 movq mm4, mm3 ; make a copy of current line
256
257 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
258 punpckhbw mm4, mm0 ; bytes 4..7 widened to words
259
260 pmullw mm3, mm1 ; * horizontal tap 0
261 pmullw mm4, mm1 ; * horizontal tap 0
262
263 movq mm5, [rsi+1] ; load source bytes 1..8 (right-hand neighbours)
264 movq mm6, mm5 ; copy for the high half
265
266 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
267 punpckhbw mm6, mm0 ; bytes 5..8 widened to words
268
269 pmullw mm5, mm2 ; * horizontal tap 1
270 pmullw mm6, mm2 ; * horizontal tap 1
271
272 paddw mm3, mm5 ; low half: tap 0 + tap 1 products
273 paddw mm4, mm6 ; high half: tap 0 + tap 1 products
274
275 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
276 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
277
278 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
279 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
280
281 movq mm7, mm3 ; keep mm3 intact while packing
282 packuswb mm7, mm4 ; mm7 = filtered row as 8 bytes, kept as the previous row
283
284 add rsi, rdx ; next line
285 .next_row_8x8: ; per iteration: filter one new source row, blend it with mm7, store one output row
286 movq mm3, [rsi] ; load source bytes 0..7 of this row
287 movq mm4, mm3 ; make a copy of current line
288
289 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
290 punpckhbw mm4, mm0 ; bytes 4..7 widened to words
291
292 pmullw mm3, mm1 ; * horizontal tap 0
293 pmullw mm4, mm1 ; * horizontal tap 0
294
295 movq mm5, [rsi+1] ; load source bytes 1..8 (right-hand neighbours)
296 movq mm6, mm5 ; copy for the high half
297
298 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
299 punpckhbw mm6, mm0 ; bytes 5..8 widened to words
300
301 pmullw mm5, mm2 ; * horizontal tap 1
302 pmullw mm6, mm2 ; * horizontal tap 1
303
304 paddw mm3, mm5 ; low half: tap 0 + tap 1 products
305 paddw mm4, mm6 ; high half: tap 0 + tap 1 products
306
307 movq mm5, mm7 ; previous filtered row (packed bytes)
308 movq mm6, mm7 ; second copy for the high half
309
310 punpcklbw mm5, mm0 ; previous row, bytes 0..3 as words
311 punpckhbw mm6, mm0 ; previous row, bytes 4..7 as words
312
313 pmullw mm5, [rax] ; * vertical tap 0
314 pmullw mm6, [rax] ; * vertical tap 0
315
316 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
317 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
318
319 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
320 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
321
322 movq mm7, mm3 ; keep mm3 intact while packing
323 packuswb mm7, mm4 ; save this filtered row (as bytes) for the next iteration
324
325
326 pmullw mm3, [rax+16] ; current row * vertical tap 1
327 pmullw mm4, [rax+16] ; current row * vertical tap 1
328
329 paddw mm3, mm5 ; low half: add previous row * vertical tap 0
330 paddw mm4, mm6 ; high half: add previous row * vertical tap 0
331
332
333 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
334 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
335
336 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
337 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
338
339 packuswb mm3, mm4 ; back to 8 unsigned bytes, with saturation
340
341 movq [rdi], mm3 ; store the results in the destination
342
343 %if ABI_IS_32BIT
344 add rsi, rdx ; next line
345 add rdi, dword ptr arg(5) ;dst_pitch
346 %else
347 movsxd r8, dword ptr arg(5) ;dst_pitch - NOTE(review): loop-invariant, yet reloaded from the stack every iteration
348 add rsi, rdx ; next line
349 add rdi, r8 ;dst_pitch
350 %endif
351 cmp rdi, rcx ; reached dst_ptr + 8 * dst_pitch?
352 jne .next_row_8x8
353
354 ; begin epilog
355 pop rdi
356 pop rsi
357 RESTORE_GOT
358 UNSHADOW_ARGS
359 pop rbp
360 ret
361
362 ; Two-pass (horizontal, then vertical) bilinear prediction of an 8-pixel-wide, 4-row block using MMX.
363 ;void vp9_bilinear_predict8x4_mmx
364 ;(
365 ; unsigned char *src_ptr, - arg(0), first source row
366 ; int src_pixels_per_line, - arg(1), source stride in bytes
367 ; int xoffset, - arg(2), selects the horizontal tap pair
368 ; int yoffset, - arg(3), selects the vertical tap pair
369 ; unsigned char *dst_ptr, - arg(4), first destination row
370 ; int dst_pitch - arg(5), destination stride in bytes
371 ;)
372 global sym(vp9_bilinear_predict8x4_mmx) PRIVATE
373 sym(vp9_bilinear_predict8x4_mmx):
374 push rbp
375 mov rbp, rsp
376 SHADOW_ARGS_TO_STACK 6
377 GET_GOT rbx
378 push rsi
379 push rdi
380 ; end prolog
381 ; NOTE(review): no emms is issued in this routine; presumably the caller resets MMX state - confirm
382 ;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
383 ;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
384
385 movsxd rax, dword ptr arg(2) ;xoffset
386 mov rdi, arg(4) ;dst_ptr (rdi = output row pointer)
387
388 lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] ; rcx = filter table base
389 shl rax, 5 ; xoffset * 32 bytes (one table entry = 2 rows of 8 words)
390
391 mov rsi, arg(0) ;src_ptr (rsi = input row pointer)
392 add rax, rcx ; HFilter
393
394 movsxd rdx, dword ptr arg(5) ;dst_pitch
395 movq mm1, [rax] ; mm1 = 4 copies of horizontal tap 0
396
397 movq mm2, [rax+16] ; mm2 = 4 copies of horizontal tap 1
398 movsxd rax, dword ptr arg(3) ;yoffset
399
400 pxor mm0, mm0 ; mm0 = 0, used to widen bytes to words
401 shl rax, 5 ; yoffset * 32
402
403 add rax, rcx ; VFilter (rax stays live through the loop)
404 lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4 * dst_pitch, the loop end marker
405
406 movsxd rdx, dword ptr arg(1) ;src_pixels_per_line (rdx is reused as the source stride)
407
408 ; horizontal pass for the first source row; it only feeds the vertical pass
409 movq mm3, [rsi] ; load source bytes 0..7 of this row
410 movq mm4, mm3 ; make a copy of current line
411
412 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
413 punpckhbw mm4, mm0 ; bytes 4..7 widened to words
414
415 pmullw mm3, mm1 ; * horizontal tap 0
416 pmullw mm4, mm1 ; * horizontal tap 0
417
418 movq mm5, [rsi+1] ; load source bytes 1..8 (right-hand neighbours)
419 movq mm6, mm5 ; copy for the high half
420
421 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
422 punpckhbw mm6, mm0 ; bytes 5..8 widened to words
423
424 pmullw mm5, mm2 ; * horizontal tap 1
425 pmullw mm6, mm2 ; * horizontal tap 1
426
427 paddw mm3, mm5 ; low half: tap 0 + tap 1 products
428 paddw mm4, mm6 ; high half: tap 0 + tap 1 products
429
430 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
431 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
432
433 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
434 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
435
436 movq mm7, mm3 ; keep mm3 intact while packing
437 packuswb mm7, mm4 ; mm7 = filtered row as 8 bytes, kept as the previous row
438
439 add rsi, rdx ; next line
440 .next_row_8x4: ; per iteration: filter one new source row, blend it with mm7, store one output row
441 movq mm3, [rsi] ; load source bytes 0..7 of this row
442 movq mm4, mm3 ; make a copy of current line
443
444 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
445 punpckhbw mm4, mm0 ; bytes 4..7 widened to words
446
447 pmullw mm3, mm1 ; * horizontal tap 0
448 pmullw mm4, mm1 ; * horizontal tap 0
449
450 movq mm5, [rsi+1] ; load source bytes 1..8 (right-hand neighbours)
451 movq mm6, mm5 ; copy for the high half
452
453 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
454 punpckhbw mm6, mm0 ; bytes 5..8 widened to words
455
456 pmullw mm5, mm2 ; * horizontal tap 1
457 pmullw mm6, mm2 ; * horizontal tap 1
458
459 paddw mm3, mm5 ; low half: tap 0 + tap 1 products
460 paddw mm4, mm6 ; high half: tap 0 + tap 1 products
461
462 movq mm5, mm7 ; previous filtered row (packed bytes)
463 movq mm6, mm7 ; second copy for the high half
464
465 punpcklbw mm5, mm0 ; previous row, bytes 0..3 as words
466 punpckhbw mm6, mm0 ; previous row, bytes 4..7 as words
467
468 pmullw mm5, [rax] ; * vertical tap 0
469 pmullw mm6, [rax] ; * vertical tap 0
470
471 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
472 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
473
474 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
475 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
476
477 movq mm7, mm3 ; keep mm3 intact while packing
478 packuswb mm7, mm4 ; save this filtered row (as bytes) for the next iteration
479
480
481 pmullw mm3, [rax+16] ; current row * vertical tap 1
482 pmullw mm4, [rax+16] ; current row * vertical tap 1
483
484 paddw mm3, mm5 ; low half: add previous row * vertical tap 0
485 paddw mm4, mm6 ; high half: add previous row * vertical tap 0
486
487
488 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
489 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
490
491 paddw mm4, [GLOBAL(rd)] ; mm4 += round value
492 psraw mm4, VP9_FILTER_SHIFT ; mm4 >>= VP9_FILTER_SHIFT
493
494 packuswb mm3, mm4 ; back to 8 unsigned bytes, with saturation
495
496 movq [rdi], mm3 ; store the results in the destination
497
498 %if ABI_IS_32BIT
499 add rsi, rdx ; next line
500 add rdi, dword ptr arg(5) ;dst_pitch
501 %else
502 movsxd r8, dword ptr arg(5) ;dst_pitch - NOTE(review): loop-invariant, yet reloaded from the stack every iteration
503 add rsi, rdx ; next line
504 add rdi, r8 ;dst_pitch
505 %endif
506 cmp rdi, rcx ; reached dst_ptr + 4 * dst_pitch?
507 jne .next_row_8x4
508
509 ; begin epilog
510 pop rdi
511 pop rsi
512 RESTORE_GOT
513 UNSHADOW_ARGS
514 pop rbp
515 ret
516
517 ; Two-pass (horizontal, then vertical) bilinear prediction of a 4-pixel-wide, 4-row block using MMX.
518 ;void vp9_bilinear_predict4x4_mmx
519 ;(
520 ; unsigned char *src_ptr, - arg(0), first source row
521 ; int src_pixels_per_line, - arg(1), source stride in bytes
522 ; int xoffset, - arg(2), selects the horizontal tap pair
523 ; int yoffset, - arg(3), selects the vertical tap pair
524 ; unsigned char *dst_ptr, - arg(4), first destination row
525 ; int dst_pitch - arg(5), destination stride in bytes
526 ;)
527 global sym(vp9_bilinear_predict4x4_mmx) PRIVATE
528 sym(vp9_bilinear_predict4x4_mmx):
529 push rbp
530 mov rbp, rsp
531 SHADOW_ARGS_TO_STACK 6
532 GET_GOT rbx
533 push rsi
534 push rdi
535 ; end prolog
536 ; NOTE(review): no emms is issued in this routine; presumably the caller resets MMX state - confirm
537 ;const short *HFilter = vp9_bilinear_filters_8x_mmx[xoffset];
538 ;const short *VFilter = vp9_bilinear_filters_8x_mmx[yoffset];
539
540 movsxd rax, dword ptr arg(2) ;xoffset
541 mov rdi, arg(4) ;dst_ptr (rdi = output row pointer)
542
543 lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] ; rcx = filter table base
544 shl rax, 5 ; xoffset * 32 bytes (one table entry = 2 rows of 8 words)
545
546 add rax, rcx ; HFilter
547 mov rsi, arg(0) ;src_ptr (rsi = input row pointer)
548
549 movsxd rdx, dword ptr arg(5) ;dst_pitch
550 movq mm1, [rax] ; mm1 = 4 copies of horizontal tap 0
551
552 movq mm2, [rax+16] ; mm2 = 4 copies of horizontal tap 1
553 movsxd rax, dword ptr arg(3) ;yoffset
554
555 pxor mm0, mm0 ; mm0 = 0, used to widen bytes to words
556 shl rax, 5 ; yoffset * 32
557
558 add rax, rcx ; VFilter (rax stays live through the loop)
559 lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4 * dst_pitch, the loop end marker
560
561 movsxd rdx, dword ptr arg(1) ;src_pixels_per_line (rdx is reused as the source stride)
562
563 ; horizontal pass for the first source row; it only feeds the vertical pass
564 movd mm3, [rsi] ; load source bytes 0..3 of this row
565 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
566
567 pmullw mm3, mm1 ; * horizontal tap 0
568 movd mm5, [rsi+1] ; load source bytes 1..4 (right-hand neighbours)
569
570 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
571 pmullw mm5, mm2 ; * horizontal tap 1
572
573 paddw mm3, mm5 ; tap 0 + tap 1 products
574 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
575
576 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
577
578 movq mm7, mm3 ; keep mm3 intact while packing
579 packuswb mm7, mm0 ; mm7 low 4 bytes = filtered row (upper 4 bytes zero), kept as the previous row
580
581 add rsi, rdx ; next line
582 .next_row_4x4: ; per iteration: filter one new source row, blend it with mm7, store one output row
583 movd mm3, [rsi] ; load source bytes 0..3 of this row
584 punpcklbw mm3, mm0 ; bytes 0..3 widened to words
585
586 pmullw mm3, mm1 ; * horizontal tap 0
587 movd mm5, [rsi+1] ; load source bytes 1..4 (right-hand neighbours)
588
589 punpcklbw mm5, mm0 ; bytes 1..4 widened to words
590 pmullw mm5, mm2 ; * horizontal tap 1
591
592 paddw mm3, mm5 ; tap 0 + tap 1 products
593
594 movq mm5, mm7 ; previous filtered row (packed bytes)
595 punpcklbw mm5, mm0 ; previous row widened to words
596
597 pmullw mm5, [rax] ; * vertical tap 0
598 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
599
600 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
601 movq mm7, mm3 ; keep mm3 intact while packing
602
603 packuswb mm7, mm0 ; save this filtered row (as bytes) for the next iteration
604
605 pmullw mm3, [rax+16] ; current row * vertical tap 1
606 paddw mm3, mm5 ; add previous row * vertical tap 0
607
608
609 paddw mm3, [GLOBAL(rd)] ; mm3 += round value
610 psraw mm3, VP9_FILTER_SHIFT ; mm3 >>= VP9_FILTER_SHIFT (original note: /= 128)
611
612 packuswb mm3, mm0 ; back to unsigned bytes, with saturation; result in the low 4 bytes
613 movd [rdi], mm3 ; store the results in the destination
614
615 %if ABI_IS_32BIT
616 add rsi, rdx ; next line
617 add rdi, dword ptr arg(5) ;dst_pitch
618 %else
619 movsxd r8, dword ptr arg(5) ;dst_pitch - NOTE(review): loop-invariant, yet reloaded from the stack every iteration
620 add rsi, rdx ; next line
621 add rdi, r8 ;dst_pitch
622 %endif
623
624 cmp rdi, rcx ; reached dst_ptr + 4 * dst_pitch?
625 jne .next_row_4x4
626
627 ; begin epilog
628 pop rdi
629 pop rsi
630 RESTORE_GOT
631 UNSHADOW_ARGS
632 pop rbp
633 ret
634
635
636
637 SECTION_RODATA 205 SECTION_RODATA
638 align 16 206 align 16
639 rd: 207 rd: ; rounding constant added with paddw before each psraw by VP9_FILTER_SHIFT
640 times 4 dw 0x40 208 times 4 dw 0x40 ; four words of 64 (8 bytes, one MMX register); presumably half of 1 shl VP9_FILTER_SHIFT - confirm
641 209
642 align 16 210 align 16
643 global HIDDEN_DATA(sym(vp9_six_tap_mmx)) 211 global HIDDEN_DATA(sym(vp9_six_tap_mmx))
644 sym(vp9_six_tap_mmx): 212 sym(vp9_six_tap_mmx):
645 times 8 dw 0 213 times 8 dw 0
646 times 8 dw 0 214 times 8 dw 0
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
691 times 8 dw -11 259 times 8 dw -11
692 times 8 dw 2 260 times 8 dw 2
693 261
694 times 8 dw 0 262 times 8 dw 0
695 times 8 dw -1 263 times 8 dw -1
696 times 8 dw 12 264 times 8 dw 12
697 times 8 dw 123 265 times 8 dw 123
698 times 8 dw -6 266 times 8 dw -6
699 times 8 dw 0 267 times 8 dw 0
700 268
701 ; Bilinear tap table: 8 entries of 32 bytes each (tap 0 row, then tap 1 row, 8 identical words per row); the two taps of every entry sum to 128.
702 align 16
703 global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx))
704 sym(vp9_bilinear_filters_8x_mmx): ; indexed by the predict routines as base + offset * 32; tap 1 is read at +16
705 times 8 dw 128 ; entry 0, tap 0
706 times 8 dw 0 ; entry 0, tap 1
707
708 times 8 dw 112 ; entry 1, tap 0
709 times 8 dw 16 ; entry 1, tap 1
710
711 times 8 dw 96 ; entry 2, tap 0
712 times 8 dw 32 ; entry 2, tap 1
713
714 times 8 dw 80 ; entry 3, tap 0
715 times 8 dw 48 ; entry 3, tap 1
716
717 times 8 dw 64 ; entry 4, tap 0
718 times 8 dw 64 ; entry 4, tap 1
719
720 times 8 dw 48 ; entry 5, tap 0
721 times 8 dw 80 ; entry 5, tap 1
722
723 times 8 dw 32 ; entry 6, tap 0
724 times 8 dw 96 ; entry 6, tap 1
725
726 times 8 dw 16 ; entry 7, tap 0
727 times 8 dw 112 ; entry 7, tap 1
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698