Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(270)

Side by Side Diff: source/libvpx/vp8/common/x86/iwalsh_sse2.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 %include "vpx_ports/x86_abi_support.asm" 12 %include "vpx_ports/x86_abi_support.asm"
13 13
14 ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) 14 ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
15 global sym(vp8_short_inv_walsh4x4_sse2) 15 global sym(vp8_short_inv_walsh4x4_sse2)
16 sym(vp8_short_inv_walsh4x4_sse2): 16 sym(vp8_short_inv_walsh4x4_sse2):
17 push rbp 17 push rbp
18 mov rbp, rsp 18 mov rbp, rsp
19 SHADOW_ARGS_TO_STACK 2 19 SHADOW_ARGS_TO_STACK 2
20 SAVE_XMM 20 SAVE_XMM 6
21 push rsi 21 push rsi
22 push rdi 22 push rdi
23 ; end prolog 23 ; end prolog
24 24
25 mov rsi, arg(0) 25 mov rsi, arg(0)
26 mov rdi, arg(1) 26 mov rdi, arg(1)
27 mov rax, 3 27 mov rax, 3
28 28
29 movdqa xmm0, [rsi + 0] ;ip[4] ip[0] 29 movdqa xmm0, [rsi + 0] ;ip[4] ip[0]
30 movdqa xmm1, [rsi + 16] ;ip[12] ip[8] 30 movdqa xmm1, [rsi + 16] ;ip[12] ip[8]
31 31
32 shl rax, 16 32 shl rax, 16
33 or rax, 3 ;00030003h 33 or rax, 3 ;00030003h
34 34
35 pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] 35 pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
36 movdqa xmm3, xmm0 ;ip[4] ip[0] 36 movdqa xmm3, xmm0 ;ip[4] ip[0]
37 37
38 paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 38 paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
39 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 39 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
40 40
41 movdqa xmm4, xmm0 41 movdqa xmm4, xmm0
42 punpcklqdq xmm0, xmm3 ;d1 a1 42 punpcklqdq xmm0, xmm3 ;d1 a1
43 punpckhqdq xmm4, xmm3 ;c1 b1 43 punpckhqdq xmm4, xmm3 ;c1 b1
44 movd xmm7, eax 44 movd xmm6, eax
45 45
46 movdqa xmm1, xmm4 ;c1 b1 46 movdqa xmm1, xmm4 ;c1 b1
47 paddw xmm4, xmm0 ;d1+c1 a1+b1 aka op[4] op[0] 47 paddw xmm4, xmm0 ;d1+c1 a1+b1 aka op[4] op[0]
48 psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] 48 psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
49 49
50 ;;;temp output 50 ;;;temp output
51 ;; movdqu [rdi + 0], xmm4 51 ;; movdqu [rdi + 0], xmm4
52 ;; movdqu [rdi + 16], xmm3 52 ;; movdqu [rdi + 16], xmm3
53 53
54 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55 ; 13 12 11 10 03 02 01 00 55 ; 13 12 11 10 03 02 01 00
56 ; 56 ;
57 ; 33 32 31 30 23 22 21 20 57 ; 33 32 31 30 23 22 21 20
58 ; 58 ;
59 movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 59 movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
60 punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 60 punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
61 punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 61 punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
62 movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 62 movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
63 punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 63 punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
64 punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 64 punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
65 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 65 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66 pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] 66 pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
67 movdqa xmm3, xmm4 ;ip[4] ip[0] 67 movdqa xmm3, xmm4 ;ip[4] ip[0]
68 68
69 pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03 69 pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
70 70
71 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 71 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
72 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 72 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
73 73
74 movdqa xmm5, xmm4 74 movdqa xmm5, xmm4
75 punpcklqdq xmm4, xmm3 ;d1 a1 75 punpcklqdq xmm4, xmm3 ;d1 a1
76 punpckhqdq xmm5, xmm3 ;c1 b1 76 punpckhqdq xmm5, xmm3 ;c1 b1
77 77
78 movdqa xmm1, xmm5 ;c1 b1 78 movdqa xmm1, xmm5 ;c1 b1
79 paddw xmm5, xmm4 ;d1+c1 a1+b1 aka op[4] op[0] 79 paddw xmm5, xmm4 ;d1+c1 a1+b1 aka op[4] op[0]
80 psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] 80 psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
81 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 81 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82 ; 13 12 11 10 03 02 01 00 82 ; 13 12 11 10 03 02 01 00
83 ; 83 ;
84 ; 33 32 31 30 23 22 21 20 84 ; 33 32 31 30 23 22 21 20
85 ; 85 ;
86 movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00 86 movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00
87 punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00 87 punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00
88 punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10 88 punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10
89 movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00 89 movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00
90 punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 90 punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
91 punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02 91 punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
92 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 92 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
93 paddw xmm5, xmm7 93 paddw xmm5, xmm6
94 paddw xmm1, xmm7 94 paddw xmm1, xmm6
95 95
96 psraw xmm5, 3 96 psraw xmm5, 3
97 psraw xmm1, 3 97 psraw xmm1, 3
98 98
99 movdqa [rdi + 0], xmm5 99 movdqa [rdi + 0], xmm5
100 movdqa [rdi + 16], xmm1 100 movdqa [rdi + 16], xmm1
101 101
102 ; begin epilog 102 ; begin epilog
103 pop rdi 103 pop rdi
104 pop rsi 104 pop rsi
105 RESTORE_XMM 105 RESTORE_XMM
106 UNSHADOW_ARGS 106 UNSHADOW_ARGS
107 pop rbp 107 pop rbp
108 ret 108 ret
109 109
110 SECTION_RODATA 110 SECTION_RODATA
111 align 16 111 align 16
112 x_s1sqr2: 112 x_s1sqr2:
113 times 4 dw 0x8A8C 113 times 4 dw 0x8A8C
114 align 16 114 align 16
115 x_c1sqr2less1: 115 x_c1sqr2less1:
116 times 4 dw 0x4E7B 116 times 4 dw 0x4E7B
117 align 16 117 align 16
118 fours: 118 fours:
119 times 4 dw 0x0004 119 times 4 dw 0x0004
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/x86/idctllm_sse2.asm ('k') | source/libvpx/vp8/common/x86/loopfilter_mmx.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698