Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(627)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_recon_mmx.asm

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d,
;                      int stride)
;
; Builds four output rows of four bytes each. For row k (k = 0..3):
;   - 4 bytes are read from   s + 16*k  and zero-extended to words,
;   - 4 words from            q + 32*k  are added with signed saturation,
;   - the sums are clamped to unsigned bytes and 4 bytes are written to
;                             d + stride*k.
;
; Register roles:
;   rsi = s, rdx = q, rdi = d, rax = stride (loaded with movsxd)
;   mm0 = all zeroes, mm1..mm4 = working register for rows 0..3
;
; rsi and rdi are pushed on entry and popped on exit; rax, rdx and
; mm0-mm4 are left modified. No emms is executed in this routine.
;-----------------------------------------------------------------------

; RECON_B_ROW mmreg, s_offset, q_offset, dst_address
;   Expects rsi = s, rdx = q, mm0 = 0. Emits one row of the sequence
;   described above, storing the 4 result bytes at [dst_address].
%macro RECON_B_ROW 4
    movd        %1, [rsi + %2]          ; 4 source bytes
    punpcklbw   %1, mm0                 ; bytes -> words (high byte = 0)
    paddsw      %1, [rdx + %3]          ; + 4 shorts, signed saturation
    packuswb    %1, mm0                 ; words -> bytes, clamped to 0..255
    movd        [%4], %1                ; low 4 bytes are the row
%endmacro

global sym(vp9_recon_b_mmx)
sym(vp9_recon_b_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    pxor        mm0, mm0                        ; zero used for unpack/pack
    movsxd      rax, dword ptr arg(3)           ; stride
    mov         rdx, arg(1)                     ; q
    mov         rsi, arg(0)                     ; s
    mov         rdi, arg(2)                     ; d

    RECON_B_ROW mm1,  0,  0, rdi                ; row 0
    RECON_B_ROW mm2, 16, 32, rdi + rax          ; row 1
    RECON_B_ROW mm3, 32, 64, rdi + 2*rax        ; row 2

    add         rdi, rax                        ; rdi = d + stride, so that
    RECON_B_ROW mm4, 48, 96, rdi + 2*rax        ; row 3 lands at d + 3*stride

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
60
61
;-----------------------------------------------------------------------
; void vp9_copy_mem8x8_mmx(unsigned char *src, int src_stride,
;                          unsigned char *dst, int dst_stride)
;
; Copies eight rows of eight bytes from src to dst. Rows are moved in
; groups of 3, 3 and 2: each group is fully loaded into MMX registers
; before any of its rows is stored.
;
; Register roles:
;   rsi = current source row,      rax = src_stride (loaded with movsxd)
;   rdi = current destination row, rcx = dst_stride (loaded with movsxd)
;   mm0-mm2 = rows 0-2, mm3-mm5 = rows 3-5, mm0-mm1 reused for rows 6-7
;
; rsi and rdi are pushed on entry and popped on exit; rax, rcx and
; mm0-mm5 are left modified. No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp9_copy_mem8x8_mmx)
sym(vp9_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src
    mov         rdi, arg(2)                     ; dst
    movsxd      rax, dword ptr arg(1)           ; src_stride
    movsxd      rcx, dword ptr arg(3)           ; dst_stride

    ; ---- rows 0..2 ----
    movq        mm0, [rsi]
    movq        mm1, [rsi + rax]
    movq        mm2, [rsi + rax*2]

    movq        [rdi], mm0
    movq        [rdi + rcx], mm1
    movq        [rdi + rcx*2], mm2

    lea         rsi, [rsi + rax*2]
    lea         rdi, [rdi + rcx*2]
    add         rsi, rax                        ; rsi = src + 3*src_stride
    add         rdi, rcx                        ; rdi = dst + 3*dst_stride

    ; ---- rows 3..5 ----
    movq        mm3, [rsi]
    movq        mm4, [rsi + rax]
    movq        mm5, [rsi + rax*2]

    movq        [rdi], mm3
    movq        [rdi + rcx], mm4
    movq        [rdi + rcx*2], mm5

    lea         rsi, [rsi + rax*2]
    lea         rdi, [rdi + rcx*2]
    add         rsi, rax                        ; rsi = src + 6*src_stride
    add         rdi, rcx                        ; rdi = dst + 6*dst_stride

    ; ---- rows 6..7 ----
    movq        mm0, [rsi]
    movq        mm1, [rsi + rax]

    movq        [rdi], mm0
    movq        [rdi + rcx], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
123
124
;-----------------------------------------------------------------------
; void vp9_copy_mem8x4_mmx(unsigned char *src, int src_stride,
;                          unsigned char *dst, int dst_stride)
;
; Copies four rows of eight bytes from src to dst. Rows 0-2 are all
; loaded before any of them is stored; row 3 is then loaded and stored
; on its own.
;
; Register roles:
;   rsi = source pointer,      rax = src_stride (loaded with movsxd)
;   rdi = destination pointer, rcx = dst_stride (loaded with movsxd)
;   mm0..mm3 = rows 0..3
;
; rsi and rdi are pushed on entry and popped on exit; rax, rcx and
; mm0-mm3 are left modified. No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp9_copy_mem8x4_mmx)
sym(vp9_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src
    mov         rdi, arg(2)                     ; dst
    movsxd      rax, dword ptr arg(1)           ; src_stride
    movsxd      rcx, dword ptr arg(3)           ; dst_stride

    ; ---- rows 0..2 ----
    movq        mm0, [rsi]
    movq        mm1, [rsi + rax]
    movq        mm2, [rsi + rax*2]

    movq        [rdi], mm0
    movq        [rdi + rcx], mm1
    movq        [rdi + rcx*2], mm2

    ; ---- row 3 ----
    lea         rsi, [rsi + rax*2]
    lea         rdi, [rdi + rcx*2]
    add         rsi, rax                        ; rsi = src + 3*src_stride
    add         rdi, rcx                        ; rdi = dst + 3*dst_stride

    movq        mm3, [rsi]
    movq        [rdi], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
167
168
;-----------------------------------------------------------------------
; void vp9_copy_mem16x16_mmx(unsigned char *src, int src_stride,
;                            unsigned char *dst, int dst_stride)
;
; Copies sixteen rows of sixteen bytes from src to dst. Each row is moved
; as two 8-byte halves. Rows are handled as five groups of three (every
; group is fully loaded before any of its rows is stored) followed by a
; single final row: 5*3 + 1 = 16.
;
; Register roles:
;   rsi = current source row,      rax = src_stride (loaded with movsxd)
;   rdi = current destination row, rcx = dst_stride (loaded with movsxd)
;   mm0/mm3, mm1/mm4, mm2/mm5 = low/high halves of the three rows in
;   flight
;
; rsi and rdi are pushed on entry and popped on exit; rax, rcx and
; mm0-mm5 are left modified. No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp9_copy_mem16x16_mmx)
sym(vp9_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; src
    movsxd      rax, dword ptr arg(1)           ; src_stride

    mov         rdi, arg(2)                     ; dst
    movsxd      rcx, dword ptr arg(3)           ; dst_stride

    ; Rows 0..14: the assembler expands this into five straight-line
    ; copies of the 3-row group; there is no run-time loop or branch.
%rep 5
    movq        mm0, [rsi]
    movq        mm3, [rsi + 8]
    movq        mm1, [rsi + rax]
    movq        mm4, [rsi + rax + 8]
    movq        mm2, [rsi + rax*2]
    movq        mm5, [rsi + rax*2 + 8]

    movq        [rdi], mm0
    movq        [rdi + 8], mm3
    movq        [rdi + rcx], mm1
    movq        [rdi + rcx + 8], mm4
    movq        [rdi + rcx*2], mm2
    movq        [rdi + rcx*2 + 8], mm5

    lea         rsi, [rsi + rax*2]
    lea         rdi, [rdi + rcx*2]
    add         rsi, rax                        ; source advanced 3 rows
    add         rdi, rcx                        ; destination advanced 3 rows
%endrep

    ; Row 15.
    movq        mm0, [rsi]
    movq        mm3, [rsi + 8]

    movq        [rdi], mm0
    movq        [rdi + 8], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698