Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(646)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_subtract_mmx.asm

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
;-----------------------------------------------------------------------
;void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                             short *diff, unsigned char *Predictor,
;                             int pitch);
;
; Residual of one block of 4 rows x 4 pixels. For every row, 4 bytes of
; z and 4 bytes of Predictor are zero-extended to words and the word
; differences (z - Predictor) are written to diff.
;
;   z           source pixels, consecutive rows src_stride bytes apart
;   Predictor   prediction pixels, consecutive rows pitch bytes apart
;   diff        output words, consecutive rows pitch shorts
;               (2*pitch bytes) apart
;
; rsi and rdi are saved and restored. rax, rcx, rdx, mm0, mm1 and mm7
; are overwritten. No emms is executed in this routine.
;-----------------------------------------------------------------------

; SUBTRACT_B_ROW src_addr, pred_addr, diff_addr
;   One 4-pixel row: [diff_addr] (4 words) = [src_addr] - [pred_addr].
;   Expects mm7 == 0; overwrites mm0 and mm1.
%macro SUBTRACT_B_ROW 3
    movd        mm0,        [%1]
    movd        mm1,        [%2]
    punpcklbw   mm0,        mm7             ; bytes -> words (high byte 0)
    punpcklbw   mm1,        mm7
    psubw       mm0,        mm1
    movq        [%3],       mm0
%endmacro

global sym(vp9_subtract_b_mmx_impl)
sym(vp9_subtract_b_mmx_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ; z
    movsxd      rdx,        dword ptr arg(1)    ; src_stride
    mov         rdi,        arg(2)              ; diff
    mov         rax,        arg(3)              ; Predictor
    movsxd      rcx,        dword ptr arg(4)    ; pitch
    pxor        mm7,        mm7                 ; zero for byte->word unpack

    ; row 0
    SUBTRACT_B_ROW rsi, rax, rdi

    ; row 1 (diff offset is in bytes: pitch shorts = pitch*2)
    SUBTRACT_B_ROW rsi+rdx, rax+rcx, rdi+rcx*2

    ; row 2
    SUBTRACT_B_ROW rsi+rdx*2, rax+rcx*2, rdi+rcx*4

    ; x86 addressing has no *3 scale: advance z by two rows and turn
    ; rcx into 3*pitch so that row 3 can be reached with +rdx / +rcx.
    lea         rsi,        [rsi+rdx*2]
    lea         rcx,        [rcx+rcx*2]

    ; row 3 (z + 3*src_stride, Predictor + 3*pitch, diff + 6*pitch bytes)
    SUBTRACT_B_ROW rsi+rdx, rax+rcx, rdi+rcx*2

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
75
;-----------------------------------------------------------------------
;void vp9_subtract_mby_mmx(short *diff, unsigned char *src,
;                          unsigned char *pred, int stride)
;
; Residual of a block of 16 rows x 16 pixels: diff = src - pred, with
; every byte zero-extended to a word before the subtraction.
;
;   diff    output words; advanced by 32 bytes (16 shorts) per row
;   src     source pixels; advanced by stride bytes per row
;   pred    prediction pixels; advanced by 16 bytes per row
;
; rsi and rdi are saved and restored. rax, rcx, rdx and mm0-mm4 are
; overwritten. No emms is executed in this routine.
;-----------------------------------------------------------------------

; SUBTRACT_MBY_8PIX src_addr, pred_addr, diff_addr
;   Eight pixels: [diff_addr] (8 words) = [src_addr] - [pred_addr].
;   Expects mm0 == 0; overwrites mm1-mm4.
%macro SUBTRACT_MBY_8PIX 3
    movq        mm1,        [%1]
    movq        mm3,        [%2]

    movq        mm2,        mm1             ; keep copies for the high halves
    movq        mm4,        mm3

    punpcklbw   mm1,        mm0             ; low 4 bytes -> words
    punpcklbw   mm3,        mm0

    punpckhbw   mm2,        mm0             ; high 4 bytes -> words
    punpckhbw   mm4,        mm0

    psubw       mm1,        mm3
    psubw       mm2,        mm4

    movq        [%3],       mm1
    movq        [%3+8],     mm2
%endmacro

global sym(vp9_subtract_mby_mmx)
sym(vp9_subtract_mby_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi,        arg(0)              ; diff
    mov         rsi,        arg(1)              ; src
    mov         rax,        arg(2)              ; pred
    movsxd      rdx,        dword ptr arg(3)    ; stride

    mov         rcx,        16                  ; rows left to process
    pxor        mm0,        mm0                 ; zero for byte->word unpack

.row_loop:
    ; pixels 0-7 of the row
    SUBTRACT_MBY_8PIX rsi, rax, rdi

    ; pixels 8-15 of the row
    SUBTRACT_MBY_8PIX rsi+8, rax+8, rdi+16

    add         rdi,        32                  ; next diff row (16 shorts)
    add         rax,        16                  ; next pred row (16 bytes)

    lea         rsi,        [rsi+rdx]           ; next src row

    sub         rcx,        1
    jnz         .row_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
150
151
;-----------------------------------------------------------------------
;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc,
;                           unsigned char *vsrc, unsigned char *pred,
;                           int stride)
;
; Residual of the two chroma planes, each 8 rows x 8 pixels. Every byte
; is zero-extended to a word before the subtraction (src - pred).
;
;   diff    output words; U results start at diff + 256 shorts,
;           V results at diff + 320 shorts; rows are 8 shorts apart
;   usrc    U source pixels, rows stride bytes apart
;   vsrc    V source pixels, rows stride bytes apart
;   pred    prediction pixels; U prediction starts at pred + 256,
;           V prediction at pred + 320; rows are 8 bytes apart
;   stride  row stride shared by usrc and vsrc
;
; Equivalent C set-up for the two passes:
;   short         *udiff = diff + 256;   short         *vdiff = diff + 320;
;   unsigned char *upred = pred + 256;   unsigned char *vpred = pred + 320;
;
; rsi and rdi are saved and restored. rax, rdx, mm0, mm1, mm3, mm4 and
; mm7 are overwritten. No emms is executed in this routine.
;-----------------------------------------------------------------------

; Element offsets of the U and V blocks inside both diff and pred.
%define MBUV_U_OFFSET 256
%define MBUV_V_OFFSET 320

; SUBTRACT_MBUV_ROW src_addr, pred_addr, diff_addr
;   One 8-pixel row: [diff_addr] (8 words) = [src_addr] - [pred_addr].
;   Expects mm7 == 0; overwrites mm0, mm1, mm3, mm4.
%macro SUBTRACT_MBUV_ROW 3
    movq        mm0,        [%1]
    movq        mm1,        [%2]
    movq        mm3,        mm0             ; keep copies for the high halves
    movq        mm4,        mm1
    punpcklbw   mm0,        mm7             ; low 4 bytes -> words
    punpcklbw   mm1,        mm7
    punpckhbw   mm3,        mm7             ; high 4 bytes -> words
    punpckhbw   mm4,        mm7
    psubw       mm0,        mm1
    psubw       mm3,        mm4
    movq        [%3],       mm0
    movq        [%3+8],     mm3
%endmacro

; SUBTRACT_MBUV_4ROWS
;   Four consecutive rows starting at rsi (src), rax (pred), rdi (diff)
;   with rdx = stride. On exit rsi has been advanced by two source rows;
;   rax and rdi are unchanged.
%macro SUBTRACT_MBUV_4ROWS 0
    SUBTRACT_MBUV_ROW rsi, rax, rdi
    SUBTRACT_MBUV_ROW rsi+rdx, rax+8, rdi+16
    SUBTRACT_MBUV_ROW rsi+rdx*2, rax+16, rdi+32
    ; no *3 scale available: step rsi two rows, then address +rdx
    lea         rsi,        [rsi+rdx*2]
    SUBTRACT_MBUV_ROW rsi+rdx, rax+24, rdi+48
%endmacro

; SUBTRACT_MBUV_PLANE src_arg_index, element_offset
;   Full 8x8 plane. The source pointer is taken from arg(src_arg_index);
;   diff and pred are offset by element_offset (shorts / bytes).
;   Expects mm7 == 0.
%macro SUBTRACT_MBUV_PLANE 2
    mov         rdi,        arg(0)              ; diff
    mov         rax,        arg(3)              ; pred
    mov         rsi,        arg(%1)             ; z = plane source
    add         rdi,        %2*2                ; diff += offset (shorts)
    add         rax,        %2                  ; Predictor = pred + offset
    movsxd      rdx,        dword ptr arg(4)    ; stride

    ; rows 0-3
    SUBTRACT_MBUV_4ROWS

    add         rdi,        64                  ; 4 diff rows  (4 * 16 bytes)
    add         rax,        32                  ; 4 pred rows  (4 * 8 bytes)
    lea         rsi,        [rsi+rdx*2]         ; now 4 source rows in total

    ; rows 4-7
    SUBTRACT_MBUV_4ROWS
%endmacro

global sym(vp9_subtract_mbuv_mmx)
sym(vp9_subtract_mbuv_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    ; mm7 stays zero for the whole routine: it is only ever used as the
    ; source operand of the unpack instructions, so one clear is enough.
    pxor        mm7,        mm7

    ; U plane: z = usrc, diff = udiff, Predictor = upred
    SUBTRACT_MBUV_PLANE 1, MBUV_U_OFFSET

    ; V plane: z = vsrc, diff = vdiff, Predictor = vpred
    SUBTRACT_MBUV_PLANE 2, MBUV_V_OFFSET

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698