OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
164 %endif | 164 %endif |
165 movdqu [rdi], xmm0 ;store the result | 165 movdqu [rdi], xmm0 ;store the result |
166 movdqu [rdi + 16], xmm2 ;store the result | 166 movdqu [rdi + 16], xmm2 ;store the result |
167 | 167 |
168 lea rsi, [rsi + 2*rax] | 168 lea rsi, [rsi + 2*rax] |
169 lea rdi, [rdi + 2*rdx] | 169 lea rdi, [rdi + 2*rdx] |
170 dec rcx | 170 dec rcx |
171 %endm | 171 %endm |
172 %endif | 172 %endif |
173 | 173 |
174 global sym(vp9_high_filter_block1d4_v2_sse2) PRIVATE | 174 global sym(vp9_highbd_filter_block1d4_v2_sse2) PRIVATE |
175 sym(vp9_high_filter_block1d4_v2_sse2): | 175 sym(vp9_highbd_filter_block1d4_v2_sse2): |
176 push rbp | 176 push rbp |
177 mov rbp, rsp | 177 mov rbp, rsp |
178 SHADOW_ARGS_TO_STACK 7 | 178 SHADOW_ARGS_TO_STACK 7 |
179 push rsi | 179 push rsi |
180 push rdi | 180 push rdi |
181 ; end prolog | 181 ; end prolog |
182 | 182 |
183 HIGH_GET_PARAM_4 | 183 HIGH_GET_PARAM_4 |
184 .loop: | 184 .loop: |
185 movq xmm0, [rsi] ;load src | 185 movq xmm0, [rsi] ;load src |
186 movq xmm1, [rsi + 2*rax] | 186 movq xmm1, [rsi + 2*rax] |
187 | 187 |
188 HIGH_APPLY_FILTER_4 0 | 188 HIGH_APPLY_FILTER_4 0 |
189 jnz .loop | 189 jnz .loop |
190 | 190 |
191 ; begin epilog | 191 ; begin epilog |
192 pop rdi | 192 pop rdi |
193 pop rsi | 193 pop rsi |
194 UNSHADOW_ARGS | 194 UNSHADOW_ARGS |
195 pop rbp | 195 pop rbp |
196 ret | 196 ret |
197 | 197 |
198 %if ARCH_X86_64 | 198 %if ARCH_X86_64 |
199 global sym(vp9_high_filter_block1d8_v2_sse2) PRIVATE | 199 global sym(vp9_highbd_filter_block1d8_v2_sse2) PRIVATE |
200 sym(vp9_high_filter_block1d8_v2_sse2): | 200 sym(vp9_highbd_filter_block1d8_v2_sse2): |
201 push rbp | 201 push rbp |
202 mov rbp, rsp | 202 mov rbp, rsp |
203 SHADOW_ARGS_TO_STACK 7 | 203 SHADOW_ARGS_TO_STACK 7 |
204 SAVE_XMM 8 | 204 SAVE_XMM 8 |
205 push rsi | 205 push rsi |
206 push rdi | 206 push rdi |
207 ; end prolog | 207 ; end prolog |
208 | 208 |
209 HIGH_GET_PARAM | 209 HIGH_GET_PARAM |
210 .loop: | 210 .loop: |
211 movdqu xmm0, [rsi] ;0 | 211 movdqu xmm0, [rsi] ;0 |
212 movdqu xmm1, [rsi + 2*rax] ;1 | 212 movdqu xmm1, [rsi + 2*rax] ;1 |
213 | 213 |
214 HIGH_APPLY_FILTER_8 0 | 214 HIGH_APPLY_FILTER_8 0 |
215 jnz .loop | 215 jnz .loop |
216 | 216 |
217 ; begin epilog | 217 ; begin epilog |
218 pop rdi | 218 pop rdi |
219 pop rsi | 219 pop rsi |
220 RESTORE_XMM | 220 RESTORE_XMM |
221 UNSHADOW_ARGS | 221 UNSHADOW_ARGS |
222 pop rbp | 222 pop rbp |
223 ret | 223 ret |
224 | 224 |
225 global sym(vp9_high_filter_block1d16_v2_sse2) PRIVATE | 225 global sym(vp9_highbd_filter_block1d16_v2_sse2) PRIVATE |
226 sym(vp9_high_filter_block1d16_v2_sse2): | 226 sym(vp9_highbd_filter_block1d16_v2_sse2): |
227 push rbp | 227 push rbp |
228 mov rbp, rsp | 228 mov rbp, rsp |
229 SHADOW_ARGS_TO_STACK 7 | 229 SHADOW_ARGS_TO_STACK 7 |
230 SAVE_XMM 9 | 230 SAVE_XMM 9 |
231 push rsi | 231 push rsi |
232 push rdi | 232 push rdi |
233 ; end prolog | 233 ; end prolog |
234 | 234 |
235 HIGH_GET_PARAM | 235 HIGH_GET_PARAM |
236 .loop: | 236 .loop: |
237 movdqu xmm0, [rsi] ;0 | 237 movdqu xmm0, [rsi] ;0 |
238 movdqu xmm2, [rsi + 16] | 238 movdqu xmm2, [rsi + 16] |
239 movdqu xmm1, [rsi + 2*rax] ;1 | 239 movdqu xmm1, [rsi + 2*rax] ;1 |
240 movdqu xmm3, [rsi + 2*rax + 16] | 240 movdqu xmm3, [rsi + 2*rax + 16] |
241 | 241 |
242 HIGH_APPLY_FILTER_16 0 | 242 HIGH_APPLY_FILTER_16 0 |
243 jnz .loop | 243 jnz .loop |
244 | 244 |
245 ; begin epilog | 245 ; begin epilog |
246 pop rdi | 246 pop rdi |
247 pop rsi | 247 pop rsi |
248 RESTORE_XMM | 248 RESTORE_XMM |
249 UNSHADOW_ARGS | 249 UNSHADOW_ARGS |
250 pop rbp | 250 pop rbp |
251 ret | 251 ret |
252 %endif | 252 %endif |
253 | 253 |
254 global sym(vp9_high_filter_block1d4_v2_avg_sse2) PRIVATE | 254 global sym(vp9_highbd_filter_block1d4_v2_avg_sse2) PRIVATE |
255 sym(vp9_high_filter_block1d4_v2_avg_sse2): | 255 sym(vp9_highbd_filter_block1d4_v2_avg_sse2): |
256 push rbp | 256 push rbp |
257 mov rbp, rsp | 257 mov rbp, rsp |
258 SHADOW_ARGS_TO_STACK 7 | 258 SHADOW_ARGS_TO_STACK 7 |
259 push rsi | 259 push rsi |
260 push rdi | 260 push rdi |
261 ; end prolog | 261 ; end prolog |
262 | 262 |
263 HIGH_GET_PARAM_4 | 263 HIGH_GET_PARAM_4 |
264 .loop: | 264 .loop: |
265 movq xmm0, [rsi] ;load src | 265 movq xmm0, [rsi] ;load src |
266 movq xmm1, [rsi + 2*rax] | 266 movq xmm1, [rsi + 2*rax] |
267 | 267 |
268 HIGH_APPLY_FILTER_4 1 | 268 HIGH_APPLY_FILTER_4 1 |
269 jnz .loop | 269 jnz .loop |
270 | 270 |
271 ; begin epilog | 271 ; begin epilog |
272 pop rdi | 272 pop rdi |
273 pop rsi | 273 pop rsi |
274 UNSHADOW_ARGS | 274 UNSHADOW_ARGS |
275 pop rbp | 275 pop rbp |
276 ret | 276 ret |
277 | 277 |
278 %if ARCH_X86_64 | 278 %if ARCH_X86_64 |
279 global sym(vp9_high_filter_block1d8_v2_avg_sse2) PRIVATE | 279 global sym(vp9_highbd_filter_block1d8_v2_avg_sse2) PRIVATE |
280 sym(vp9_high_filter_block1d8_v2_avg_sse2): | 280 sym(vp9_highbd_filter_block1d8_v2_avg_sse2): |
281 push rbp | 281 push rbp |
282 mov rbp, rsp | 282 mov rbp, rsp |
283 SHADOW_ARGS_TO_STACK 7 | 283 SHADOW_ARGS_TO_STACK 7 |
284 SAVE_XMM 8 | 284 SAVE_XMM 8 |
285 push rsi | 285 push rsi |
286 push rdi | 286 push rdi |
287 ; end prolog | 287 ; end prolog |
288 | 288 |
289 HIGH_GET_PARAM | 289 HIGH_GET_PARAM |
290 .loop: | 290 .loop: |
291 movdqu xmm0, [rsi] ;0 | 291 movdqu xmm0, [rsi] ;0 |
292 movdqu xmm1, [rsi + 2*rax] ;1 | 292 movdqu xmm1, [rsi + 2*rax] ;1 |
293 | 293 |
294 HIGH_APPLY_FILTER_8 1 | 294 HIGH_APPLY_FILTER_8 1 |
295 jnz .loop | 295 jnz .loop |
296 | 296 |
297 ; begin epilog | 297 ; begin epilog |
298 pop rdi | 298 pop rdi |
299 pop rsi | 299 pop rsi |
300 RESTORE_XMM | 300 RESTORE_XMM |
301 UNSHADOW_ARGS | 301 UNSHADOW_ARGS |
302 pop rbp | 302 pop rbp |
303 ret | 303 ret |
304 | 304 |
305 global sym(vp9_high_filter_block1d16_v2_avg_sse2) PRIVATE | 305 global sym(vp9_highbd_filter_block1d16_v2_avg_sse2) PRIVATE |
306 sym(vp9_high_filter_block1d16_v2_avg_sse2): | 306 sym(vp9_highbd_filter_block1d16_v2_avg_sse2): |
307 push rbp | 307 push rbp |
308 mov rbp, rsp | 308 mov rbp, rsp |
309 SHADOW_ARGS_TO_STACK 7 | 309 SHADOW_ARGS_TO_STACK 7 |
310 SAVE_XMM 9 | 310 SAVE_XMM 9 |
311 push rsi | 311 push rsi |
312 push rdi | 312 push rdi |
313 ; end prolog | 313 ; end prolog |
314 | 314 |
315 HIGH_GET_PARAM | 315 HIGH_GET_PARAM |
316 .loop: | 316 .loop: |
317 movdqu xmm0, [rsi] ;0 | 317 movdqu xmm0, [rsi] ;0 |
318 movdqu xmm1, [rsi + 2*rax] ;1 | 318 movdqu xmm1, [rsi + 2*rax] ;1 |
319 movdqu xmm2, [rsi + 16] | 319 movdqu xmm2, [rsi + 16] |
320 movdqu xmm3, [rsi + 2*rax + 16] | 320 movdqu xmm3, [rsi + 2*rax + 16] |
321 | 321 |
322 HIGH_APPLY_FILTER_16 1 | 322 HIGH_APPLY_FILTER_16 1 |
323 jnz .loop | 323 jnz .loop |
324 | 324 |
325 ; begin epilog | 325 ; begin epilog |
326 pop rdi | 326 pop rdi |
327 pop rsi | 327 pop rsi |
328 RESTORE_XMM | 328 RESTORE_XMM |
329 UNSHADOW_ARGS | 329 UNSHADOW_ARGS |
330 pop rbp | 330 pop rbp |
331 ret | 331 ret |
332 %endif | 332 %endif |
333 | 333 |
334 global sym(vp9_high_filter_block1d4_h2_sse2) PRIVATE | 334 global sym(vp9_highbd_filter_block1d4_h2_sse2) PRIVATE |
335 sym(vp9_high_filter_block1d4_h2_sse2): | 335 sym(vp9_highbd_filter_block1d4_h2_sse2): |
336 push rbp | 336 push rbp |
337 mov rbp, rsp | 337 mov rbp, rsp |
338 SHADOW_ARGS_TO_STACK 7 | 338 SHADOW_ARGS_TO_STACK 7 |
339 push rsi | 339 push rsi |
340 push rdi | 340 push rdi |
341 ; end prolog | 341 ; end prolog |
342 | 342 |
343 HIGH_GET_PARAM_4 | 343 HIGH_GET_PARAM_4 |
344 .loop: | 344 .loop: |
345 movdqu xmm0, [rsi] ;load src | 345 movdqu xmm0, [rsi] ;load src |
346 movdqa xmm1, xmm0 | 346 movdqa xmm1, xmm0 |
347 psrldq xmm1, 2 | 347 psrldq xmm1, 2 |
348 | 348 |
349 HIGH_APPLY_FILTER_4 0 | 349 HIGH_APPLY_FILTER_4 0 |
350 jnz .loop | 350 jnz .loop |
351 | 351 |
352 ; begin epilog | 352 ; begin epilog |
353 pop rdi | 353 pop rdi |
354 pop rsi | 354 pop rsi |
355 UNSHADOW_ARGS | 355 UNSHADOW_ARGS |
356 pop rbp | 356 pop rbp |
357 ret | 357 ret |
358 | 358 |
359 %if ARCH_X86_64 | 359 %if ARCH_X86_64 |
360 global sym(vp9_high_filter_block1d8_h2_sse2) PRIVATE | 360 global sym(vp9_highbd_filter_block1d8_h2_sse2) PRIVATE |
361 sym(vp9_high_filter_block1d8_h2_sse2): | 361 sym(vp9_highbd_filter_block1d8_h2_sse2): |
362 push rbp | 362 push rbp |
363 mov rbp, rsp | 363 mov rbp, rsp |
364 SHADOW_ARGS_TO_STACK 7 | 364 SHADOW_ARGS_TO_STACK 7 |
365 SAVE_XMM 8 | 365 SAVE_XMM 8 |
366 push rsi | 366 push rsi |
367 push rdi | 367 push rdi |
368 ; end prolog | 368 ; end prolog |
369 | 369 |
370 HIGH_GET_PARAM | 370 HIGH_GET_PARAM |
371 .loop: | 371 .loop: |
372 movdqu xmm0, [rsi] ;load src | 372 movdqu xmm0, [rsi] ;load src |
373 movdqu xmm1, [rsi + 2] | 373 movdqu xmm1, [rsi + 2] |
374 | 374 |
375 HIGH_APPLY_FILTER_8 0 | 375 HIGH_APPLY_FILTER_8 0 |
376 jnz .loop | 376 jnz .loop |
377 | 377 |
378 ; begin epilog | 378 ; begin epilog |
379 pop rdi | 379 pop rdi |
380 pop rsi | 380 pop rsi |
381 RESTORE_XMM | 381 RESTORE_XMM |
382 UNSHADOW_ARGS | 382 UNSHADOW_ARGS |
383 pop rbp | 383 pop rbp |
384 ret | 384 ret |
385 | 385 |
386 global sym(vp9_high_filter_block1d16_h2_sse2) PRIVATE | 386 global sym(vp9_highbd_filter_block1d16_h2_sse2) PRIVATE |
387 sym(vp9_high_filter_block1d16_h2_sse2): | 387 sym(vp9_highbd_filter_block1d16_h2_sse2): |
388 push rbp | 388 push rbp |
389 mov rbp, rsp | 389 mov rbp, rsp |
390 SHADOW_ARGS_TO_STACK 7 | 390 SHADOW_ARGS_TO_STACK 7 |
391 SAVE_XMM 9 | 391 SAVE_XMM 9 |
392 push rsi | 392 push rsi |
393 push rdi | 393 push rdi |
394 ; end prolog | 394 ; end prolog |
395 | 395 |
396 HIGH_GET_PARAM | 396 HIGH_GET_PARAM |
397 .loop: | 397 .loop: |
398 movdqu xmm0, [rsi] ;load src | 398 movdqu xmm0, [rsi] ;load src |
399 movdqu xmm1, [rsi + 2] | 399 movdqu xmm1, [rsi + 2] |
400 movdqu xmm2, [rsi + 16] | 400 movdqu xmm2, [rsi + 16] |
401 movdqu xmm3, [rsi + 18] | 401 movdqu xmm3, [rsi + 18] |
402 | 402 |
403 HIGH_APPLY_FILTER_16 0 | 403 HIGH_APPLY_FILTER_16 0 |
404 jnz .loop | 404 jnz .loop |
405 | 405 |
406 ; begin epilog | 406 ; begin epilog |
407 pop rdi | 407 pop rdi |
408 pop rsi | 408 pop rsi |
409 RESTORE_XMM | 409 RESTORE_XMM |
410 UNSHADOW_ARGS | 410 UNSHADOW_ARGS |
411 pop rbp | 411 pop rbp |
412 ret | 412 ret |
413 %endif | 413 %endif |
414 | 414 |
415 global sym(vp9_high_filter_block1d4_h2_avg_sse2) PRIVATE | 415 global sym(vp9_highbd_filter_block1d4_h2_avg_sse2) PRIVATE |
416 sym(vp9_high_filter_block1d4_h2_avg_sse2): | 416 sym(vp9_highbd_filter_block1d4_h2_avg_sse2): |
417 push rbp | 417 push rbp |
418 mov rbp, rsp | 418 mov rbp, rsp |
419 SHADOW_ARGS_TO_STACK 7 | 419 SHADOW_ARGS_TO_STACK 7 |
420 push rsi | 420 push rsi |
421 push rdi | 421 push rdi |
422 ; end prolog | 422 ; end prolog |
423 | 423 |
424 HIGH_GET_PARAM_4 | 424 HIGH_GET_PARAM_4 |
425 .loop: | 425 .loop: |
426 movdqu xmm0, [rsi] ;load src | 426 movdqu xmm0, [rsi] ;load src |
427 movdqa xmm1, xmm0 | 427 movdqa xmm1, xmm0 |
428 psrldq xmm1, 2 | 428 psrldq xmm1, 2 |
429 | 429 |
430 HIGH_APPLY_FILTER_4 1 | 430 HIGH_APPLY_FILTER_4 1 |
431 jnz .loop | 431 jnz .loop |
432 | 432 |
433 ; begin epilog | 433 ; begin epilog |
434 pop rdi | 434 pop rdi |
435 pop rsi | 435 pop rsi |
436 UNSHADOW_ARGS | 436 UNSHADOW_ARGS |
437 pop rbp | 437 pop rbp |
438 ret | 438 ret |
439 | 439 |
440 %if ARCH_X86_64 | 440 %if ARCH_X86_64 |
441 global sym(vp9_high_filter_block1d8_h2_avg_sse2) PRIVATE | 441 global sym(vp9_highbd_filter_block1d8_h2_avg_sse2) PRIVATE |
442 sym(vp9_high_filter_block1d8_h2_avg_sse2): | 442 sym(vp9_highbd_filter_block1d8_h2_avg_sse2): |
443 push rbp | 443 push rbp |
444 mov rbp, rsp | 444 mov rbp, rsp |
445 SHADOW_ARGS_TO_STACK 7 | 445 SHADOW_ARGS_TO_STACK 7 |
446 SAVE_XMM 8 | 446 SAVE_XMM 8 |
447 push rsi | 447 push rsi |
448 push rdi | 448 push rdi |
449 ; end prolog | 449 ; end prolog |
450 | 450 |
451 HIGH_GET_PARAM | 451 HIGH_GET_PARAM |
452 .loop: | 452 .loop: |
453 movdqu xmm0, [rsi] ;load src | 453 movdqu xmm0, [rsi] ;load src |
454 movdqu xmm1, [rsi + 2] | 454 movdqu xmm1, [rsi + 2] |
455 | 455 |
456 HIGH_APPLY_FILTER_8 1 | 456 HIGH_APPLY_FILTER_8 1 |
457 jnz .loop | 457 jnz .loop |
458 | 458 |
459 ; begin epilog | 459 ; begin epilog |
460 pop rdi | 460 pop rdi |
461 pop rsi | 461 pop rsi |
462 RESTORE_XMM | 462 RESTORE_XMM |
463 UNSHADOW_ARGS | 463 UNSHADOW_ARGS |
464 pop rbp | 464 pop rbp |
465 ret | 465 ret |
466 | 466 |
467 global sym(vp9_high_filter_block1d16_h2_avg_sse2) PRIVATE | 467 global sym(vp9_highbd_filter_block1d16_h2_avg_sse2) PRIVATE |
468 sym(vp9_high_filter_block1d16_h2_avg_sse2): | 468 sym(vp9_highbd_filter_block1d16_h2_avg_sse2): |
469 push rbp | 469 push rbp |
470 mov rbp, rsp | 470 mov rbp, rsp |
471 SHADOW_ARGS_TO_STACK 7 | 471 SHADOW_ARGS_TO_STACK 7 |
472 SAVE_XMM 9 | 472 SAVE_XMM 9 |
473 push rsi | 473 push rsi |
474 push rdi | 474 push rdi |
475 ; end prolog | 475 ; end prolog |
476 | 476 |
477 HIGH_GET_PARAM | 477 HIGH_GET_PARAM |
478 .loop: | 478 .loop: |
479 movdqu xmm0, [rsi] ;load src | 479 movdqu xmm0, [rsi] ;load src |
480 movdqu xmm1, [rsi + 2] | 480 movdqu xmm1, [rsi + 2] |
481 movdqu xmm2, [rsi + 16] | 481 movdqu xmm2, [rsi + 16] |
482 movdqu xmm3, [rsi + 18] | 482 movdqu xmm3, [rsi + 18] |
483 | 483 |
484 HIGH_APPLY_FILTER_16 1 | 484 HIGH_APPLY_FILTER_16 1 |
485 jnz .loop | 485 jnz .loop |
486 | 486 |
487 ; begin epilog | 487 ; begin epilog |
488 pop rdi | 488 pop rdi |
489 pop rsi | 489 pop rsi |
490 RESTORE_XMM | 490 RESTORE_XMM |
491 UNSHADOW_ARGS | 491 UNSHADOW_ARGS |
492 pop rbp | 492 pop rbp |
493 ret | 493 ret |
494 %endif | 494 %endif |
OLD | NEW |