| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 %endif | 164 %endif |
| 165 movdqu [rdi], xmm0 ;store the result | 165 movdqu [rdi], xmm0 ;store the result |
| 166 movdqu [rdi + 16], xmm2 ;store the result | 166 movdqu [rdi + 16], xmm2 ;store the result |
| 167 | 167 |
| 168 lea rsi, [rsi + 2*rax] | 168 lea rsi, [rsi + 2*rax] |
| 169 lea rdi, [rdi + 2*rdx] | 169 lea rdi, [rdi + 2*rdx] |
| 170 dec rcx | 170 dec rcx |
| 171 %endm | 171 %endm |
| 172 %endif | 172 %endif |
| 173 | 173 |
| 174 global sym(vp9_high_filter_block1d4_v2_sse2) PRIVATE | 174 global sym(vp9_highbd_filter_block1d4_v2_sse2) PRIVATE |
| 175 sym(vp9_high_filter_block1d4_v2_sse2): | 175 sym(vp9_highbd_filter_block1d4_v2_sse2): |
| 176 push rbp | 176 push rbp |
| 177 mov rbp, rsp | 177 mov rbp, rsp |
| 178 SHADOW_ARGS_TO_STACK 7 | 178 SHADOW_ARGS_TO_STACK 7 |
| 179 push rsi | 179 push rsi |
| 180 push rdi | 180 push rdi |
| 181 ; end prolog | 181 ; end prolog |
| 182 | 182 |
| 183 HIGH_GET_PARAM_4 | 183 HIGH_GET_PARAM_4 |
| 184 .loop: | 184 .loop: |
| 185 movq xmm0, [rsi] ;load src | 185 movq xmm0, [rsi] ;load src |
| 186 movq xmm1, [rsi + 2*rax] | 186 movq xmm1, [rsi + 2*rax] |
| 187 | 187 |
| 188 HIGH_APPLY_FILTER_4 0 | 188 HIGH_APPLY_FILTER_4 0 |
| 189 jnz .loop | 189 jnz .loop |
| 190 | 190 |
| 191 ; begin epilog | 191 ; begin epilog |
| 192 pop rdi | 192 pop rdi |
| 193 pop rsi | 193 pop rsi |
| 194 UNSHADOW_ARGS | 194 UNSHADOW_ARGS |
| 195 pop rbp | 195 pop rbp |
| 196 ret | 196 ret |
| 197 | 197 |
| 198 %if ARCH_X86_64 | 198 %if ARCH_X86_64 |
| 199 global sym(vp9_high_filter_block1d8_v2_sse2) PRIVATE | 199 global sym(vp9_highbd_filter_block1d8_v2_sse2) PRIVATE |
| 200 sym(vp9_high_filter_block1d8_v2_sse2): | 200 sym(vp9_highbd_filter_block1d8_v2_sse2): |
| 201 push rbp | 201 push rbp |
| 202 mov rbp, rsp | 202 mov rbp, rsp |
| 203 SHADOW_ARGS_TO_STACK 7 | 203 SHADOW_ARGS_TO_STACK 7 |
| 204 SAVE_XMM 8 | 204 SAVE_XMM 8 |
| 205 push rsi | 205 push rsi |
| 206 push rdi | 206 push rdi |
| 207 ; end prolog | 207 ; end prolog |
| 208 | 208 |
| 209 HIGH_GET_PARAM | 209 HIGH_GET_PARAM |
| 210 .loop: | 210 .loop: |
| 211 movdqu xmm0, [rsi] ;0 | 211 movdqu xmm0, [rsi] ;0 |
| 212 movdqu xmm1, [rsi + 2*rax] ;1 | 212 movdqu xmm1, [rsi + 2*rax] ;1 |
| 213 | 213 |
| 214 HIGH_APPLY_FILTER_8 0 | 214 HIGH_APPLY_FILTER_8 0 |
| 215 jnz .loop | 215 jnz .loop |
| 216 | 216 |
| 217 ; begin epilog | 217 ; begin epilog |
| 218 pop rdi | 218 pop rdi |
| 219 pop rsi | 219 pop rsi |
| 220 RESTORE_XMM | 220 RESTORE_XMM |
| 221 UNSHADOW_ARGS | 221 UNSHADOW_ARGS |
| 222 pop rbp | 222 pop rbp |
| 223 ret | 223 ret |
| 224 | 224 |
| 225 global sym(vp9_high_filter_block1d16_v2_sse2) PRIVATE | 225 global sym(vp9_highbd_filter_block1d16_v2_sse2) PRIVATE |
| 226 sym(vp9_high_filter_block1d16_v2_sse2): | 226 sym(vp9_highbd_filter_block1d16_v2_sse2): |
| 227 push rbp | 227 push rbp |
| 228 mov rbp, rsp | 228 mov rbp, rsp |
| 229 SHADOW_ARGS_TO_STACK 7 | 229 SHADOW_ARGS_TO_STACK 7 |
| 230 SAVE_XMM 9 | 230 SAVE_XMM 9 |
| 231 push rsi | 231 push rsi |
| 232 push rdi | 232 push rdi |
| 233 ; end prolog | 233 ; end prolog |
| 234 | 234 |
| 235 HIGH_GET_PARAM | 235 HIGH_GET_PARAM |
| 236 .loop: | 236 .loop: |
| 237 movdqu xmm0, [rsi] ;0 | 237 movdqu xmm0, [rsi] ;0 |
| 238 movdqu xmm2, [rsi + 16] | 238 movdqu xmm2, [rsi + 16] |
| 239 movdqu xmm1, [rsi + 2*rax] ;1 | 239 movdqu xmm1, [rsi + 2*rax] ;1 |
| 240 movdqu xmm3, [rsi + 2*rax + 16] | 240 movdqu xmm3, [rsi + 2*rax + 16] |
| 241 | 241 |
| 242 HIGH_APPLY_FILTER_16 0 | 242 HIGH_APPLY_FILTER_16 0 |
| 243 jnz .loop | 243 jnz .loop |
| 244 | 244 |
| 245 ; begin epilog | 245 ; begin epilog |
| 246 pop rdi | 246 pop rdi |
| 247 pop rsi | 247 pop rsi |
| 248 RESTORE_XMM | 248 RESTORE_XMM |
| 249 UNSHADOW_ARGS | 249 UNSHADOW_ARGS |
| 250 pop rbp | 250 pop rbp |
| 251 ret | 251 ret |
| 252 %endif | 252 %endif |
| 253 | 253 |
| 254 global sym(vp9_high_filter_block1d4_v2_avg_sse2) PRIVATE | 254 global sym(vp9_highbd_filter_block1d4_v2_avg_sse2) PRIVATE |
| 255 sym(vp9_high_filter_block1d4_v2_avg_sse2): | 255 sym(vp9_highbd_filter_block1d4_v2_avg_sse2): |
| 256 push rbp | 256 push rbp |
| 257 mov rbp, rsp | 257 mov rbp, rsp |
| 258 SHADOW_ARGS_TO_STACK 7 | 258 SHADOW_ARGS_TO_STACK 7 |
| 259 push rsi | 259 push rsi |
| 260 push rdi | 260 push rdi |
| 261 ; end prolog | 261 ; end prolog |
| 262 | 262 |
| 263 HIGH_GET_PARAM_4 | 263 HIGH_GET_PARAM_4 |
| 264 .loop: | 264 .loop: |
| 265 movq xmm0, [rsi] ;load src | 265 movq xmm0, [rsi] ;load src |
| 266 movq xmm1, [rsi + 2*rax] | 266 movq xmm1, [rsi + 2*rax] |
| 267 | 267 |
| 268 HIGH_APPLY_FILTER_4 1 | 268 HIGH_APPLY_FILTER_4 1 |
| 269 jnz .loop | 269 jnz .loop |
| 270 | 270 |
| 271 ; begin epilog | 271 ; begin epilog |
| 272 pop rdi | 272 pop rdi |
| 273 pop rsi | 273 pop rsi |
| 274 UNSHADOW_ARGS | 274 UNSHADOW_ARGS |
| 275 pop rbp | 275 pop rbp |
| 276 ret | 276 ret |
| 277 | 277 |
| 278 %if ARCH_X86_64 | 278 %if ARCH_X86_64 |
| 279 global sym(vp9_high_filter_block1d8_v2_avg_sse2) PRIVATE | 279 global sym(vp9_highbd_filter_block1d8_v2_avg_sse2) PRIVATE |
| 280 sym(vp9_high_filter_block1d8_v2_avg_sse2): | 280 sym(vp9_highbd_filter_block1d8_v2_avg_sse2): |
| 281 push rbp | 281 push rbp |
| 282 mov rbp, rsp | 282 mov rbp, rsp |
| 283 SHADOW_ARGS_TO_STACK 7 | 283 SHADOW_ARGS_TO_STACK 7 |
| 284 SAVE_XMM 8 | 284 SAVE_XMM 8 |
| 285 push rsi | 285 push rsi |
| 286 push rdi | 286 push rdi |
| 287 ; end prolog | 287 ; end prolog |
| 288 | 288 |
| 289 HIGH_GET_PARAM | 289 HIGH_GET_PARAM |
| 290 .loop: | 290 .loop: |
| 291 movdqu xmm0, [rsi] ;0 | 291 movdqu xmm0, [rsi] ;0 |
| 292 movdqu xmm1, [rsi + 2*rax] ;1 | 292 movdqu xmm1, [rsi + 2*rax] ;1 |
| 293 | 293 |
| 294 HIGH_APPLY_FILTER_8 1 | 294 HIGH_APPLY_FILTER_8 1 |
| 295 jnz .loop | 295 jnz .loop |
| 296 | 296 |
| 297 ; begin epilog | 297 ; begin epilog |
| 298 pop rdi | 298 pop rdi |
| 299 pop rsi | 299 pop rsi |
| 300 RESTORE_XMM | 300 RESTORE_XMM |
| 301 UNSHADOW_ARGS | 301 UNSHADOW_ARGS |
| 302 pop rbp | 302 pop rbp |
| 303 ret | 303 ret |
| 304 | 304 |
| 305 global sym(vp9_high_filter_block1d16_v2_avg_sse2) PRIVATE | 305 global sym(vp9_highbd_filter_block1d16_v2_avg_sse2) PRIVATE |
| 306 sym(vp9_high_filter_block1d16_v2_avg_sse2): | 306 sym(vp9_highbd_filter_block1d16_v2_avg_sse2): |
| 307 push rbp | 307 push rbp |
| 308 mov rbp, rsp | 308 mov rbp, rsp |
| 309 SHADOW_ARGS_TO_STACK 7 | 309 SHADOW_ARGS_TO_STACK 7 |
| 310 SAVE_XMM 9 | 310 SAVE_XMM 9 |
| 311 push rsi | 311 push rsi |
| 312 push rdi | 312 push rdi |
| 313 ; end prolog | 313 ; end prolog |
| 314 | 314 |
| 315 HIGH_GET_PARAM | 315 HIGH_GET_PARAM |
| 316 .loop: | 316 .loop: |
| 317 movdqu xmm0, [rsi] ;0 | 317 movdqu xmm0, [rsi] ;0 |
| 318 movdqu xmm1, [rsi + 2*rax] ;1 | 318 movdqu xmm1, [rsi + 2*rax] ;1 |
| 319 movdqu xmm2, [rsi + 16] | 319 movdqu xmm2, [rsi + 16] |
| 320 movdqu xmm3, [rsi + 2*rax + 16] | 320 movdqu xmm3, [rsi + 2*rax + 16] |
| 321 | 321 |
| 322 HIGH_APPLY_FILTER_16 1 | 322 HIGH_APPLY_FILTER_16 1 |
| 323 jnz .loop | 323 jnz .loop |
| 324 | 324 |
| 325 ; begin epilog | 325 ; begin epilog |
| 326 pop rdi | 326 pop rdi |
| 327 pop rsi | 327 pop rsi |
| 328 RESTORE_XMM | 328 RESTORE_XMM |
| 329 UNSHADOW_ARGS | 329 UNSHADOW_ARGS |
| 330 pop rbp | 330 pop rbp |
| 331 ret | 331 ret |
| 332 %endif | 332 %endif |
| 333 | 333 |
| 334 global sym(vp9_high_filter_block1d4_h2_sse2) PRIVATE | 334 global sym(vp9_highbd_filter_block1d4_h2_sse2) PRIVATE |
| 335 sym(vp9_high_filter_block1d4_h2_sse2): | 335 sym(vp9_highbd_filter_block1d4_h2_sse2): |
| 336 push rbp | 336 push rbp |
| 337 mov rbp, rsp | 337 mov rbp, rsp |
| 338 SHADOW_ARGS_TO_STACK 7 | 338 SHADOW_ARGS_TO_STACK 7 |
| 339 push rsi | 339 push rsi |
| 340 push rdi | 340 push rdi |
| 341 ; end prolog | 341 ; end prolog |
| 342 | 342 |
| 343 HIGH_GET_PARAM_4 | 343 HIGH_GET_PARAM_4 |
| 344 .loop: | 344 .loop: |
| 345 movdqu xmm0, [rsi] ;load src | 345 movdqu xmm0, [rsi] ;load src |
| 346 movdqa xmm1, xmm0 | 346 movdqa xmm1, xmm0 |
| 347 psrldq xmm1, 2 | 347 psrldq xmm1, 2 |
| 348 | 348 |
| 349 HIGH_APPLY_FILTER_4 0 | 349 HIGH_APPLY_FILTER_4 0 |
| 350 jnz .loop | 350 jnz .loop |
| 351 | 351 |
| 352 ; begin epilog | 352 ; begin epilog |
| 353 pop rdi | 353 pop rdi |
| 354 pop rsi | 354 pop rsi |
| 355 UNSHADOW_ARGS | 355 UNSHADOW_ARGS |
| 356 pop rbp | 356 pop rbp |
| 357 ret | 357 ret |
| 358 | 358 |
| 359 %if ARCH_X86_64 | 359 %if ARCH_X86_64 |
| 360 global sym(vp9_high_filter_block1d8_h2_sse2) PRIVATE | 360 global sym(vp9_highbd_filter_block1d8_h2_sse2) PRIVATE |
| 361 sym(vp9_high_filter_block1d8_h2_sse2): | 361 sym(vp9_highbd_filter_block1d8_h2_sse2): |
| 362 push rbp | 362 push rbp |
| 363 mov rbp, rsp | 363 mov rbp, rsp |
| 364 SHADOW_ARGS_TO_STACK 7 | 364 SHADOW_ARGS_TO_STACK 7 |
| 365 SAVE_XMM 8 | 365 SAVE_XMM 8 |
| 366 push rsi | 366 push rsi |
| 367 push rdi | 367 push rdi |
| 368 ; end prolog | 368 ; end prolog |
| 369 | 369 |
| 370 HIGH_GET_PARAM | 370 HIGH_GET_PARAM |
| 371 .loop: | 371 .loop: |
| 372 movdqu xmm0, [rsi] ;load src | 372 movdqu xmm0, [rsi] ;load src |
| 373 movdqu xmm1, [rsi + 2] | 373 movdqu xmm1, [rsi + 2] |
| 374 | 374 |
| 375 HIGH_APPLY_FILTER_8 0 | 375 HIGH_APPLY_FILTER_8 0 |
| 376 jnz .loop | 376 jnz .loop |
| 377 | 377 |
| 378 ; begin epilog | 378 ; begin epilog |
| 379 pop rdi | 379 pop rdi |
| 380 pop rsi | 380 pop rsi |
| 381 RESTORE_XMM | 381 RESTORE_XMM |
| 382 UNSHADOW_ARGS | 382 UNSHADOW_ARGS |
| 383 pop rbp | 383 pop rbp |
| 384 ret | 384 ret |
| 385 | 385 |
| 386 global sym(vp9_high_filter_block1d16_h2_sse2) PRIVATE | 386 global sym(vp9_highbd_filter_block1d16_h2_sse2) PRIVATE |
| 387 sym(vp9_high_filter_block1d16_h2_sse2): | 387 sym(vp9_highbd_filter_block1d16_h2_sse2): |
| 388 push rbp | 388 push rbp |
| 389 mov rbp, rsp | 389 mov rbp, rsp |
| 390 SHADOW_ARGS_TO_STACK 7 | 390 SHADOW_ARGS_TO_STACK 7 |
| 391 SAVE_XMM 9 | 391 SAVE_XMM 9 |
| 392 push rsi | 392 push rsi |
| 393 push rdi | 393 push rdi |
| 394 ; end prolog | 394 ; end prolog |
| 395 | 395 |
| 396 HIGH_GET_PARAM | 396 HIGH_GET_PARAM |
| 397 .loop: | 397 .loop: |
| 398 movdqu xmm0, [rsi] ;load src | 398 movdqu xmm0, [rsi] ;load src |
| 399 movdqu xmm1, [rsi + 2] | 399 movdqu xmm1, [rsi + 2] |
| 400 movdqu xmm2, [rsi + 16] | 400 movdqu xmm2, [rsi + 16] |
| 401 movdqu xmm3, [rsi + 18] | 401 movdqu xmm3, [rsi + 18] |
| 402 | 402 |
| 403 HIGH_APPLY_FILTER_16 0 | 403 HIGH_APPLY_FILTER_16 0 |
| 404 jnz .loop | 404 jnz .loop |
| 405 | 405 |
| 406 ; begin epilog | 406 ; begin epilog |
| 407 pop rdi | 407 pop rdi |
| 408 pop rsi | 408 pop rsi |
| 409 RESTORE_XMM | 409 RESTORE_XMM |
| 410 UNSHADOW_ARGS | 410 UNSHADOW_ARGS |
| 411 pop rbp | 411 pop rbp |
| 412 ret | 412 ret |
| 413 %endif | 413 %endif |
| 414 | 414 |
| 415 global sym(vp9_high_filter_block1d4_h2_avg_sse2) PRIVATE | 415 global sym(vp9_highbd_filter_block1d4_h2_avg_sse2) PRIVATE |
| 416 sym(vp9_high_filter_block1d4_h2_avg_sse2): | 416 sym(vp9_highbd_filter_block1d4_h2_avg_sse2): |
| 417 push rbp | 417 push rbp |
| 418 mov rbp, rsp | 418 mov rbp, rsp |
| 419 SHADOW_ARGS_TO_STACK 7 | 419 SHADOW_ARGS_TO_STACK 7 |
| 420 push rsi | 420 push rsi |
| 421 push rdi | 421 push rdi |
| 422 ; end prolog | 422 ; end prolog |
| 423 | 423 |
| 424 HIGH_GET_PARAM_4 | 424 HIGH_GET_PARAM_4 |
| 425 .loop: | 425 .loop: |
| 426 movdqu xmm0, [rsi] ;load src | 426 movdqu xmm0, [rsi] ;load src |
| 427 movdqa xmm1, xmm0 | 427 movdqa xmm1, xmm0 |
| 428 psrldq xmm1, 2 | 428 psrldq xmm1, 2 |
| 429 | 429 |
| 430 HIGH_APPLY_FILTER_4 1 | 430 HIGH_APPLY_FILTER_4 1 |
| 431 jnz .loop | 431 jnz .loop |
| 432 | 432 |
| 433 ; begin epilog | 433 ; begin epilog |
| 434 pop rdi | 434 pop rdi |
| 435 pop rsi | 435 pop rsi |
| 436 UNSHADOW_ARGS | 436 UNSHADOW_ARGS |
| 437 pop rbp | 437 pop rbp |
| 438 ret | 438 ret |
| 439 | 439 |
| 440 %if ARCH_X86_64 | 440 %if ARCH_X86_64 |
| 441 global sym(vp9_high_filter_block1d8_h2_avg_sse2) PRIVATE | 441 global sym(vp9_highbd_filter_block1d8_h2_avg_sse2) PRIVATE |
| 442 sym(vp9_high_filter_block1d8_h2_avg_sse2): | 442 sym(vp9_highbd_filter_block1d8_h2_avg_sse2): |
| 443 push rbp | 443 push rbp |
| 444 mov rbp, rsp | 444 mov rbp, rsp |
| 445 SHADOW_ARGS_TO_STACK 7 | 445 SHADOW_ARGS_TO_STACK 7 |
| 446 SAVE_XMM 8 | 446 SAVE_XMM 8 |
| 447 push rsi | 447 push rsi |
| 448 push rdi | 448 push rdi |
| 449 ; end prolog | 449 ; end prolog |
| 450 | 450 |
| 451 HIGH_GET_PARAM | 451 HIGH_GET_PARAM |
| 452 .loop: | 452 .loop: |
| 453 movdqu xmm0, [rsi] ;load src | 453 movdqu xmm0, [rsi] ;load src |
| 454 movdqu xmm1, [rsi + 2] | 454 movdqu xmm1, [rsi + 2] |
| 455 | 455 |
| 456 HIGH_APPLY_FILTER_8 1 | 456 HIGH_APPLY_FILTER_8 1 |
| 457 jnz .loop | 457 jnz .loop |
| 458 | 458 |
| 459 ; begin epilog | 459 ; begin epilog |
| 460 pop rdi | 460 pop rdi |
| 461 pop rsi | 461 pop rsi |
| 462 RESTORE_XMM | 462 RESTORE_XMM |
| 463 UNSHADOW_ARGS | 463 UNSHADOW_ARGS |
| 464 pop rbp | 464 pop rbp |
| 465 ret | 465 ret |
| 466 | 466 |
| 467 global sym(vp9_high_filter_block1d16_h2_avg_sse2) PRIVATE | 467 global sym(vp9_highbd_filter_block1d16_h2_avg_sse2) PRIVATE |
| 468 sym(vp9_high_filter_block1d16_h2_avg_sse2): | 468 sym(vp9_highbd_filter_block1d16_h2_avg_sse2): |
| 469 push rbp | 469 push rbp |
| 470 mov rbp, rsp | 470 mov rbp, rsp |
| 471 SHADOW_ARGS_TO_STACK 7 | 471 SHADOW_ARGS_TO_STACK 7 |
| 472 SAVE_XMM 9 | 472 SAVE_XMM 9 |
| 473 push rsi | 473 push rsi |
| 474 push rdi | 474 push rdi |
| 475 ; end prolog | 475 ; end prolog |
| 476 | 476 |
| 477 HIGH_GET_PARAM | 477 HIGH_GET_PARAM |
| 478 .loop: | 478 .loop: |
| 479 movdqu xmm0, [rsi] ;load src | 479 movdqu xmm0, [rsi] ;load src |
| 480 movdqu xmm1, [rsi + 2] | 480 movdqu xmm1, [rsi + 2] |
| 481 movdqu xmm2, [rsi + 16] | 481 movdqu xmm2, [rsi + 16] |
| 482 movdqu xmm3, [rsi + 18] | 482 movdqu xmm3, [rsi + 18] |
| 483 | 483 |
| 484 HIGH_APPLY_FILTER_16 1 | 484 HIGH_APPLY_FILTER_16 1 |
| 485 jnz .loop | 485 jnz .loop |
| 486 | 486 |
| 487 ; begin epilog | 487 ; begin epilog |
| 488 pop rdi | 488 pop rdi |
| 489 pop rsi | 489 pop rsi |
| 490 RESTORE_XMM | 490 RESTORE_XMM |
| 491 UNSHADOW_ARGS | 491 UNSHADOW_ARGS |
| 492 pop rbp | 492 pop rbp |
| 493 ret | 493 ret |
| 494 %endif | 494 %endif |
| OLD | NEW |