;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"
%include "vp9_asm_enc_offsets.asm"


;void vp9_fast_quantize_b_ssse3 | arg
;    (BLOCK  *b,                |  0
;     BLOCKD *d)                |  1
;
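;
; scalar equivalent (derived from the code below), per coefficient z:
;   sz      = z >> 15                                ; 0 or -1
;   x       = abs(z) + round
;   y       = (x * quant_fast) >> 16
;   qcoeff  = (y ^ sz) - sz                          ; reapply the sign
;   dqcoeff = qcoeff * dequant
; eob = 1 + zigzag index of the last nonzero qcoeff (0 if all are zero)
;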

global sym(vp9_fast_quantize_b_ssse3)
sym(vp9_fast_quantize_b_ssse3):
    push        rbp
    mov         rbp, rsp
    GET_GOT     rbx
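    ; GET_GOT/RESTORE_GOT make GLOBAL() data references (zz_shuf below)
    ; position-independent where the target needs it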

%if ABI_IS_32BIT
    push        rdi
    push        rsi
%else
  %ifidn __OUTPUT_FORMAT__,x64
    push        rdi
    push        rsi
  %endif
%endif
    ; end prolog

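    ; the two pointer arguments arrive per the ABI: on the stack for
    ; 32-bit, in rcx/rdx on Win64, and already in rdi/rsi on SysV x86-64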
%if ABI_IS_32BIT
    mov         rdi, arg(0)                 ; BLOCK *b
    mov         rsi, arg(1)                 ; BLOCKD *d
%else
  %ifidn __OUTPUT_FORMAT__,x64
    mov         rdi, rcx                    ; BLOCK *b
    mov         rsi, rdx                    ; BLOCKD *d
  %else
    ;mov        rdi, rdi                    ; BLOCK *b
    ;mov        rsi, rsi                    ; BLOCKD *d
  %endif
%endif

    mov         rax, [rdi + vp9_block_coeff]
    mov         rcx, [rdi + vp9_block_round]
    mov         rdx, [rdi + vp9_block_quant_fast]

    ; z = coeff: 16 signed 16-bit coefficients, 8 per register
    movdqa      xmm0, [rax]
    movdqa      xmm4, [rax + 16]

    ; round
    movdqa      xmm2, [rcx]
    movdqa      xmm3, [rcx + 16]

    movdqa      xmm1, xmm0                  ; working copy of z
    movdqa      xmm5, xmm4

    ; sz = z >> 15 (arithmetic): 0 where z >= 0, -1 where z < 0
    psraw       xmm0, 15
    psraw       xmm4, 15

    pabsw       xmm1, xmm1                  ; x = abs(z)
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2                  ; x += round
    paddw       xmm5, xmm3

    ; y = (x * quant_fast) >> 16; pmulhw keeps the high word of the
    ; signed 16x16 product
    pmulhw      xmm1, [rdx]
    pmulhw      xmm5, [rdx + 16]

    mov         rax, [rsi + vp9_blockd_qcoeff]
    mov         rdi, [rsi + vp9_blockd_dequant]
    mov         rcx, [rsi + vp9_blockd_dqcoeff]

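    ; reapply the sign: (y ^ sz) - sz is y where sz == 0 and -y where
    ; sz == -1 (two's complement negation)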
    pxor        xmm1, xmm0
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rax], xmm1                 ; store qcoeff
    movdqa      [rax + 16], xmm5

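    ; dequantize: dqcoeff = qcoeff * dequant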
    movdqa      xmm2, [rdi]
    movdqa      xmm3, [rdi + 16]

    pxor        xmm4, xmm4                  ; zero, for the compares below
    pmullw      xmm2, xmm1                  ; pmullw keeps the low word
    pmullw      xmm3, xmm5

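    ; eob: compare against zero, pack the 16 word masks into bytes, and
    ; reorder into zigzag scan order so a bit scan finds the last nonzero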
    pcmpeqw     xmm1, xmm4                  ;FFFF where qcoeff == 0
    pcmpeqw     xmm5, xmm4                  ;(a zero mask; inverted below)
    packsswb    xmm1, xmm5                  ;pack 16 word masks into 16 bytes
    pshufb      xmm1, [GLOBAL(zz_shuf)]     ;reorder into zigzag scan order

    pmovmskb    edx, xmm1                   ;one bit per coefficient, set
                                            ;where qcoeff == 0

    xor         rdi, rdi
    mov         eax, -1
    xor         dx, ax                      ;flip the bits for bsr: now set
                                            ;where qcoeff != 0
    bsr         eax, edx                    ;zigzag index of last nonzero

    movdqa      [rcx], xmm2                 ;store dqcoeff
    movdqa      [rcx + 16], xmm3            ;store dqcoeff

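    ; bsr's result is undefined when edx is zero (no nonzero coefficients);
    ; the mask computed below forces eob to 0 in that case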
    sub         edi, edx                    ;edi = -edx (edi was 0)
    sar         edi, 31                     ;-1 if mask nonzero, else 0
    add         eax, 1                      ;eob = last nonzero index + 1
    and         eax, edi                    ;if the bit mask was all zero,
                                            ;then eob = 0
    mov         [rsi + vp9_blockd_eob], eax

    ; begin epilog
%if ABI_IS_32BIT
    pop         rsi
    pop         rdi
%else
  %ifidn __OUTPUT_FORMAT__,x64
    pop         rsi
    pop         rdi
  %endif
%endif

    RESTORE_GOT
    pop         rbp
    ret

SECTION_RODATA
align 16
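; zigzag scan order of a 4x4 block: pshufb moves byte zz_shuf[i] of the
; raster-order mask into position i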
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15