| Index: source/libvpx/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
|
| ===================================================================
|
| --- source/libvpx/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm (revision 293081)
|
| +++ source/libvpx/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm (working copy)
|
| @@ -1,225 +0,0 @@
|
| -;
|
| -; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
| -;
|
| -; Use of this source code is governed by a BSD-style license
|
| -; that can be found in the LICENSE file in the root of the source
|
| -; tree. An additional intellectual property rights grant can be found
|
| -; in the file PATENTS. All contributing project authors may
|
| -; be found in the AUTHORS file in the root of the source tree.
|
| -;
|
| -
|
| -
|
| - EXPORT |vp8_fast_quantize_b_armv6| ; make the entry point visible to the linker
|
| -
|
| - INCLUDE vp8_asm_enc_offsets.asm ; presumably defines the vp8_block_* / vp8_blockd_* offsets used below - confirm
|
| -
|
| - ARM ; assemble as ARM (not Thumb) instructions
|
| - REQUIRE8 ; this code requires an 8-byte aligned stack
|
| - PRESERVE8 ; this code keeps the stack 8-byte aligned
|
| -
|
| - AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
|
| -
|
| -; r0 BLOCK *b - in: supplies the coeff, quant_fast and round pointers
|
| -; r1 BLOCKD *d - in: supplies the qcoeff, dqcoeff, dequant and eob pointers
|
| -|vp8_fast_quantize_b_armv6| PROC ; quantize + dequantize 16 coeffs (4 passes x 4), then store eob
|
| - stmfd sp!, {r1, r4-r11, lr} ; 10 regs = 40 bytes; r1 lands at [sp, #0] so PART 2 can reload BLOCKD *d
|
| -; load the six working pointers; every one is post-incremented or advanced inside the loop
|
| - ldr r3, [r0, #vp8_block_coeff] ; coeff
|
| - ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
|
| - ldr r5, [r0, #vp8_block_round] ; round
|
| - ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
|
| - ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
|
| - ldr r8, [r1, #vp8_blockd_dequant] ; dequant
|
| -
|
| - ldr r2, loop_count ; loop_count=0x1000000 (1 << 24). The 'lsls #2'
|
| - ; at the bottom of the loop shifts the bit out
|
| - ; after 4 passes (Z set -> loop ends), while
|
| - ; r2 >> 24 and r2 >> 23 give the two flag bits
|
| - ; used in each pass to mark nonzero
|
| - ; quantized coefficient pairs.
|
| -
|
| - mov r1, #0 ; flags for quantized coeffs (BLOCKD *d stays saved on the stack)
|
| -
|
| - ; PART 1: quantization and dequantization loop
|
| -loop ; one pass = 4 coefficients, handled as two packed 16-bit pairs
|
| - ldr r9, [r3], #4 ; [z1 | z0]
|
| - ldr r10, [r5], #4 ; [round1 | round0] (rounding values, not registers)
|
| - ldr r11, [r4], #4 ; [q1 | q0]
|
| -
|
| - ssat16 lr, #1, r9 ; [sz1 | sz0]: 0xFFFF if the halfword is negative, else 0
|
| - eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
|
| - ssub16 r9, r9, lr ; x = (z ^ sz) - sz, i.e. |z| per halfword
|
| - sadd16 r9, r9, r10 ; [x1+round1 | x0+round0]
|
| -
|
| - ldr r12, [r3], #4 ; [z3 | z2]
|
| -
|
| - smulbb r0, r9, r11 ; [(x0+round0)*q0], 32-bit product; r0 (BLOCK *b) is scratch from here
|
| - smultt r9, r9, r11 ; [(x1+round1)*q1], 32-bit product
|
| -
|
| - ldr r10, [r5], #4 ; [round3 | round2]
|
| -
|
| - ssat16 r11, #1, r12 ; [sz3 | sz2]
|
| - eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
|
| - pkhtb r0, r9, r0, asr #16 ; [y1 | y0]: y = top halfword of each product (product >> 16)
|
| - ldr r9, [r4], #4 ; [q3 | q2]
|
| - ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
|
| -
|
| - sadd16 r12, r12, r10 ; [x3+round3 | x2+round2]
|
| -
|
| - eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
|
| -
|
| - smulbb r10, r12, r9 ; [(x2+round2)*q2]
|
| - smultt r12, r12, r9 ; [(x3+round3)*q3]
|
| -
|
| - ssub16 r0, r0, lr ; x = (y ^ sz) - sz, restores the sign of z
|
| -
|
| - cmp r0, #0 ; check if both packed coeffs are zero
|
| - orrne r1, r1, r2, lsr #24 ; flag bit 0x01/0x04/0x10/0x40 on passes 1-4 if either is nonzero
|
| -
|
| - str r0, [r6], #4 ; *qcoeff++ = x (two packed 16-bit values)
|
| - ldr r9, [r8], #4 ; [dq1 | dq0]
|
| -
|
| - pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
|
| - eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
|
| - ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
|
| -
|
| - cmp r10, #0 ; check if both packed coeffs are zero
|
| - orrne r1, r1, r2, lsr #23 ; flag bit 0x02/0x08/0x20/0x80 on passes 1-4 if either is nonzero
|
| -
|
| - str r10, [r6], #4 ; *qcoeff++ = x (two packed 16-bit values)
|
| - ldr r11, [r8], #4 ; [dq3 | dq2]
|
| -
|
| - smulbb r12, r0, r9 ; [x0*dq0]
|
| - smultt r0, r0, r9 ; [x1*dq1]
|
| -
|
| - smulbb r9, r10, r11 ; [x2*dq2]
|
| - smultt r10, r10, r11 ; [x3*dq3]
|
| -
|
| - lsls r2, r2, #2 ; update loop counter; Z is set once the bit is shifted out
|
| - strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] (low 16 bits; strh/add leave the flags alone)
|
| - strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
|
| - strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
|
| - strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
|
| - add r7, r7, #8 ; advance dqcoeff by 8 bytes (4 halfwords)
|
| - bne loop ; tests the flags set by 'lsls' above
|
| -
|
| - ; PART 2: check position for eob...
|
| - ldr r11, [sp, #0] ; restore BLOCKD pointer (r1 pushed on entry; r1 now holds the flags)
|
| - mov lr, #0 ; init eob
|
| - cmp r1, #0 ; coeffs after quantization?
|
| - ldr r12, [r11, #vp8_blockd_eob] ; r12 = address the eob byte is stored to at 'end'
|
| - beq end ; skip eob calculations if all zero
|
| -
|
| - ldr r0, [r11, #vp8_blockd_qcoeff] ; r0 = start of qcoeff again (r6 has advanced past it)
|
| -
|
| - ; check shortcut for nonzero qcoeffs: flag bit n covers rc = 2n and 2n+1; enter the ladder at the highest i that may be nonzero
|
| - tst r1, #0x80 ; rc 14/15 nonzero?
|
| - bne quant_coeff_15_14
|
| - tst r1, #0x20 ; rc 10/11 nonzero?
|
| - bne quant_coeff_13_11
|
| - tst r1, #0x8 ; rc 6/7 nonzero?
|
| - bne quant_coeff_12_7
|
| - tst r1, #0x40 ; rc 12/13 nonzero?
|
| - bne quant_coeff_10_9
|
| - tst r1, #0x10 ; rc 8/9 nonzero?
|
| - bne quant_coeff_8_3
|
| - tst r1, #0x2 ; rc 2/3 nonzero?
|
| - bne quant_coeff_6_5
|
| - tst r1, #0x4 ; rc 4/5 nonzero?
|
| - bne quant_coeff_4_2
|
| - b quant_coeff_1_0 ; r1 != 0 and bits 1-7 are clear, so only bit 0x1 (rc 0/1) is set
|
| -; ladder: rc = qcoeff index (byte offset / 2), i = scan position (presumably VP8 zig-zag order - confirm); checks run in descending i
|
| -quant_coeff_15_14
|
| - ldrh r2, [r0, #30] ; rc=15, i=15
|
| - mov lr, #16 ; eob candidate = i + 1, kept if this coeff is nonzero
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| - ldrh r3, [r0, #28] ; rc=14, i=14
|
| - mov lr, #15
|
| - cmp r3, #0
|
| - bne end
|
| -; each label below is both a shortcut entry point and a fall-through target
|
| -quant_coeff_13_11
|
| - ldrh r2, [r0, #22] ; rc=11, i=13
|
| - mov lr, #14
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| -quant_coeff_12_7
|
| - ldrh r3, [r0, #14] ; rc=7, i=12
|
| - mov lr, #13
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| - ldrh r2, [r0, #20] ; rc=10, i=11
|
| - mov lr, #12
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| -quant_coeff_10_9
|
| - ldrh r3, [r0, #26] ; rc=13, i=10
|
| - mov lr, #11
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| - ldrh r2, [r0, #24] ; rc=12, i=9
|
| - mov lr, #10
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| -quant_coeff_8_3
|
| - ldrh r3, [r0, #18] ; rc=9, i=8
|
| - mov lr, #9
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| - ldrh r2, [r0, #12] ; rc=6, i=7
|
| - mov lr, #8
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| -quant_coeff_6_5
|
| - ldrh r3, [r0, #6] ; rc=3, i=6
|
| - mov lr, #7
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| - ldrh r2, [r0, #4] ; rc=2, i=5
|
| - mov lr, #6
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| -quant_coeff_4_2
|
| - ldrh r3, [r0, #10] ; rc=5, i=4
|
| - mov lr, #5
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| - ldrh r2, [r0, #16] ; rc=8, i=3
|
| - mov lr, #4
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| - ldrh r3, [r0, #8] ; rc=4, i=2
|
| - mov lr, #3
|
| - cmp r3, #0
|
| - bne end
|
| -
|
| -quant_coeff_1_0
|
| - ldrh r2, [r0, #2] ; rc=1, i=1
|
| - mov lr, #2
|
| - cmp r2, #0
|
| - bne end
|
| -
|
| - mov lr, #1 ; rc=0, i=0: every other check fell through, so eob = 1 without loading rc=0
|
| -
|
| -end
|
| - strb lr, [r12] ; store eob as a single byte through the pointer read from BLOCKD
|
| - ldmfd sp!, {r1, r4-r11, pc} ; restore saved registers and return (saved lr is popped into pc)
|
| -
|
| - ENDP
|
| -
|
| -loop_count ; literal word read by 'ldr r2, loop_count' in vp8_fast_quantize_b_armv6
|
| - DCD 0x1000000 ; 1 << 24: shifted left by 2 per loop pass, it reaches zero after 4 passes
|
| -
|
| - END
|
| -
|
|
|