libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm - Issue 7624054: Revert r97185 "Update libvpx snapshot to v0.9.7-p1 (Cayuga)."

Unified Diff: libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

Issue 7624054: Revert r97185 "Update libvpx snapshot to v0.9.7-p1 (Cayuga)." (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party

Patch Set: Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « libvpx/source/libvpx/vp8/common/arm/armv6/copymem16x16_v6.asm ('k') | libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

diff --git a/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm b/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

index 1ba91ddd657a1da84f84426f82b3d9a37fcfb7cb..03b5bccd73bfc46e398bdf7f23355166069c1c5f 100644

--- a/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

+++ b/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

@@ -10,8 +10,6 @@

EXPORT |vp8_filter_block2d_first_pass_armv6|

- EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|

- EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|

EXPORT |vp8_filter_block2d_second_pass_armv6|

EXPORT |vp8_filter4_block2d_second_pass_armv6|

EXPORT |vp8_filter_block2d_first_pass_only_armv6|

@@ -42,6 +40,11 @@

add r12, r3, #16 ; square off the output

sub sp, sp, #4

+ ;;IF ARCHITECTURE=6

+ ;pld [r0, #-2]

+ ;;pld [r0, #30]

+ ;;ENDIF

ldr r4, [r11] ; load up packed filter coefficients

ldr r5, [r11, #4]

ldr r6, [r11, #8]

@@ -98,200 +101,19 @@

bne width_loop_1st_6

- ldr r1, [sp] ; load and update dst address

- subs r7, r7, #0x10000

- add r0, r0, r2 ; move to next input line

- add r1, r1, #2 ; move over to next column

- str r1, [sp]

- bne height_loop_1st_6

- add sp, sp, #4

- ldmia sp!, {r4 - r11, pc}

- ENDP

-; --------------------------

-; 16x16 version

-; -----------------------------

-|vp8_filter_block2d_first_pass_16x16_armv6| PROC

- stmdb sp!, {r4 - r11, lr}

- ldr r11, [sp, #40] ; vp8_filter address

- ldr r7, [sp, #36] ; output height

- add r4, r2, #18 ; preload next low

- pld [r0, r4]

- sub r2, r2, r3 ; inside loop increments input array,

- ; so the height loop only needs to add

- ; r2 - width to the input pointer

- mov r3, r3, lsl #1 ; multiply width by 2 because using shorts

- add r12, r3, #16 ; square off the output

- sub sp, sp, #4

- ldr r4, [r11] ; load up packed filter coefficients

- ldr r5, [r11, #4]

- ldr r6, [r11, #8]

- str r1, [sp] ; push destination to stack

- mov r7, r7, lsl #16 ; height is top part of counter

-; six tap filter

-|height_loop_1st_16_6|

- ldrb r8, [r0, #-2] ; load source data

- ldrb r9, [r0, #-1]

- ldrb r10, [r0], #2

- orr r7, r7, r3, lsr #2 ; construct loop counter

-|width_loop_1st_16_6|

- ldrb r11, [r0, #-1]

- pkhbt lr, r8, r9, lsl #16 ; r9 | r8

- pkhbt r8, r9, r10, lsl #16 ; r10 | r9

- ldrb r9, [r0]

- smuad lr, lr, r4 ; apply the filter

- pkhbt r10, r10, r11, lsl #16 ; r11 | r10

- smuad r8, r8, r4

- pkhbt r11, r11, r9, lsl #16 ; r9 | r11

- smlad lr, r10, r5, lr

- ldrb r10, [r0, #1]

- smlad r8, r11, r5, r8

- ldrb r11, [r0, #2]

- sub r7, r7, #1

- pkhbt r9, r9, r10, lsl #16 ; r10 | r9

- pkhbt r10, r10, r11, lsl #16 ; r11 | r10

- smlad lr, r9, r6, lr

- smlad r11, r10, r6, r8

- ands r10, r7, #0xff ; test loop counter

- add lr, lr, #0x40 ; round_shift_and_clamp

- ldrneb r8, [r0, #-2] ; load data for next loop

- usat lr, #8, lr, asr #7

- add r11, r11, #0x40

- ldrneb r9, [r0, #-1]

- usat r11, #8, r11, asr #7

- strh lr, [r1], r12 ; result is transposed and stored, which

- ; will make second pass filtering easier.

- ldrneb r10, [r0], #2

- strh r11, [r1], r12

- bne width_loop_1st_16_6

+ ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines

+ ;;IF ARCHITECTURE=6

+ ;pld [r0, r2]

+ ;;pld [r0, r9]

+ ;;ENDIF

ldr r1, [sp] ; load and update dst address

subs r7, r7, #0x10000

add r0, r0, r2 ; move to next input line

- add r11, r2, #34 ; adding back block width(=16)

- pld [r0, r11] ; preload next low

add r1, r1, #2 ; move over to next column

str r1, [sp]

- bne height_loop_1st_16_6

- add sp, sp, #4

- ldmia sp!, {r4 - r11, pc}

- ENDP

-; --------------------------

-; 8x8 version

-; -----------------------------

-|vp8_filter_block2d_first_pass_8x8_armv6| PROC

- stmdb sp!, {r4 - r11, lr}

- ldr r11, [sp, #40] ; vp8_filter address

- ldr r7, [sp, #36] ; output height

- add r4, r2, #10 ; preload next low

- pld [r0, r4]

- sub r2, r2, r3 ; inside loop increments input array,

- ; so the height loop only needs to add

- ; r2 - width to the input pointer

- mov r3, r3, lsl #1 ; multiply width by 2 because using shorts

- add r12, r3, #16 ; square off the output

- sub sp, sp, #4

- ldr r4, [r11] ; load up packed filter coefficients

- ldr r5, [r11, #4]

- ldr r6, [r11, #8]

- str r1, [sp] ; push destination to stack

- mov r7, r7, lsl #16 ; height is top part of counter

-; six tap filter

-|height_loop_1st_8_6|

- ldrb r8, [r0, #-2] ; load source data

- ldrb r9, [r0, #-1]

- ldrb r10, [r0], #2

- orr r7, r7, r3, lsr #2 ; construct loop counter

-|width_loop_1st_8_6|

- ldrb r11, [r0, #-1]

- pkhbt lr, r8, r9, lsl #16 ; r9 | r8

- pkhbt r8, r9, r10, lsl #16 ; r10 | r9

- ldrb r9, [r0]

- smuad lr, lr, r4 ; apply the filter

- pkhbt r10, r10, r11, lsl #16 ; r11 | r10

- smuad r8, r8, r4

- pkhbt r11, r11, r9, lsl #16 ; r9 | r11

- smlad lr, r10, r5, lr

- ldrb r10, [r0, #1]

- smlad r8, r11, r5, r8

- ldrb r11, [r0, #2]

- sub r7, r7, #1

- pkhbt r9, r9, r10, lsl #16 ; r10 | r9

- pkhbt r10, r10, r11, lsl #16 ; r11 | r10

- smlad lr, r9, r6, lr

- smlad r11, r10, r6, r8

- ands r10, r7, #0xff ; test loop counter

- add lr, lr, #0x40 ; round_shift_and_clamp

- ldrneb r8, [r0, #-2] ; load data for next loop

- usat lr, #8, lr, asr #7

- add r11, r11, #0x40

- ldrneb r9, [r0, #-1]

- usat r11, #8, r11, asr #7

- strh lr, [r1], r12 ; result is transposed and stored, which

- ; will make second pass filtering easier.

- ldrneb r10, [r0], #2

- strh r11, [r1], r12

- bne width_loop_1st_8_6

- ldr r1, [sp] ; load and update dst address

- subs r7, r7, #0x10000

- add r0, r0, r2 ; move to next input line

- add r11, r2, #18 ; adding back block width(=8)

- pld [r0, r11] ; preload next low

- add r1, r1, #2 ; move over to next column

- str r1, [sp]

- bne height_loop_1st_8_6

+ bne height_loop_1st_6

add sp, sp, #4

ldmia sp!, {r4 - r11, pc}

@@ -440,10 +262,6 @@

|vp8_filter_block2d_first_pass_only_armv6| PROC

stmdb sp!, {r4 - r11, lr}

- add r7, r2, r3 ; preload next low

- add r7, r7, #2

- pld [r0, r7]

ldr r4, [sp, #36] ; output pitch

ldr r11, [sp, #40] ; HFilter address

sub sp, sp, #8

@@ -512,15 +330,16 @@

bne width_loop_1st_only_6

+ ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines

+ ;;IF ARCHITECTURE=6

+ ;pld [r0, r2]

+ ;;pld [r0, r9]

+ ;;ENDIF

ldr lr, [sp] ; load back output pitch

ldr r12, [sp, #4] ; load back source increment (added to r0 below)

subs r7, r7, #1

add r0, r0, r12 ; update src for next loop

- add r11, r12, r3 ; preload next low

- add r11, r11, #2

- pld [r0, r11]

add r1, r1, lr ; update dst for next loop

bne height_loop_1st_only_6