Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Unified Diff: libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm

Issue 7624054: Revert r97185 "Update libvpx snapshot to v0.9.7-p1 (Cayuga)." (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party
Patch Set: Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm
diff --git a/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm b/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm
index 1ba91ddd657a1da84f84426f82b3d9a37fcfb7cb..03b5bccd73bfc46e398bdf7f23355166069c1c5f 100644
--- a/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm
+++ b/libvpx/source/libvpx/vp8/common/arm/armv6/filter_v6.asm
@@ -10,8 +10,6 @@
EXPORT |vp8_filter_block2d_first_pass_armv6|
- EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
- EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
EXPORT |vp8_filter_block2d_second_pass_armv6|
EXPORT |vp8_filter4_block2d_second_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
@@ -42,6 +40,11 @@
add r12, r3, #16 ; square off the output
sub sp, sp, #4
+ ;;IF ARCHITECTURE=6
+ ;pld [r0, #-2]
+ ;;pld [r0, #30]
+ ;;ENDIF
+
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
@@ -98,200 +101,19 @@
bne width_loop_1st_6
- ldr r1, [sp] ; load and update dst address
- subs r7, r7, #0x10000
- add r0, r0, r2 ; move to next input line
-
- add r1, r1, #2 ; move over to next column
- str r1, [sp]
-
- bne height_loop_1st_6
-
- add sp, sp, #4
- ldmia sp!, {r4 - r11, pc}
-
- ENDP
-
-; --------------------------
-; 16x16 version
-; -----------------------------
-|vp8_filter_block2d_first_pass_16x16_armv6| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr r11, [sp, #40] ; vp8_filter address
- ldr r7, [sp, #36] ; output height
-
- add r4, r2, #18 ; preload next low
- pld [r0, r4]
-
- sub r2, r2, r3 ; inside loop increments input array,
- ; so the height loop only needs to add
- ; r2 - width to the input pointer
-
- mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
- add r12, r3, #16 ; square off the output
- sub sp, sp, #4
-
- ldr r4, [r11] ; load up packed filter coefficients
- ldr r5, [r11, #4]
- ldr r6, [r11, #8]
-
- str r1, [sp] ; push destination to stack
- mov r7, r7, lsl #16 ; height is top part of counter
-
-; six tap filter
-|height_loop_1st_16_6|
- ldrb r8, [r0, #-2] ; load source data
- ldrb r9, [r0, #-1]
- ldrb r10, [r0], #2
- orr r7, r7, r3, lsr #2 ; construct loop counter
-
-|width_loop_1st_16_6|
- ldrb r11, [r0, #-1]
-
- pkhbt lr, r8, r9, lsl #16 ; r9 | r8
- pkhbt r8, r9, r10, lsl #16 ; r10 | r9
-
- ldrb r9, [r0]
-
- smuad lr, lr, r4 ; apply the filter
- pkhbt r10, r10, r11, lsl #16 ; r11 | r10
- smuad r8, r8, r4
- pkhbt r11, r11, r9, lsl #16 ; r9 | r11
-
- smlad lr, r10, r5, lr
- ldrb r10, [r0, #1]
- smlad r8, r11, r5, r8
- ldrb r11, [r0, #2]
-
- sub r7, r7, #1
-
- pkhbt r9, r9, r10, lsl #16 ; r10 | r9
- pkhbt r10, r10, r11, lsl #16 ; r11 | r10
-
- smlad lr, r9, r6, lr
- smlad r11, r10, r6, r8
-
- ands r10, r7, #0xff ; test loop counter
-
- add lr, lr, #0x40 ; round_shift_and_clamp
- ldrneb r8, [r0, #-2] ; load data for next loop
- usat lr, #8, lr, asr #7
- add r11, r11, #0x40
- ldrneb r9, [r0, #-1]
- usat r11, #8, r11, asr #7
-
- strh lr, [r1], r12 ; result is transposed and stored, which
- ; will make second pass filtering easier.
- ldrneb r10, [r0], #2
- strh r11, [r1], r12
-
- bne width_loop_1st_16_6
+ ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
+ ;;IF ARCHITECTURE=6
+ ;pld [r0, r2]
+ ;;pld [r0, r9]
+ ;;ENDIF
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
-
- add r11, r2, #34 ; adding back block width(=16)
- pld [r0, r11] ; preload next low
-
add r1, r1, #2 ; move over to next column
str r1, [sp]
- bne height_loop_1st_16_6
-
- add sp, sp, #4
- ldmia sp!, {r4 - r11, pc}
-
- ENDP
-
-; --------------------------
-; 8x8 version
-; -----------------------------
-|vp8_filter_block2d_first_pass_8x8_armv6| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr r11, [sp, #40] ; vp8_filter address
- ldr r7, [sp, #36] ; output height
-
- add r4, r2, #10 ; preload next low
- pld [r0, r4]
-
- sub r2, r2, r3 ; inside loop increments input array,
- ; so the height loop only needs to add
- ; r2 - width to the input pointer
-
- mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
- add r12, r3, #16 ; square off the output
- sub sp, sp, #4
-
- ldr r4, [r11] ; load up packed filter coefficients
- ldr r5, [r11, #4]
- ldr r6, [r11, #8]
-
- str r1, [sp] ; push destination to stack
- mov r7, r7, lsl #16 ; height is top part of counter
-
-; six tap filter
-|height_loop_1st_8_6|
- ldrb r8, [r0, #-2] ; load source data
- ldrb r9, [r0, #-1]
- ldrb r10, [r0], #2
- orr r7, r7, r3, lsr #2 ; construct loop counter
-
-|width_loop_1st_8_6|
- ldrb r11, [r0, #-1]
-
- pkhbt lr, r8, r9, lsl #16 ; r9 | r8
- pkhbt r8, r9, r10, lsl #16 ; r10 | r9
-
- ldrb r9, [r0]
-
- smuad lr, lr, r4 ; apply the filter
- pkhbt r10, r10, r11, lsl #16 ; r11 | r10
- smuad r8, r8, r4
- pkhbt r11, r11, r9, lsl #16 ; r9 | r11
-
- smlad lr, r10, r5, lr
- ldrb r10, [r0, #1]
- smlad r8, r11, r5, r8
- ldrb r11, [r0, #2]
-
- sub r7, r7, #1
-
- pkhbt r9, r9, r10, lsl #16 ; r10 | r9
- pkhbt r10, r10, r11, lsl #16 ; r11 | r10
-
- smlad lr, r9, r6, lr
- smlad r11, r10, r6, r8
-
- ands r10, r7, #0xff ; test loop counter
-
- add lr, lr, #0x40 ; round_shift_and_clamp
- ldrneb r8, [r0, #-2] ; load data for next loop
- usat lr, #8, lr, asr #7
- add r11, r11, #0x40
- ldrneb r9, [r0, #-1]
- usat r11, #8, r11, asr #7
-
- strh lr, [r1], r12 ; result is transposed and stored, which
- ; will make second pass filtering easier.
- ldrneb r10, [r0], #2
- strh r11, [r1], r12
-
- bne width_loop_1st_8_6
-
- ldr r1, [sp] ; load and update dst address
- subs r7, r7, #0x10000
- add r0, r0, r2 ; move to next input line
-
- add r11, r2, #18 ; adding back block width(=8)
- pld [r0, r11] ; preload next low
-
- add r1, r1, #2 ; move over to next column
- str r1, [sp]
-
- bne height_loop_1st_8_6
+ bne height_loop_1st_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
@@ -440,10 +262,6 @@
|vp8_filter_block2d_first_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
- add r7, r2, r3 ; preload next low
- add r7, r7, #2
- pld [r0, r7]
-
ldr r4, [sp, #36] ; output pitch
ldr r11, [sp, #40] ; HFilter address
sub sp, sp, #8
@@ -512,15 +330,16 @@
bne width_loop_1st_only_6
+ ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
+ ;;IF ARCHITECTURE=6
+ ;pld [r0, r2]
+ ;;pld [r0, r9]
+ ;;ENDIF
+
ldr lr, [sp] ; load back output pitch
ldr r12, [sp, #4] ; load back output pitch
subs r7, r7, #1
add r0, r0, r12 ; updata src for next loop
-
- add r11, r12, r3 ; preload next low
- add r11, r11, #2
- pld [r0, r11]
-
add r1, r1, lr ; update dst for next loop
bne height_loop_1st_only_6

Powered by Google App Engine
This is Rietveld 408576698