Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(560)

Unified Diff: libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm

Issue 7624054: Revert r97185 "Update libvpx snapshot to v0.9.7-p1 (Cayuga)." (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party
Patch Set: Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm
diff --git a/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm b/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm
index 1cbbbcdef5e2533b43a7095b9588d07b474d939b..b6417dee65f2b1606a86915035124f20136ba8b3 100644
--- a/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -53,11 +53,14 @@ count RN r5
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
-;r2 const char *blimit,
+;r2 const char *flimit,
;r3 const char *limit,
;stack const char *thresh,
;stack int count
+;Note: All 16 elements in flimit are equal, so the code needs only one load
+;for flimit. The same applies to limit and thresh.
+
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
@@ -69,18 +72,14 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
- ldrb r4, [r2] ; blimit
+ ldr r4, [r2], #4 ; flimit
ldr r10, [src], pstep ; p2
- ldrb r2, [r3] ; limit
+ ldr r2, [r3], #4 ; limit
ldr r11, [src], pstep ; p1
- orr r4, r4, r4, lsl #8
- ldrb r3, [r6] ; thresh
- orr r2, r2, r2, lsl #8
+ uadd8 r4, r4, r4 ; flimit * 2
+ ldr r3, [r6], #4 ; thresh
mov count, count, lsl #1 ; 4-in-parallel
- orr r4, r4, r4, lsl #16
- orr r3, r3, r3, lsl #8
- orr r2, r2, r2, lsl #16
- orr r3, r3, r3, lsl #16
+ uadd8 r4, r4, r2 ; flimit * 2 + limit
|Hnext8|
; vp8_filter_mask() function
@@ -254,6 +253,12 @@ count RN r5
subs count, count, #1
+ ;pld [src]
+ ;pld [src, pstep]
+ ;pld [src, pstep, lsl #1]
+ ;pld [src, pstep, lsl #2]
+ ;pld [src, pstep, lsl #3]
+
ldrne r9, [src], pstep ; p3
ldrne r10, [src], pstep ; p2
ldrne r11, [src], pstep ; p1
@@ -276,18 +281,14 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
- ldrb r4, [r2] ; blimit
+ ldr r4, [r2], #4 ; flimit
ldr r10, [src], pstep ; p2
- ldrb r2, [r3] ; limit
+ ldr r2, [r3], #4 ; limit
ldr r11, [src], pstep ; p1
- orr r4, r4, r4, lsl #8
- ldrb r3, [r6] ; thresh
- orr r2, r2, r2, lsl #8
+ uadd8 r4, r4, r4 ; flimit * 2
+ ldr r3, [r6], #4 ; thresh
mov count, count, lsl #1 ; 4-in-parallel
- orr r4, r4, r4, lsl #16
- orr r3, r3, r3, lsl #8
- orr r2, r2, r2, lsl #16
- orr r3, r3, r3, lsl #16
+ uadd8 r4, r4, r2 ; flimit * 2 + limit
|MBHnext8|
@@ -589,19 +590,15 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
- ldrb r4, [r2] ; blimit
+ ldr r4, [r2], #4 ; flimit
ldr r7, [src], pstep
- ldrb r2, [r3] ; limit
+ ldr r2, [r3], #4 ; limit
ldr r8, [src], pstep
- orr r4, r4, r4, lsl #8
- ldrb r3, [r12] ; thresh
- orr r2, r2, r2, lsl #8
+ uadd8 r4, r4, r4 ; flimit * 2
+ ldr r3, [r12], #4 ; thresh
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
- orr r4, r4, r4, lsl #16
- orr r3, r3, r3, lsl #8
- orr r2, r2, r2, lsl #16
- orr r3, r3, r3, lsl #16
+ uadd8 r4, r4, r2 ; flimit * 2 + limit
|Vnext8|
@@ -860,26 +857,18 @@ count RN r5
sub src, src, #4 ; move src pointer down by 4
ldr count, [sp, #40] ; count for 8-in-parallel
ldr r12, [sp, #36] ; load thresh address
- pld [src, #23] ; preload for next block
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
- ldrb r4, [r2] ; blimit
- pld [src, #23]
+ ldr r4, [r2], #4 ; flimit
ldr r7, [src], pstep
- ldrb r2, [r3] ; limit
- pld [src, #23]
+ ldr r2, [r3], #4 ; limit
ldr r8, [src], pstep
- orr r4, r4, r4, lsl #8
- ldrb r3, [r12] ; thresh
- orr r2, r2, r2, lsl #8
- pld [src, #23]
+ uadd8 r4, r4, r4 ; flimit * 2
+ ldr r3, [r12], #4 ; thresh
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
- orr r4, r4, r4, lsl #16
- orr r3, r3, r3, lsl #8
- orr r2, r2, r2, lsl #16
- orr r3, r3, r3, lsl #16
+ uadd8 r4, r4, r2 ; flimit * 2 + limit
|MBVnext8|
; vp8_filter_mask() function
@@ -919,7 +908,6 @@ count RN r5
str lr, [sp, #8]
ldr lr, [src], pstep
-
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
ldr lr, [sp, #8] ; load back (f)limit accumulator
@@ -968,7 +956,6 @@ count RN r5
beq mbvskip_filter ; skip filtering
-
;vp8_hevmask() function
;calculate high edge variance
@@ -1136,7 +1123,6 @@ count RN r5
smlabb r8, r6, lr, r7
smlatb r6, r6, lr, r7
smlabb r9, r10, lr, r7
-
smlatb r10, r10, lr, r7
ssat r8, #8, r8, asr #7
ssat r6, #8, r6, asr #7
@@ -1256,13 +1242,9 @@ count RN r5
sub src, src, #4
subs count, count, #1
- pld [src, #23] ; preload for next block
ldrne r6, [src], pstep ; load source data
- pld [src, #23]
ldrne r7, [src], pstep
- pld [src, #23]
ldrne r8, [src], pstep
- pld [src, #23]
ldrne lr, [src], pstep
bne MBVnext8

Powered by Google App Engine
This is Rietveld 408576698