| Index: source/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm
|
| ===================================================================
|
| --- source/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm (revision 293081)
|
| +++ source/libvpx/vp9/common/arm/neon/vp9_convolve8_neon.asm (working copy)
|
| @@ -78,7 +78,7 @@
|
|
|
| mov r10, r6 ; w loop counter
|
|
|
| -loop_horiz_v
|
| +vp9_convolve8_loop_horiz_v
|
| vld1.8 {d24}, [r0], r1
|
| vld1.8 {d25}, [r0], r1
|
| vld1.8 {d26}, [r0], r1
|
| @@ -101,7 +101,7 @@
|
|
|
| add r0, r0, #3
|
|
|
| -loop_horiz
|
| +vp9_convolve8_loop_horiz
|
| add r5, r0, #64
|
|
|
| vld1.32 {d28[]}, [r0], r1
|
| @@ -159,7 +159,7 @@
|
| vmov q9, q13
|
|
|
| subs r6, r6, #4 ; w -= 4
|
| - bgt loop_horiz
|
| + bgt vp9_convolve8_loop_horiz
|
|
|
| ; outer loop
|
| mov r6, r10 ; restore w counter
|
| @@ -166,7 +166,7 @@
|
| add r0, r0, r9 ; src += src_stride * 4 - w
|
| add r2, r2, r12 ; dst += dst_stride * 4 - w
|
| subs r7, r7, #4 ; h -= 4
|
| - bgt loop_horiz_v
|
| + bgt vp9_convolve8_loop_horiz_v
|
|
|
| pop {r4-r10, pc}
|
|
|
| @@ -192,7 +192,7 @@
|
| lsl r1, r1, #1
|
| lsl r3, r3, #1
|
|
|
| -loop_vert_h
|
| +vp9_convolve8_loop_vert_h
|
| mov r4, r0
|
| add r7, r0, r1, asr #1
|
| mov r5, r2
|
| @@ -212,7 +212,7 @@
|
| vmovl.u8 q10, d20
|
| vmovl.u8 q11, d22
|
|
|
| -loop_vert
|
| +vp9_convolve8_loop_vert
|
| ; always process a 4x4 block at a time
|
| vld1.u32 {d24[0]}, [r7], r1
|
| vld1.u32 {d26[0]}, [r4], r1
|
| @@ -266,13 +266,13 @@
|
| vmov d22, d25
|
|
|
| subs r12, r12, #4 ; h -= 4
|
| - bgt loop_vert
|
| + bgt vp9_convolve8_loop_vert
|
|
|
| ; outer loop
|
| add r0, r0, #4
|
| add r2, r2, #4
|
| subs r6, r6, #4 ; w -= 4
|
| - bgt loop_vert_h
|
| + bgt vp9_convolve8_loop_vert_h
|
|
|
| pop {r4-r8, pc}
|
|
|
|
|