Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_variance16x16_armv6| ; make the routine's symbol visible outside this object file
13
14 ARM ; assemble the following code as ARM (32-bit) instructions
15 REQUIRE8 ; declares that this code requires 8-byte stack alignment
16 PRESERVE8 ; declares that this code preserves 8-byte stack alignment
17
18 AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code area, aligned to 2^2 = 4 bytes
19
20 ; r0 unsigned char *src_ptr (advanced by source_stride once per row)
21 ; r1 int source_stride
22 ; r2 unsigned char *ref_ptr (advanced by recon_stride once per row)
23 ; r3 int recon_stride
24 ; stack unsigned int *sse (first stack argument; receives the sum of squared differences)
;
; Walks 16 rows of 16 bytes from src_ptr and ref_ptr, accumulating
;   sum = total of (src - ref)      over all 256 byte pairs (signed)
;   sse = total of (src - ref)^2    over all 256 byte pairs
; Stores sse through the stack-passed pointer and returns, in r0,
;   sse - ((sum * sum) >> 8)
;
; Register roles inside the loop:
;   r8      running sum
;   r11     running sse
;   r12     rows still to process
;   lr      constant zero (reloaded at the top of every iteration)
;   r4-r7, r9, r10   scratch
;
; NOTE(review): each row is read with four word-sized ldr instructions from
; byte pointers; presumably callers guarantee alignment or the target allows
; unaligned word loads -- confirm.
25 |vp8_variance16x16_armv6| PROC
26
27 stmfd sp!, {r4-r12, lr} ; save 10 registers (40 bytes); the stack argument is now at [sp, #40]
28
29 pld [r0, r1, lsl #0] ; preload hint for src_ptr + source_stride (the next src row)
30 pld [r2, r3, lsl #0] ; preload hint for ref_ptr + recon_stride (the next ref row)
31
32 mov r8, #0 ; initialize sum = 0
33 mov r11, #0 ; initialize sse = 0
34 mov r12, #16 ; set loop counter to 16 (=block height)
35
36 loop
37 ; 1st 4 pixels (row bytes 0..3)
38 ldr r4, [r0, #0] ; load 4 src pixels
39 ldr r5, [r2, #0] ; load 4 ref pixels
40
41 mov lr, #0 ; constant zero, used by every sel/usad8 in this iteration
42
43 usub8 r6, r4, r5 ; per-byte src - ref; also sets the per-byte GE flags read by sel
44 pld [r0, r1, lsl #1] ; preload hint for the src row two strides ahead
45 sel r7, r6, lr ; keep bytes where src >= ref, zero the others
46 usub8 r9, r5, r4 ; per-byte ref - src; refreshes the GE flags for the next sel
47 pld [r2, r3, lsl #1] ; preload hint for the ref row two strides ahead
48 sel r6, r9, lr ; keep bytes where ref >= src, zero the others
49
50 ; calculate partial sums
51 usad8 r4, r7, lr ; sum of the 4 positive-difference bytes (abs diff against zero)
52 usad8 r5, r6, lr ; sum of the 4 negative-difference magnitudes
53 orr r6, r6, r7 ; per byte at most one side is non-zero, so OR yields |src - ref| for all 4 pixels
54 ; calculate total sum
; NOTE(review): this group uses the flag-setting adds/subs while the other
; three groups use add/sub; no instruction visible in this loop reads the
; condition flags before "subs r12" overwrites them -- confirm it is intentional.
55 adds r8, r8, r4 ; add positive differences to sum
56 subs r8, r8, r5 ; subtract negative differences from sum
57
58 ; calculate sse
59 uxtb16 r5, r6 ; bytes 0 and 2 of |diff| widened to two halfwords
60 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 of |diff| widened to two halfwords
61 smlad r11, r5, r5, r11 ; sse += d0*d0 + d2*d2
62
63 ; 2nd 4 pixels (row bytes 4..7); the loads are issued before the previous group's last smlad
64 ldr r4, [r0, #4] ; load 4 src pixels
65 ldr r5, [r2, #4] ; load 4 ref pixels
66 smlad r11, r10, r10, r11 ; sse += d1*d1 + d3*d3 (finishes the previous group)
67
68 usub8 r6, r4, r5 ; per-byte src - ref, sets GE flags
69 sel r7, r6, lr ; select bytes with positive difference
70 usub8 r9, r5, r4 ; per-byte ref - src, sets GE flags
71 sel r6, r9, lr ; select bytes with negative difference
72
73 ; calculate partial sums
74 usad8 r4, r7, lr ; calculate sum of positive differences
75 usad8 r5, r6, lr ; calculate sum of negative differences
76 orr r6, r6, r7 ; absolute differences of all 4 pixels
77
78 ; calculate total sum
79 add r8, r8, r4 ; add positive differences to sum
80 sub r8, r8, r5 ; subtract negative differences from sum
81
82 ; calculate sse
83 uxtb16 r5, r6 ; bytes 0 and 2 to halfwords
84 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 to halfwords
85 smlad r11, r5, r5, r11 ; sse += d0*d0 + d2*d2
86
87 ; 3rd 4 pixels (row bytes 8..11)
88 ldr r4, [r0, #8] ; load 4 src pixels
89 ldr r5, [r2, #8] ; load 4 ref pixels
90 smlad r11, r10, r10, r11 ; sse += d1*d1 + d3*d3 (finishes the previous group)
91
92 usub8 r6, r4, r5 ; per-byte src - ref, sets GE flags
93 sel r7, r6, lr ; select bytes with positive difference
94 usub8 r9, r5, r4 ; per-byte ref - src, sets GE flags
95 sel r6, r9, lr ; select bytes with negative difference
96
97 ; calculate partial sums
98 usad8 r4, r7, lr ; calculate sum of positive differences
99 usad8 r5, r6, lr ; calculate sum of negative differences
100 orr r6, r6, r7 ; absolute differences of all 4 pixels
101
102 ; calculate total sum
103 add r8, r8, r4 ; add positive differences to sum
104 sub r8, r8, r5 ; subtract negative differences from sum
105
106 ; calculate sse
107 uxtb16 r5, r6 ; bytes 0 and 2 to halfwords
108 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 to halfwords
109 smlad r11, r5, r5, r11 ; sse += d0*d0 + d2*d2
110
111 ; 4th 4 pixels (row bytes 12..15); the row pointers are advanced in this group, after its loads
112 ldr r4, [r0, #12] ; load 4 src pixels
113 ldr r5, [r2, #12] ; load 4 ref pixels
114 smlad r11, r10, r10, r11 ; sse += d1*d1 + d3*d3 (finishes the previous group)
115
116 usub8 r6, r4, r5 ; per-byte src - ref, sets GE flags
117 add r0, r0, r1 ; set src_ptr to next row (add does not touch the GE flags sel needs)
118 sel r7, r6, lr ; select bytes with positive difference
119 usub8 r9, r5, r4 ; per-byte ref - src, sets GE flags
120 add r2, r2, r3 ; set ref_ptr to next row
121 sel r6, r9, lr ; select bytes with negative difference
122
123 ; calculate partial sums
124 usad8 r4, r7, lr ; calculate sum of positive differences
125 usad8 r5, r6, lr ; calculate sum of negative differences
126 orr r6, r6, r7 ; absolute differences of all 4 pixels
127
128 ; calculate total sum
129 add r8, r8, r4 ; add positive differences to sum
130 sub r8, r8, r5 ; subtract negative differences from sum
131
132 ; calculate sse
133 uxtb16 r5, r6 ; bytes 0 and 2 to halfwords
134 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 to halfwords
135 smlad r11, r5, r5, r11 ; sse += d0*d0 + d2*d2
136 smlad r11, r10, r10, r11 ; sse += d1*d1 + d3*d3
137
138
139 subs r12, r12, #1 ; one row done; sets Z when no rows remain
140
141 bne loop ; repeat until all 16 rows are processed
142
143 ; return stuff
144 ldr r6, [sp, #40] ; get address of sse (stack argument sits above the 40 bytes pushed on entry)
145 mul r0, r8, r8 ; sum * sum
146 str r11, [r6] ; store sse
147 sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)), using a logical right shift
148
149 ldmfd sp!, {r4-r12, pc} ; restore saved registers and return by loading the saved lr into pc
150
151 ENDP
152
153 END ; marks the end of this assembly source
154
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/alloccommon.c ('k') | source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698