OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8_subtract_mby_armv6| | |
13 EXPORT |vp8_subtract_mbuv_armv6| | |
14 EXPORT |vp8_subtract_b_armv6| | |
15 | |
16 INCLUDE vp8_asm_enc_offsets.asm | |
17 | |
18 ARM | |
19 REQUIRE8 | |
20 PRESERVE8 | |
21 | |
22 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
23 | |
;-----------------------------------------------------------------------
; vp8_subtract_b_armv6
;
; Computes the residual of one 4x4 block: for each of 4 rows, four
; source bytes and four predictor bytes are widened to 16 bits,
; subtracted (src - pred) and written out as four 16-bit values.
;
; In:
;   r0      BLOCK  *be
;   r1      BLOCKD *bd
;   r2      int     pitch   (byte step of the predictor rows; the diff
;                            pointer advances by 2 * pitch bytes per row)
;
; Register use inside the loop:
;   r3      current source row     (*be->base_src + be->src)
;   r6      current diff row       (be->src_diff)
;   r7      source stride          (be->src_stride)
;   r8      current predictor row  (bd->predictor)
;   r9      rows remaining
;   r0, r1, r4, r5  scratch
;
; Saves and restores r4-r9; r0-r3 and the flags (including GE) are
; clobbered.
;
; NOTE(review): rows are fetched with 32-bit ldr; confirm that callers
; pass word-aligned rows or that unaligned access is enabled.
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]

    ldr     r3, [r4]                    ; base_src is a pointer to the base pointer
    ldr     r7, [r0, #vp8_block_src_stride]
    add     r3, r3, r5                  ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor]

    mov     r9, #4                      ; 4 rows per block

loop_block

    ldr     r0, [r3], r7                ; 4 source bytes, then src += src_stride
    ldr     r1, [r8], r2                ; 4 predictor bytes, then pred += pitch

    uxtb16  r4, r0                      ; [s2 | s0]
    uxtb16  r5, r1                      ; [p2 | p0]
    uxtb16  r0, r0, ror #8              ; [s3 | s1]
    uxtb16  r1, r1, ror #8              ; [p3 | p1]

    usub16  r4, r4, r5                  ; [d2 | d0]
    usub16  r5, r0, r1                  ; [d3 | d1]

    subs    r9, r9, #1                  ; flags survive to the bne below:
                                        ; pkh*/str/add do not set them

    pkhbt   r0, r4, r5, lsl #16         ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16         ; [d3 | d2]

    str     r0, [r6, #0]                ; diff[0..1]
    str     r1, [r6, #4]                ; diff[2..3]

    add     r6, r6, r2, lsl #1          ; diff += pitch (16-bit elements)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                          ; interworking return (honours the
                                        ; Thumb bit in lr), same as the
                                        ; other routines in this file

    ENDP
70 | |
71 | |
;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;
; Writes the 8x8 U residual followed by the 8x8 V residual
; (src - pred, one 16-bit value per pixel) starting 512 bytes past
; the incoming diff pointer.
;
; In:
;   r0          short         *diff
;   r1          unsigned char *usrc
;   r2          unsigned char *vsrc
;   r3          int            src_stride
;   [sp, #0]    unsigned char *upred
;   [sp, #4]    unsigned char *vpred
;   [sp, #8]    int            pred_stride
;
; Register use:
;   r4          rows remaining in the current plane
;   r5          current predictor row (U plane, then V plane)
;   r12         pred_stride
;   r6-r11      pixel scratch
;
; Saves and restores r4-r11; r0-r3, r12 and the flags (including GE)
; are clobbered.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmdb   sp!, {r4-r11}               ; 8 registers: stack args now at sp + 32

    ldr     r5, [sp, #32]               ; upred
    ldr     r12, [sp, #40]              ; pred_stride
    add     r0, r0, #512                ; skip the first 512 bytes of diff
    mov     r4, #8                      ; 8 rows of U

    ; ---- U plane ------------------------------------------------------
subtract_u_row
    ; Fetch the whole 8-pixel row up front, then step to the next row.
    ldr     r6, [r1]                    ; usrc  pixels 0-3
    ldr     r7, [r5]                    ; upred pixels 0-3
    ldr     r10, [r1, #4]               ; usrc  pixels 4-7
    ldr     r11, [r5, #4]               ; upred pixels 4-7
    add     r1, r1, r3                  ; usrc  += src_stride
    add     r5, r5, r12                 ; upred += pred_stride

    ; pixels 0-3
    uxtb16  r8, r6                      ; [s2 | s0]
    uxtb16  r9, r7                      ; [p2 | p0]
    uxtb16  r6, r6, ror #8              ; [s3 | s1]
    uxtb16  r7, r7, ror #8              ; [p3 | p1]
    usub16  r8, r8, r9                  ; [d2 | d0]
    usub16  r9, r6, r7                  ; [d3 | d1]
    pkhbt   r6, r8, r9, lsl #16         ; [d1 | d0]
    pkhtb   r7, r9, r8, asr #16         ; [d3 | d2]
    str     r6, [r0], #4
    str     r7, [r0], #4

    ; pixels 4-7
    uxtb16  r8, r10                     ; [s6 | s4]
    uxtb16  r9, r11                     ; [p6 | p4]
    uxtb16  r10, r10, ror #8            ; [s7 | s5]
    uxtb16  r11, r11, ror #8            ; [p7 | p5]
    usub16  r8, r8, r9                  ; [d6 | d4]
    usub16  r9, r10, r11                ; [d7 | d5]
    pkhbt   r10, r8, r9, lsl #16        ; [d5 | d4]
    pkhtb   r11, r9, r8, asr #16        ; [d7 | d6]
    str     r10, [r0], #4
    str     r11, [r0], #4

    subs    r4, r4, #1
    bne     subtract_u_row

    ; ---- V plane ------------------------------------------------------
    mov     r4, #8                      ; 8 rows of V
    ldr     r5, [sp, #36]               ; vpred

subtract_v_row
    ldr     r6, [r2]                    ; vsrc  pixels 0-3
    ldr     r7, [r5]                    ; vpred pixels 0-3
    ldr     r10, [r2, #4]               ; vsrc  pixels 4-7
    ldr     r11, [r5, #4]               ; vpred pixels 4-7
    add     r2, r2, r3                  ; vsrc  += src_stride
    add     r5, r5, r12                 ; vpred += pred_stride

    ; pixels 0-3
    uxtb16  r8, r6                      ; [s2 | s0]
    uxtb16  r9, r7                      ; [p2 | p0]
    uxtb16  r6, r6, ror #8              ; [s3 | s1]
    uxtb16  r7, r7, ror #8              ; [p3 | p1]
    usub16  r8, r8, r9                  ; [d2 | d0]
    usub16  r9, r6, r7                  ; [d3 | d1]
    pkhbt   r6, r8, r9, lsl #16         ; [d1 | d0]
    pkhtb   r7, r9, r8, asr #16         ; [d3 | d2]
    str     r6, [r0], #4
    str     r7, [r0], #4

    ; pixels 4-7
    uxtb16  r8, r10                     ; [s6 | s4]
    uxtb16  r9, r11                     ; [p6 | p4]
    uxtb16  r10, r10, ror #8            ; [s7 | s5]
    uxtb16  r11, r11, ror #8            ; [p7 | p5]
    usub16  r8, r8, r9                  ; [d6 | d4]
    usub16  r9, r10, r11                ; [d7 | d5]
    pkhbt   r10, r8, r9, lsl #16        ; [d5 | d4]
    pkhtb   r11, r9, r8, asr #16        ; [d7 | d6]
    str     r10, [r0], #4
    str     r11, [r0], #4

    subs    r4, r4, #1
    bne     subtract_v_row

    ldmia   sp!, {r4-r11}
    bx      lr

    ENDP
179 | |
180 | |
;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;
; Writes the 16x16 residual (src - pred, one 16-bit value per pixel)
; contiguously from diff: 16 rows of 16 values each.
;
; In:
;   r0          short         *diff
;   r1          unsigned char *src
;   r2          int            src_stride
;   r3          unsigned char *pred
;   [sp, #0]    int            pred_stride
;
; Register use:
;   r4          rows remaining
;   r12         pred_stride
;   r6-r11      pixel scratch
;
; Saves and restores r4-r11; r0, r1, r3, r12 and the flags
; (including GE) are clobbered.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmdb   sp!, {r4-r11}               ; 8 registers: stack arg now at sp + 32
    mov     r4, #16                     ; 16 rows
    ldr     r12, [sp, #32]              ; pred_stride

subtract_y_row
    ; ---- left half of the row: pixels 0-7 -----------------------------
    ldr     r6, [r1]                    ; src  pixels 0-3
    ldr     r7, [r3]                    ; pred pixels 0-3
    ldr     r10, [r1, #4]               ; src  pixels 4-7
    ldr     r11, [r3, #4]               ; pred pixels 4-7

    uxtb16  r8, r6                      ; [s2 | s0]
    uxtb16  r9, r7                      ; [p2 | p0]
    uxtb16  r6, r6, ror #8              ; [s3 | s1]
    uxtb16  r7, r7, ror #8              ; [p3 | p1]
    usub16  r8, r8, r9                  ; [d2 | d0]
    usub16  r9, r6, r7                  ; [d3 | d1]
    pkhbt   r6, r8, r9, lsl #16         ; [d1 | d0]
    pkhtb   r7, r9, r8, asr #16         ; [d3 | d2]
    str     r6, [r0], #4
    str     r7, [r0], #4

    uxtb16  r8, r10                     ; [s6 | s4]
    uxtb16  r9, r11                     ; [p6 | p4]
    uxtb16  r10, r10, ror #8            ; [s7 | s5]
    uxtb16  r11, r11, ror #8            ; [p7 | p5]
    usub16  r8, r8, r9                  ; [d6 | d4]
    usub16  r9, r10, r11                ; [d7 | d5]
    pkhbt   r10, r8, r9, lsl #16        ; [d5 | d4]
    pkhtb   r11, r9, r8, asr #16        ; [d7 | d6]
    str     r10, [r0], #4
    str     r11, [r0], #4

    ; ---- right half of the row: pixels 8-15 ---------------------------
    ldr     r6, [r1, #8]                ; src  pixels 8-11
    ldr     r7, [r3, #8]                ; pred pixels 8-11
    ldr     r10, [r1, #12]              ; src  pixels 12-15
    ldr     r11, [r3, #12]              ; pred pixels 12-15
    add     r1, r1, r2                  ; src  += src_stride
    add     r3, r3, r12                 ; pred += pred_stride

    uxtb16  r8, r6                      ; [s10 | s8]
    uxtb16  r9, r7                      ; [p10 | p8]
    uxtb16  r6, r6, ror #8              ; [s11 | s9]
    uxtb16  r7, r7, ror #8              ; [p11 | p9]
    usub16  r8, r8, r9                  ; [d10 | d8]
    usub16  r9, r6, r7                  ; [d11 | d9]
    pkhbt   r6, r8, r9, lsl #16         ; [d9  | d8]
    pkhtb   r7, r9, r8, asr #16         ; [d11 | d10]
    str     r6, [r0], #4
    str     r7, [r0], #4

    uxtb16  r8, r10                     ; [s14 | s12]
    uxtb16  r9, r11                     ; [p14 | p12]
    uxtb16  r10, r10, ror #8            ; [s15 | s13]
    uxtb16  r11, r11, ror #8            ; [p15 | p13]
    usub16  r8, r8, r9                  ; [d14 | d12]
    usub16  r9, r10, r11                ; [d15 | d13]
    pkhbt   r10, r8, r9, lsl #16        ; [d13 | d12]
    pkhtb   r11, r9, r8, asr #16        ; [d15 | d14]
    str     r10, [r0], #4
    str     r11, [r0], #4

    subs    r4, r4, #1
    bne     subtract_y_row

    ldmia   sp!, {r4-r11}
    bx      lr

    ENDP
270 | |
271 END | |
272 | |
OLD | NEW |