Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: skia/ext/convolver_mips_dspr2.cc

Issue 2011713003: Roll skia to 8cc209111876b7c78b5ec577c9221d8ed5e21024 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « skia/ext/convolver_mips_dspr2.h ('k') | skia/ext/convolver_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include "skia/ext/convolver.h"
7 #include "skia/ext/convolver_mips_dspr2.h"
8 #include "third_party/skia/include/core/SkTypes.h"
9
10 namespace skia {
11 // Convolves horizontally along a single row. The row data is given in
12 // |src_data| and one 4-byte pixel is written to |out_row| for each of the
// filter's num_values() entries.
//
// For every output pixel the filter returned by FilterForValue() is applied
// starting at src_data + 4 * filter_offset. Coefficients are consumed four at
// a time in the loop at label 11 and one at a time, for the remaining
// (filter_length & 3) entries, in the loop at label 21. At label 3 each
// channel sum is shifted right by ConvolutionFilter1D::kShiftBits, clamped to
// the 0..255 range and packed into a single word that is stored with usw.
//
// When |has_alpha| is false the fourth channel is not accumulated, but a full
// 32-bit word is still stored for every pixel.
//
// The body is compiled only when SIMD_MIPS_DSPR2 is non-zero; otherwise the
// function does nothing.
13 void ConvolveHorizontally_mips_dspr2(const unsigned char* src_data,
14 const ConvolutionFilter1D& filter,
15 unsigned char* out_row,
16 bool has_alpha) {
17 #if SIMD_MIPS_DSPR2
// Scratch operand that the assembly fills with the address of the first
// source pixel covered by the current filter.
18 int row_to_filter = 0;
19 int num_values = filter.num_values();
20 if (has_alpha) {
21 for (int out_x = 0; out_x < num_values; out_x++) {
22 // Get the filter that determines the current output pixel.
23 int filter_offset, filter_length;
24 const ConvolutionFilter1D::Fixed* filter_values =
25 filter.FilterForValue(out_x, &filter_offset, &filter_length);
26 int filter_x = 0;
27
28 __asm__ __volatile__(
29 ".set push \n"
30 ".set noreorder \n"
31
// The accumulators are cleared before the zero-length test so that an empty
// filter extracts zeros at label 3 instead of the previous pixel's sums.
32 "mtlo $0, $ac0 \n"
33 "mtlo $0, $ac1 \n"
34 "mtlo $0, $ac2 \n"
35 "mtlo $0, $ac3 \n"
36 "beqz %[filter_len], 3f \n"
37 " sll $t0, %[filter_offset], 2 \n"
38 "addu %[rtf], %[src_data], $t0 \n"
39 "srl $t7, %[filter_len], 2 \n"
40 "beqz $t7, 2f \n"
41 " li %[fx], 0 \n"
42
// Main loop: four coefficients and four pixels per iteration. %[fx] is a byte
// offset into the coefficients (2 bytes each); the pixel offset is twice that.
43 "11: \n"
44 "addu $t4, %[filter_val], %[fx] \n"
45 "sll $t5, %[fx], 1 \n"
46 "ulw $t6, 0($t4) \n" // t6 =
47 // |cur[1]|cur[0]|
48 "ulw $t8, 4($t4) \n" // t8 =
49 // |cur[3]|cur[2]|
50 "addu $t0, %[rtf], $t5 \n"
51 "lw $t1, 0($t0) \n" // t1 = |a0|b0|g0|r0|
52 "lw $t2, 4($t0) \n" // t2 = |a1|b1|g1|r1|
53 "lw $t3, 8($t0) \n" // t3 = |a2|b2|g2|r2|
54 "lw $t4, 12($t0) \n" // t4 = |a3|b3|g3|r3|
55 "precrq.qb.ph $t0, $t2, $t1 \n" // t0 = |a1|g1|a0|g0|
56 "precr.qb.ph $t5, $t2, $t1 \n" // t5 = |b1|r1|b0|r0|
57 "preceu.ph.qbla $t1, $t0 \n" // t1 = |0|a1|0|a0|
58 "preceu.ph.qbra $t2, $t0 \n" // t2 = |0|g1|0|g0|
59 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|b1|0|b0|
60 "preceu.ph.qbra $t5, $t5 \n" // t5 = |0|r1|0|r0|
61 "dpa.w.ph $ac0, $t1, $t6 \n" // ac0+(cur*a1)+(cur* a0)
62 "dpa.w.ph $ac1, $t0, $t6 \n" // ac1+(cur*b1)+(cur* b0)
63 "dpa.w.ph $ac2, $t2, $t6 \n" // ac2+(cur*g1)+(cur* g0)
64 "dpa.w.ph $ac3, $t5, $t6 \n" // ac3+(cur*r1)+(cur* r0)
65 "precrq.qb.ph $t0, $t4, $t3 \n" // t0 = |a3|g3|a2|g2|
66 "precr.qb.ph $t5, $t4, $t3 \n" // t5 = |b3|r3|b2|r2|
67 "preceu.ph.qbla $t1, $t0 \n" // t1 = |0|a3|0|a2|
68 "preceu.ph.qbra $t2, $t0 \n" // t2 = |0|g3|0|g2|
69 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|b3|0|b2|
70 "preceu.ph.qbra $t5, $t5 \n" // t5 = |0|r3|0|r2|
71 "dpa.w.ph $ac0, $t1, $t8 \n" // ac0+(cur*a3)+(cur* a2)
72 "dpa.w.ph $ac1, $t0, $t8 \n" // ac1+(cur*b3)+(cur* b2)
73 "dpa.w.ph $ac2, $t2, $t8 \n" // ac2+(cur*g3)+(cur* g2)
74 "dpa.w.ph $ac3, $t5, $t8 \n" // ac3+(cur*r3)+(cur* r2)
75 "addiu $t7, $t7, -1 \n"
76 "bgtz $t7, 11b \n"
77 " addiu %[fx], %[fx], 8 \n"
78
79 "2: \n"
80 "andi $t7, %[filter_len], 0x3 \n" // residual
81 "beqz $t7, 3f \n"
82 " nop \n"
83
// Residual loop: one coefficient and one pixel per iteration.
84 "21: \n"
85 "sll $t1, %[fx], 1 \n"
86 "addu $t2, %[filter_val], %[fx] \n"
87 "addu $t0, %[rtf], $t1 \n"
88 "lh $t6, 0($t2) \n" // t6 =
89 // filter_val[fx]
90 "lbu $t1, 0($t0) \n" // t1 = row[fx * 4 +
91 // 0]
92 "lbu $t2, 1($t0) \n" // t2 = row[fx * 4 +
93 // 1]
94 "lbu $t3, 2($t0) \n" // t3 = row[fx * 4 +
95 // 2]
96 "lbu $t4, 3($t0) \n" // t4 = row[fx * 4 +
97 // 3]
98 "maddu $ac3, $t6, $t1 \n"
99 "maddu $ac2, $t6, $t2 \n"
100 "maddu $ac1, $t6, $t3 \n"
101 "maddu $ac0, $t6, $t4 \n"
102 "addiu $t7, $t7, -1 \n"
103 "bgtz $t7, 21b \n"
104 " addiu %[fx], %[fx], 2 \n"
105
// Scale, clamp each channel to 0..255 and store the packed pixel.
106 "3: \n"
107 "extrv.w $t0, $ac0, %[kShiftBits] \n" // a >> kShiftBits
108 "extrv.w $t1, $ac1, %[kShiftBits] \n" // b >> kShiftBits
109 "extrv.w $t2, $ac2, %[kShiftBits] \n" // g >> kShiftBits
110 "extrv.w $t3, $ac3, %[kShiftBits] \n" // r >> kShiftBits
111 "sll $t5, %[out_x], 2 \n"
112 "repl.ph $t6, 128 \n" // t6 = | 128 | 128 |
113 "addu $t5, %[out_row], $t5 \n"
114 "append $t2, $t3, 16 \n" // t2 = |g|r|
115 "append $t0, $t1, 16 \n" // t0 = |a|b|
116 "subu.ph $t1, $t0, $t6 \n"
117 "shll_s.ph $t1, $t1, 8 \n"
118 "shra.ph $t1, $t1, 8 \n"
119 "addu.ph $t1, $t1, $t6 \n" // Clamp(a)|Clamp(b)
120 "subu.ph $t3, $t2, $t6 \n"
121 "shll_s.ph $t3, $t3, 8 \n"
122 "shra.ph $t3, $t3, 8 \n"
123 "addu.ph $t3, $t3, $t6 \n" // Clamp(g)|Clamp(r)
124 "precr.qb.ph $t0, $t1, $t3 \n" // t0 = |a|b|g|r|
125 "usw $t0, 0($t5) \n"
126
127 ".set pop \n"
128 : [fx] "+r"(filter_x), [out_x] "+r"(out_x), [out_row] "+r"(out_row),
129 [rtf] "+r"(row_to_filter)
130 : [filter_val] "r"(filter_values), [filter_len] "r"(filter_length),
131 [kShiftBits] "r"(ConvolutionFilter1D::kShiftBits),
132 [filter_offset] "r"(filter_offset), [src_data] "r"(src_data)
// "memory" is listed because the assembly reads the source row and the
// coefficients and stores through |out_row| without memory operands.
133 : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
134 "$ac3hi", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "memory");
135 }
136 } else {
137 for (int out_x = 0; out_x < num_values; out_x++) {
138 // Get the filter that determines the current output pixel.
139 int filter_offset, filter_length;
140 const ConvolutionFilter1D::Fixed* filter_values =
141 filter.FilterForValue(out_x, &filter_offset, &filter_length);
142 int filter_x = 0;
143 __asm__ __volatile__(
144 ".set push \n"
145 ".set noreorder \n"
146
// The accumulators are cleared before the zero-length test so that an empty
// filter extracts zeros at label 3 instead of the previous pixel's sums.
147 "mtlo $0, $ac1 \n"
148 "mtlo $0, $ac2 \n"
149 "mtlo $0, $ac3 \n"
150 "beqz %[filter_len], 3f \n"
151 " sll $t0, %[filter_offset], 2 \n"
152 "addu %[rtf], %[src_data], $t0 \n"
153 "srl $t7, %[filter_len], 2 \n"
154 "beqz $t7, 2f \n"
155 " li %[fx], 0 \n"
156
// Main loop: four coefficients and four pixels per iteration; the alpha bytes
// are loaded with each pixel but never accumulated.
157 "11: \n"
158 "addu $t4, %[filter_val], %[fx] \n"
159 "sll $t5, %[fx], 1 \n"
160 "ulw $t6, 0($t4) \n" // t6 =
161 // |cur[1]|cur[0]|
162 "ulw $t8, 4($t4) \n" // t8 =
163 // |cur[3]|cur[2]|
164 "addu $t0, %[rtf], $t5 \n"
165 "lw $t1, 0($t0) \n" // t1 = |a0|b0|g0|r0|
166 "lw $t2, 4($t0) \n" // t2 = |a1|b1|g1|r1|
167 "lw $t3, 8($t0) \n" // t3 = |a2|b2|g2|r2|
168 "lw $t4, 12($t0) \n" // t4 = |a3|b3|g3|r3|
169 "precrq.qb.ph $t0, $t2, $t1 \n" // t0 = |a1|g1|a0|g0|
170 "precr.qb.ph $t5, $t2, $t1 \n" // t5 = |b1|r1|b0|r0|
171 "preceu.ph.qbra $t2, $t0 \n" // t2 = |0|g1|0|g0|
172 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|b1|0|b0|
173 "preceu.ph.qbra $t5, $t5 \n" // t5 = |0|r1|0|r0|
174 "dpa.w.ph $ac1, $t0, $t6 \n" // ac1+(cur*b1)+(cur* b0)
175 "dpa.w.ph $ac2, $t2, $t6 \n" // ac2+(cur*g1)+(cur* g0)
176 "dpa.w.ph $ac3, $t5, $t6 \n" // ac3+(cur*r1)+(cur* r0)
177 "precrq.qb.ph $t0, $t4, $t3 \n" // t0 = |a3|g3|a2|g2|
178 "precr.qb.ph $t5, $t4, $t3 \n" // t5 = |b3|r3|b2|r2|
179 "preceu.ph.qbra $t2, $t0 \n" // t2 = |0|g3|0|g2|
180 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|b3|0|b2|
181 "preceu.ph.qbra $t5, $t5 \n" // t5 = |0|r3|0|r2|
182 "dpa.w.ph $ac1, $t0, $t8 \n" // ac1+(cur*b3)+(cur* b2)
183 "dpa.w.ph $ac2, $t2, $t8 \n" // ac2+(cur*g3)+(cur* g2)
184 "dpa.w.ph $ac3, $t5, $t8 \n" // ac3+(cur*r3)+(cur* r2)
185 "addiu $t7, $t7, -1 \n"
186 "bgtz $t7, 11b \n"
187 " addiu %[fx], %[fx], 8 \n"
188
189 "2: \n"
190 "andi $t7, %[filter_len], 0x3 \n" // residual
191 "beqz $t7, 3f \n"
192 " nop \n"
193
// Residual loop: one coefficient and one pixel per iteration.
194 "21: \n"
195 "sll $t1, %[fx], 1 \n"
196 "addu $t2, %[filter_val], %[fx] \n"
197 "addu $t0, %[rtf], $t1 \n"
198 "lh $t6, 0($t2) \n" // t6 =
199 // filter_val[fx]
200 "lbu $t1, 0($t0) \n" // t1 = row[fx * 4 +
201 // 0]
202 "lbu $t2, 1($t0) \n" // t2 = row[fx * 4 +
203 // 1]
204 "lbu $t3, 2($t0) \n" // t3 = row[fx * 4 +
205 // 2]
206 "maddu $ac3, $t6, $t1 \n"
207 "maddu $ac2, $t6, $t2 \n"
208 "maddu $ac1, $t6, $t3 \n"
209 "addiu $t7, $t7, -1 \n"
210 "bgtz $t7, 21b \n"
211 " addiu %[fx], %[fx], 2 \n"
212
// Scale, clamp each colour channel to 0..255 and store the packed pixel. The
// upper halfword paired with b is zero, so the fourth byte is built from 0.
213 "3: \n"
214 "extrv.w $t1, $ac1, %[kShiftBits] \n" // b >> kShiftBits
215 "extrv.w $t2, $ac2, %[kShiftBits] \n" // g >> kShiftBits
216 "extrv.w $t3, $ac3, %[kShiftBits] \n" // r >> kShiftBits
217 "repl.ph $t6, 128 \n" // t6 = | 128 | 128 |
218 "sll $t8, %[out_x], 2 \n"
219 "addu $t8, %[out_row], $t8 \n"
220 "append $t2, $t3, 16 \n" // t2 = |g|r|
221 "andi $t1, 0xFFFF \n" // t1 = |0|b|
222 "subu.ph $t5, $t1, $t6 \n"
223 "shll_s.ph $t5, $t5, 8 \n"
224 "shra.ph $t5, $t5, 8 \n"
225 "addu.ph $t5, $t5, $t6 \n" // Clamp(0)|Clamp(b)
226 "subu.ph $t4, $t2, $t6 \n"
227 "shll_s.ph $t4, $t4, 8 \n"
228 "shra.ph $t4, $t4, 8 \n"
229 "addu.ph $t4, $t4, $t6 \n" // Clamp(g)|Clamp(r)
230 "precr.qb.ph $t0, $t5, $t4 \n"
231 "usw $t0, 0($t8) \n"
232
233 ".set pop \n"
234 : [fx] "+r"(filter_x), [out_x] "+r"(out_x), [out_row] "+r"(out_row),
235 [rtf] "+r"(row_to_filter)
236 : [filter_val] "r"(filter_values), [filter_len] "r"(filter_length),
237 [kShiftBits] "r"(ConvolutionFilter1D::kShiftBits),
238 [filter_offset] "r"(filter_offset), [src_data] "r"(src_data)
// "memory" is listed because the assembly reads the source row and the
// coefficients and stores through |out_row| without memory operands.
239 : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
240 "$ac3hi", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "memory");
241 }
242 }
243 #endif
244 }
// Convolves vertically across |filter_length| source rows using MIPS DSPr2
// inline assembly, producing |pixel_width| 4-byte pixels in |out_row|.
//
// |filter_val| points at |filter_length| 16-bit coefficients and
// |source_data_rows| at the matching array of row pointers. For each output
// column the pixel at byte offset 4 * out_x of every row is weighted by that
// row's coefficient: four rows at a time in the loop at label 11, then one at
// a time for the remaining (filter_length & 3) rows in the loop at label 21.
// At label 3 each channel sum is shifted right by
// ConvolutionFilter1D::kShiftBits, clamped to 0..255, packed and stored.
//
// With |has_alpha| the alpha byte is raised to at least the largest clamped
// colour channel before the store; without it the alpha byte is set to 0xFF.
//
// The body is compiled only when SIMD_MIPS_DSPR2 is non-zero; otherwise the
// function does nothing.
245 void ConvolveVertically_mips_dspr2(const ConvolutionFilter1D::Fixed* filter_val,
246 int filter_length,
247 unsigned char* const* source_data_rows,
248 int pixel_width,
249 unsigned char* out_row,
250 bool has_alpha) {
251 #if SIMD_MIPS_DSPR2
252 // We go through each column in the output and do a vertical convolution,
253 // generating one output pixel each time.
// Scratch registers for the assembly. They are bound as read-write ("+r")
// operands, so they are initialized to avoid reading indeterminate values.
254 int byte_offset = 0;
255 int cnt = 0;
256 int filter_y = 0;
257 if (has_alpha) {
258 for (int out_x = 0; out_x < pixel_width; out_x++) {
259 __asm__ __volatile__(
260 ".set push \n"
261 ".set noreorder \n"
262
// The accumulators are cleared before the zero-length test so that an empty
// filter extracts zeros at label 3 instead of the previous pixel's sums.
263 "mtlo $0, $ac0 \n"
264 "mtlo $0, $ac1 \n"
265 "mtlo $0, $ac2 \n"
266 "mtlo $0, $ac3 \n"
267 "beqz %[filter_len], 3f \n"
268 " sll %[offset], %[out_x], 2 \n"
269 "srl %[cnt], %[filter_len], 2 \n"
270 "beqz %[cnt], 2f \n"
271 " li %[fy], 0 \n"
272
// Main loop: four rows per iteration. %[fy] is a byte offset into the
// coefficients (2 bytes each); the row-pointer offset is twice that.
273 "11: \n"
274 "sll $t1, %[fy], 1 \n"
275 "addu $t0, %[src_data_rows], $t1 \n"
276 "lw $t1, 0($t0) \n"
277 "lw $t2, 4($t0) \n"
278 "lw $t3, 8($t0) \n"
279 "lw $t4, 12($t0) \n"
280 "addu $t1, $t1, %[offset] \n"
281 "addu $t2, $t2, %[offset] \n"
282 "addu $t3, $t3, %[offset] \n"
283 "addu $t4, $t4, %[offset] \n"
284 "lw $t1, 0($t1) \n" // t1 =
285 // |a0|b0|g0|r0|
286 "lw $t2, 0($t2) \n" // t2 =
287 // |a1|b1|g1|r1|
288 "lw $t3, 0($t3) \n" // t3 =
289 // |a2|b2|g2|r2|
290 "lw $t4, 0($t4) \n" // t4 =
291 // |a3|b3|g3|r3|
292 "precrq.qb.ph $t5, $t2, $t1 \n" // t5 =
293 // |a1|g1|a0|g0|
294 "precr.qb.ph $t6, $t2, $t1 \n" // t6 =
295 // |b1|r1|b0|r0|
296 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|a1|0|a0|
297 "preceu.ph.qbra $t1, $t5 \n" // t1 = |0|g1|0|g0|
298 "preceu.ph.qbla $t2, $t6 \n" // t2 = |0|b1|0|b0|
299 "preceu.ph.qbra $t5, $t6 \n" // t5 = |0|r1|0|r0|
300 "addu $t6, %[filter_val], %[fy] \n"
301 "ulw $t7, 0($t6) \n" // t7 =
302 // |cur_1|cur_0|
303 "ulw $t6, 4($t6) \n" // t6 =
304 // |cur_3|cur_2|
305 "dpa.w.ph $ac0, $t5, $t7 \n" // (cur*r1)+(cur*r0)
306 "dpa.w.ph $ac1, $t1, $t7 \n" // (cur*g1)+(cur*g0)
307 "dpa.w.ph $ac2, $t2, $t7 \n" // (cur*b1)+(cur*b0)
308 "dpa.w.ph $ac3, $t0, $t7 \n" // (cur*a1)+(cur*a0)
309 "precrq.qb.ph $t5, $t4, $t3 \n" // t5 =
310 // |a3|g3|a2|g2|
311 "precr.qb.ph $t7, $t4, $t3 \n" // t7 =
312 // |b3|r3|b2|r2|
313 "preceu.ph.qbla $t0, $t5 \n" // t0 = |0|a3|0|a2|
314 "preceu.ph.qbra $t1, $t5 \n" // t1 = |0|g3|0|g2|
315 "preceu.ph.qbla $t2, $t7 \n" // t2 = |0|b3|0|b2|
316 "preceu.ph.qbra $t5, $t7 \n" // t5 = |0|r3|0|r2|
317 "dpa.w.ph $ac0, $t5, $t6 \n" // (cur*r3)+(cur*r2)
318 "dpa.w.ph $ac1, $t1, $t6 \n" // (cur*g3)+(cur*g2)
319 "dpa.w.ph $ac2, $t2, $t6 \n" // (cur*b3)+(cur*b2)
320 "dpa.w.ph $ac3, $t0, $t6 \n" // (cur*a3)+(cur*a2)
321 "addiu %[cnt], %[cnt], -1 \n"
322 "bgtz %[cnt], 11b \n"
323 " addiu %[fy], %[fy], 8 \n"
324
325 "2: \n"
326 "andi %[cnt], %[filter_len], 0x3 \n" // residual
327 "beqz %[cnt], 3f \n"
328 " nop \n"
329
// Residual loop: one row per iteration.
330 "21: \n"
331 "addu $t0, %[filter_val], %[fy] \n"
332 "lh $t4, 0($t0) \n" // t4=filter_val[fy]
333 "sll $t1, %[fy], 1 \n"
334 "addu $t0, %[src_data_rows], $t1 \n"
335 "lw $t1, 0($t0) \n"
336 "addu $t0, $t1, %[offset] \n"
337 "lbu $t1, 0($t0) \n" // t1 = row[offset +
338 // 0]
339 "lbu $t2, 1($t0) \n" // t2 = row[offset +
340 // 1]
341 "lbu $t3, 2($t0) \n" // t3 = row[offset +
342 // 2]
343 "lbu $t0, 3($t0) \n" // t0 = row[offset +
344 // 3]
345 "maddu $ac0, $t4, $t1 \n"
346 "maddu $ac1, $t4, $t2 \n"
347 "maddu $ac2, $t4, $t3 \n"
348 "maddu $ac3, $t4, $t0 \n"
349 "addiu %[cnt], %[cnt], -1 \n"
350 "bgtz %[cnt], 21b \n"
351 " addiu %[fy], %[fy], 2 \n"
352
// Scale and clamp each channel, then force alpha to be at least as large as
// the largest colour channel before packing and storing the pixel.
353 "3: \n"
354 "extrv.w $t3, $ac0, %[kShiftBits] \n" // r >> kShiftBits
355 "extrv.w $t2, $ac1, %[kShiftBits] \n" // g >> kShiftBits
356 "extrv.w $t1, $ac2, %[kShiftBits] \n" // b >> kShiftBits
357 "extrv.w $t0, $ac3, %[kShiftBits] \n" // a >> kShiftBits
358 "repl.ph $t4, 128 \n" // t4 = | 128 | 128
359 // |
360 "addu $t5, %[out_row], %[offset] \n"
361 "append $t2, $t3, 16 \n" // t2 = |0|g|0|r|
362 "append $t0, $t1, 16 \n" // t0 = |0|a|0|b|
363 "subu.ph $t1, $t0, $t4 \n"
364 "shll_s.ph $t1, $t1, 8 \n"
365 "shra.ph $t1, $t1, 8 \n"
366 "addu.ph $t1, $t1, $t4 \n" // Clamp(a)|Clamp(b)
367 "subu.ph $t2, $t2, $t4 \n"
368 "shll_s.ph $t2, $t2, 8 \n"
369 "shra.ph $t2, $t2, 8 \n"
370 "addu.ph $t2, $t2, $t4 \n" // Clamp(g)|Clamp(r)
371 "andi $t3, $t1, 0xFF \n" // t3 = ClampTo8(b)
372 "cmp.lt.ph $t3, $t2 \n" // cmp b, g, r
373 "pick.ph $t0, $t2, $t3 \n"
374 "andi $t3, $t0, 0xFF \n"
375 "srl $t4, $t0, 16 \n"
376 "cmp.lt.ph $t3, $t4 \n"
377 "pick.ph $t0, $t4, $t3 \n" // t0 = max_color_ch
378 "srl $t3, $t1, 16 \n" // t3 = ClampTo8(a)
379 "cmp.lt.ph $t3, $t0 \n"
380 "pick.ph $t0, $t0, $t3 \n" // t0 = max(a, max_color_ch)
381 "ins $t1, $t0, 16, 8 \n"
382 "precr.qb.ph $t0, $t1, $t2 \n" // t0 = |a|b|g|r|
383 "usw $t0, 0($t5) \n"
384
385 ".set pop \n"
386 : [filter_val] "+r"(filter_val), [filter_len] "+r"(filter_length),
387 [offset] "+r"(byte_offset), [fy] "+r"(filter_y), [cnt] "+r"(cnt),
388 [out_x] "+r"(out_x), [pixel_width] "+r"(pixel_width)
389 : [src_data_rows] "r"(source_data_rows), [out_row] "r"(out_row),
390 [kShiftBits] "r"(ConvolutionFilter1D::kShiftBits)
391 : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
392 "$ac3hi", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "memory");
393 }
394 } else {
395 for (int out_x = 0; out_x < pixel_width; out_x++) {
396 __asm__ __volatile__(
397 ".set push \n"
398 ".set noreorder \n"
399
// The accumulators are cleared before the zero-length test so that an empty
// filter extracts zeros at label 3 instead of the previous pixel's sums.
400 "mtlo $0, $ac0 \n"
401 "mtlo $0, $ac1 \n"
402 "mtlo $0, $ac2 \n"
403 "beqz %[filter_len], 3f \n"
404 " sll %[offset], %[out_x], 2 \n"
405 "srl %[cnt], %[filter_len], 2 \n"
406 "beqz %[cnt], 2f \n"
407 " li %[fy], 0 \n"
408
// Main loop: four rows per iteration; the alpha bytes are loaded with each
// pixel but never accumulated.
409 "11: \n"
410 "sll $t1, %[fy], 1 \n"
411 "addu $t0, %[src_data_rows], $t1 \n"
412 "lw $t1, 0($t0) \n"
413 "lw $t2, 4($t0) \n"
414 "lw $t3, 8($t0) \n"
415 "lw $t4, 12($t0) \n"
416 "addu $t1, $t1, %[offset] \n"
417 "addu $t2, $t2, %[offset] \n"
418 "addu $t3, $t3, %[offset] \n"
419 "addu $t4, $t4, %[offset] \n"
420 "lw $t1, 0($t1) \n" // t1 =
421 // |a0|b0|g0|r0|
422 "lw $t2, 0($t2) \n" // t2 =
423 // |a1|b1|g1|r1|
424 "lw $t3, 0($t3) \n" // t3 =
425 // |a2|b2|g2|r2|
426 "lw $t4, 0($t4) \n" // t4 =
427 // |a3|b3|g3|r3|
428 "precrq.qb.ph $t5, $t2, $t1 \n" // t5 =
429 // |a1|g1|a0|g0|
430 "precr.qb.ph $t6, $t2, $t1 \n" // t6 =
431 // |b1|r1|b0|r0|
432 "preceu.ph.qbra $t1, $t5 \n" // t1 = |0|g1|0|g0|
433 "preceu.ph.qbla $t2, $t6 \n" // t2 = |0|b1|0|b0|
434 "preceu.ph.qbra $t5, $t6 \n" // t5 = |0|r1|0|r0|
435 "addu $t6, %[filter_val], %[fy] \n"
436 "ulw $t0, 0($t6) \n" // t0 =
437 // |cur_1|cur_0|
438 "ulw $t6, 4($t6) \n" // t6 =
439 // |cur_3|cur_2|
440 "dpa.w.ph $ac0, $t5, $t0 \n" // (cur*r1)+(cur*r0)
441 "dpa.w.ph $ac1, $t1, $t0 \n" // (cur*g1)+(cur*g0)
442 "dpa.w.ph $ac2, $t2, $t0 \n" // (cur*b1)+(cur*b0)
443 "precrq.qb.ph $t5, $t4, $t3 \n" // t5 =
444 // |a3|g3|a2|g2|
445 "precr.qb.ph $t0, $t4, $t3 \n" // t0 =
446 // |b3|r3|b2|r2|
447 "preceu.ph.qbra $t1, $t5 \n" // t1 = |0|g3|0|g2|
448 "preceu.ph.qbla $t2, $t0 \n" // t2 = |0|b3|0|b2|
449 "preceu.ph.qbra $t5, $t0 \n" // t5 = |0|r3|0|r2|
450 "dpa.w.ph $ac0, $t5, $t6 \n" // (cur*r3)+(cur*r2)
451 "dpa.w.ph $ac1, $t1, $t6 \n" // (cur*g3)+(cur*g2)
452 "dpa.w.ph $ac2, $t2, $t6 \n" // (cur*b3)+(cur*b2)
453 "addiu %[cnt], %[cnt], -1 \n"
454 "bgtz %[cnt], 11b \n"
455 " addiu %[fy], %[fy], 8 \n"
456
457 "2: \n"
458 "andi %[cnt], %[filter_len], 0x3 \n" // residual
459 "beqz %[cnt], 3f \n"
460 " nop \n"
461
// Residual loop: one row per iteration.
462 "21: \n"
463 "addu $t0, %[filter_val], %[fy] \n"
464 "lh $t4, 0($t0) \n" // filter_val[fy]
465 "sll $t1, %[fy], 1 \n"
466 "addu $t0, %[src_data_rows], $t1 \n"
467 "lw $t1, 0($t0) \n"
468 "addu $t0, $t1, %[offset] \n"
469 "lbu $t1, 0($t0) \n" // t1 = row[offset +
470 // 0]
471 "lbu $t2, 1($t0) \n" // t2 = row[offset +
472 // 1]
473 "lbu $t3, 2($t0) \n" // t3 = row[offset +
474 // 2]
475 "maddu $ac0, $t4, $t1 \n"
476 "maddu $ac1, $t4, $t2 \n"
477 "maddu $ac2, $t4, $t3 \n"
478 "addiu %[cnt], %[cnt], -1 \n"
479 "bgtz %[cnt], 21b \n"
480 " addiu %[fy], %[fy], 2 \n"
481
// Scale and clamp the colour channels, set alpha to 0xFF, pack and store.
482 "3: \n"
483 "extrv.w $t3, $ac0, %[kShiftBits] \n" // r >> kShiftBits
484 "extrv.w $t2, $ac1, %[kShiftBits] \n" // g >> kShiftBits
485 "extrv.w $t1, $ac2, %[kShiftBits] \n" // b >> kShiftBits
486 "repl.ph $t6, 128 \n" // t6 = | 128 | 128
487 // |
488 "addu $t5, %[out_row], %[offset] \n"
489 "append $t2, $t3, 16 \n" // t2 = |0|g|0|r|
490 "andi $t1, $t1, 0xFFFF \n"
491 "subu.ph $t1, $t1, $t6 \n"
492 "shll_s.ph $t1, $t1, 8 \n"
493 "shra.ph $t1, $t1, 8 \n"
494 "addu.ph $t1, $t1, $t6 \n" // Clamp(0)|Clamp(b)
495 "subu.ph $t2, $t2, $t6 \n"
496 "shll_s.ph $t2, $t2, 8 \n"
497 "shra.ph $t2, $t2, 8 \n"
498 "addu.ph $t2, $t2, $t6 \n" // Clamp(g)|Clamp(r)
499 "li $t0, 0xFF \n"
500 "ins $t1, $t0, 16, 8 \n" // alpha = 0xFF
501 "precr.qb.ph $t0, $t1, $t2 \n" // t0 = |a|b|g|r|
502 "usw $t0, 0($t5) \n"
503
504 ".set pop \n"
505 : [filter_val] "+r"(filter_val), [filter_len] "+r"(filter_length),
506 [offset] "+r"(byte_offset), [fy] "+r"(filter_y), [cnt] "+r"(cnt),
507 [out_x] "+r"(out_x), [pixel_width] "+r"(pixel_width)
508 : [src_data_rows] "r"(source_data_rows), [out_row] "r"(out_row),
509 [kShiftBits] "r"(ConvolutionFilter1D::kShiftBits)
510 : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
511 "$ac3hi", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "memory");
512 }
513 }
514 #endif
515 }
516 } // namespace skia
OLDNEW
« no previous file with comments | « skia/ext/convolver_mips_dspr2.h ('k') | skia/ext/convolver_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698