Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(698)

Side by Side Diff: src/opts/SkBlitRow_opts_mips_dsp.cpp

Issue 326913004: MIPS: added optimization for functions from SkBlitRow. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « gyp/opts.gyp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2014 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkBlitRow.h"
9 #include "SkBlitMask.h"
10 #include "SkColorPriv.h"
11 #include "SkDither.h"
12 #include "SkMathPriv.h"
13
14 static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
15 const SkPMColor* SK_RESTRICT src, int count,
16 U8CPU alpha, int /*x*/, int /*y*/) {
17 register uint32_t t0, t1, t2, t3, t4, t5, t6;
18 register uint32_t s0, s1, s2, s4, s5, s6;
19
20 alpha += 1;
21 if (count >= 2) {
22 __asm__ volatile (
23 ".set push \n\t"
24 ".set noreorder \n\t"
25 "sll %[s4], %[alpha], 8 \n\t"
26 "or %[s4], %[s4], %[alpha] \n\t"
27 "repl.ph %[s5], 0x1f \n\t"
28 "repl.ph %[s6], 0x3f \n\t"
29 "1: \n\t"
30 "lw %[s2], 0(%[src]) \n\t"
31 "lw %[s1], 4(%[src]) \n\t"
32 "lwr %[s0], 0(%[dst]) \n\t"
33 "lwl %[s0], 3(%[dst]) \n\t"
34 "and %[t1], %[s0], %[s5] \n\t"
35 "shra.ph %[t0], %[s0], 5 \n\t"
36 "and %[t2], %[t0], %[s6] \n\t"
37 #ifdef __MIPS_HAVE_DSPR2
38 "shrl.ph %[t3], %[s0], 11 \n\t"
39 #else
40 "shra.ph %[t0], %[s0], 11 \n\t"
41 "and %[t3], %[t0], %[s5] \n\t"
42 #endif
43 "precrq.ph.w %[t0], %[s1], %[s2] \n\t"
44 "shrl.qb %[t5], %[t0], 3 \n\t"
45 "and %[t4], %[t5], %[s5] \n\t"
46 "ins %[s2], %[s1], 16, 16 \n\t"
47 "preceu.ph.qbra %[t0], %[s2] \n\t"
48 "shrl.qb %[t6], %[t0], 3 \n\t"
49 #ifdef __MIPS_HAVE_DSPR2
50 "shrl.ph %[t5], %[s2], 10 \n\t"
51 #else
52 "shra.ph %[t0], %[s2], 10 \n\t"
53 "and %[t5], %[t0], %[s6] \n\t"
54 #endif
55 "subu.qb %[t4], %[t4], %[t1] \n\t"
56 "subu.qb %[t5], %[t5], %[t2] \n\t"
57 "subu.qb %[t6], %[t6], %[t3] \n\t"
58 "muleu_s.ph.qbr %[t4], %[s4], %[t4] \n\t"
59 "muleu_s.ph.qbr %[t5], %[s4], %[t5] \n\t"
60 "muleu_s.ph.qbr %[t6], %[s4], %[t6] \n\t"
61 "addiu %[count], %[count], -2 \n\t"
62 "addiu %[src], %[src], 8 \n\t"
63 "shra.ph %[t4], %[t4], 8 \n\t"
64 "shra.ph %[t5], %[t5], 8 \n\t"
65 "shra.ph %[t6], %[t6], 8 \n\t"
66 "addu.qb %[t4], %[t4], %[t1] \n\t"
67 "addu.qb %[t5], %[t5], %[t2] \n\t"
68 "addu.qb %[t6], %[t6], %[t3] \n\t"
69 "andi %[s0], %[t4], 0xffff \n\t"
70 "andi %[t0], %[t5], 0xffff \n\t"
71 "sll %[t0], %[t0], 0x5 \n\t"
72 "or %[s0], %[s0], %[t0] \n\t"
73 "sll %[t0], %[t6], 0xb \n\t"
74 "or %[t0], %[t0], %[s0] \n\t"
75 "sh %[t0], 0(%[dst]) \n\t"
76 "srl %[s1], %[t4], 16 \n\t"
77 "srl %[t0], %[t5], 16 \n\t"
78 "sll %[t5], %[t0], 5 \n\t"
79 "or %[t0], %[t5], %[s1] \n\t"
80 "srl %[s0], %[t6], 16 \n\t"
81 "sll %[s2], %[s0], 0xb \n\t"
82 "or %[s1], %[s2], %[t0] \n\t"
83 "sh %[s1], 2(%[dst]) \n\t"
84 "bge %[count], 2, 1b \n\t"
85 " addiu %[dst], %[dst], 4 \n\t"
86 ".set pop \n\t"
87 : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
88 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
89 [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),
90 [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),
91 [src]"+r"(src)
92 : [alpha]"r"(alpha)
93 : "memory", "hi", "lo"
94 );
95 }
96
97 if (count == 1) {
98 SkPMColor c = *src++;
99 SkPMColorAssert(c);
100 SkASSERT(SkGetPackedA32(c) == 255);
101 uint16_t d = *dst;
102 *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),
103 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),
104 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));
105 }
106 }
107
108 static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
109 const SkPMColor* __restrict__ src,
110 int count, U8CPU alpha, int x, int y) {
111 __asm__ volatile (
112 "pref 0, 0(%[src]) \n\t"
113 "pref 1, 0(%[dst]) \n\t"
114 "pref 0, 32(%[src]) \n\t"
115 "pref 1, 32(%[dst]) \n\t"
116 :
117 : [src]"r"(src), [dst]"r"(dst)
118 : "memory"
119 );
120
121 register int32_t t0, t1, t2, t3, t4, t5, t6;
122 register int32_t t7, t8, t9, s0, s1, s2, s3;
123 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
124
125 if (count >= 2) {
126 __asm__ volatile (
127 ".set push \n\t"
128 ".set noreorder \n\t"
129 "li %[s1], 0x01010101 \n\t"
130 "li %[s2], -2017 \n\t"
131 "1: \n\t"
132 "bnez %[s3], 4f \n\t"
133 " li %[s3], 2 \n\t"
134 "pref 0, 64(%[src]) \n\t"
135 "pref 1, 64(%[dst]) \n\t"
136 "4: \n\t"
137 "addiu %[s3], %[s3], -1 \n\t"
138 "lw %[t1], 0(%[src]) \n\t"
139 "andi %[t3], %[x], 0x3 \n\t"
140 "addiu %[x], %[x], 1 \n\t"
141 "sll %[t4], %[t3], 2 \n\t"
142 "srav %[t5], %[dither_scan], %[t4] \n\t"
143 "andi %[t3], %[t5], 0xf \n\t"
144 "lw %[t2], 4(%[src]) \n\t"
145 "andi %[t4], %[x], 0x3 \n\t"
146 "sll %[t5], %[t4], 2 \n\t"
147 "srav %[t6], %[dither_scan], %[t5] \n\t"
148 "addiu %[x], %[x], 1 \n\t"
149 "ins %[t3], %[t6], 8, 4 \n\t"
150 "srl %[t4], %[t1], 24 \n\t"
151 "addiu %[t0], %[t4], 1 \n\t"
152 "srl %[t4], %[t2], 24 \n\t"
153 "addiu %[t5], %[t4], 1 \n\t"
154 "ins %[t0], %[t5], 16, 16 \n\t"
155 "muleu_s.ph.qbr %[t4], %[t3], %[t0] \n\t"
156 "preceu.ph.qbla %[t3], %[t4] \n\t"
157 "andi %[t4], %[t1], 0xff \n\t"
158 "ins %[t4], %[t2], 16, 8 \n\t"
159 "shrl.qb %[t5], %[t4], 5 \n\t"
160 "subu.qb %[t6], %[t3], %[t5] \n\t"
161 "addq.ph %[t5], %[t6], %[t4] \n\t"
162 "ext %[t4], %[t1], 8, 8 \n\t"
163 "srl %[t6], %[t2], 8 \n\t"
164 "ins %[t4], %[t6], 16, 8 \n\t"
165 "shrl.qb %[t6], %[t4], 6 \n\t"
166 "shrl.qb %[t7], %[t3], 1 \n\t"
167 "subu.qb %[t8], %[t7], %[t6] \n\t"
168 "addq.ph %[t6], %[t8], %[t4] \n\t"
169 "ext %[t4], %[t1], 16, 8 \n\t"
170 "srl %[t7], %[t2], 16 \n\t"
171 "ins %[t4], %[t7], 16, 8 \n\t"
172 "shrl.qb %[t7], %[t4], 5 \n\t"
173 "subu.qb %[t8], %[t3], %[t7] \n\t"
174 "addq.ph %[t7], %[t8], %[t4] \n\t"
175 "shll.ph %[t4], %[t7], 2 \n\t"
176 "andi %[t9], %[t4], 0xffff \n\t"
177 "srl %[s0], %[t4], 16 \n\t"
178 "andi %[t3], %[t6], 0xffff \n\t"
179 "srl %[t4], %[t6], 16 \n\t"
180 "andi %[t6], %[t5], 0xffff \n\t"
181 "srl %[t7], %[t5], 16 \n\t"
182 "subq.ph %[t5], %[s1], %[t0] \n\t"
183 "srl %[t0], %[t5], 3 \n\t"
184 "beqz %[t1], 3f \n\t"
185 " lhu %[t5], 0(%[dst]) \n\t"
186 "sll %[t1], %[t6], 13 \n\t"
187 "or %[t8], %[t9], %[t1] \n\t"
188 "sll %[t1], %[t3], 24 \n\t"
189 "or %[t9], %[t1], %[t8] \n\t"
190 "andi %[t3], %[t5], 0x7e0 \n\t"
191 "sll %[t6], %[t3], 0x10 \n\t"
192 "and %[t8], %[s2], %[t5] \n\t"
193 "or %[t5], %[t6], %[t8] \n\t"
194 "andi %[t6], %[t0], 0xff \n\t"
195 "mul %[t1], %[t6], %[t5] \n\t"
196 "addu %[t5], %[t1], %[t9] \n\t"
197 "srl %[t6], %[t5], 5 \n\t"
198 "and %[t5], %[s2], %[t6] \n\t"
199 "srl %[t8], %[t6], 16 \n\t"
200 "andi %[t6], %[t8], 0x7e0 \n\t"
201 "or %[t1], %[t5], %[t6] \n\t"
202 "sh %[t1], 0(%[dst]) \n\t"
203 "3: \n\t"
204 "beqz %[t2], 2f \n\t"
205 " lhu %[t5], 2(%[dst]) \n\t"
206 "sll %[t1], %[t7], 13 \n\t"
207 "or %[t8], %[s0], %[t1] \n\t"
208 "sll %[t1], %[t4], 24 \n\t"
209 "or %[t9], %[t1], %[t8] \n\t"
210 "andi %[t3], %[t5], 0x7e0 \n\t"
211 "sll %[t6], %[t3], 0x10 \n\t"
212 "and %[t8], %[s2], %[t5] \n\t"
213 "or %[t5], %[t6], %[t8] \n\t"
214 "srl %[t6], %[t0], 16 \n\t"
215 "mul %[t1], %[t6], %[t5] \n\t"
216 "addu %[t5], %[t1], %[t9] \n\t"
217 "srl %[t6], %[t5], 5 \n\t"
218 "and %[t5], %[s2], %[t6] \n\t"
219 "srl %[t8], %[t6], 16 \n\t"
220 "andi %[t6], %[t8], 0x7e0 \n\t"
221 "or %[t1], %[t5], %[t6] \n\t"
222 "sh %[t1], 2(%[dst]) \n\t"
223 "2: \n\t"
224 "addiu %[count], %[count], -2 \n\t"
225 "addiu %[src], %[src], 8 \n\t"
226 "addiu %[t1], %[count], -1 \n\t"
227 "bgtz %[t1], 1b \n\t"
228 " addiu %[dst], %[dst], 4 \n\t"
229 ".set pop \n\t"
230 : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),
231 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
232 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),
233 [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),
234 [s2]"=&r"(s2), [s3]"=&r"(s3)
235 : [dither_scan]"r"(dither_scan)
236 : "memory", "hi", "lo"
237 );
238 }
239
240 if (count == 1) {
241 SkPMColor c = *src++;
242 SkPMColorAssert(c);
243 if (c) {
244 unsigned a = SkGetPackedA32(c);
245 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
246
247 unsigned sr = SkGetPackedR32(c);
248 unsigned sg = SkGetPackedG32(c);
249 unsigned sb = SkGetPackedB32(c);
250 sr = SkDITHER_R32_FOR_565(sr, d);
251 sg = SkDITHER_G32_FOR_565(sg, d);
252 sb = SkDITHER_B32_FOR_565(sb, d);
253
254 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
255 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
256 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
257 // now src and dst expanded are in g:11 r:10 x:1 b:10
258 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
259 }
260 dst += 1;
261 DITHER_INC_X(x);
262 }
263 }
264
265 static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
266 const SkPMColor* __restrict__ src,
267 int count, U8CPU alpha, int x, int y ) {
268 uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
269 register uint32_t t0, t1, t2, t3, t4, t5;
270 register uint32_t t6, t7, t8, t9, s0;
271 int dither[4];
272 int i;
273
274 for (i = 0; i < 4; i++, x++) {
275 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
276 }
277
278 __asm__ volatile (
279 ".set push \n\t"
280 ".set noreorder \n\t"
281 "li %[s0], 1 \n\t"
282 "2: \n\t"
283 "beqz %[count], 1f \n\t"
284 " nop \n\t"
285 "addiu %[t0], %[count], -1 \n\t"
286 "beqz %[t0], 1f \n\t"
287 " nop \n\t"
288 "beqz %[s0], 3f \n\t"
289 " nop \n\t"
290 "lw %[t0], 0(%[dither]) \n\t"
291 "lw %[t1], 4(%[dither]) \n\t"
292 "li %[s0], 0 \n\t"
293 "b 4f \n\t"
294 " nop \n\t"
295 "3: \n\t"
296 "lw %[t0], 8(%[dither]) \n\t"
297 "lw %[t1], 12(%[dither]) \n\t"
298 "li %[s0], 1 \n\t"
299 "4: \n\t"
300 "sll %[t2], %[t0], 16 \n\t"
301 "or %[t1], %[t2], %[t1] \n\t"
302 "lw %[t0], 0(%[src]) \n\t"
303 "lw %[t2], 4(%[src]) \n\t"
304 "precrq.ph.w %[t3], %[t0], %[t2] \n\t"
305 "preceu.ph.qbra %[t9], %[t3] \n\t"
306 #ifdef __MIPS_HAVE_DSPR2
307 "append %[t0], %[t2], 16 \n\t"
308 "preceu.ph.qbra %[t4], %[t0] \n\t"
309 "preceu.ph.qbla %[t5], %[t0] \n\t"
310 #else
311 "sll %[t6], %[t0], 16 \n\t"
312 "sll %[t7], %[t2], 16 \n\t"
313 "precrq.ph.w %[t8], %[t6], %[t7] \n\t"
314 "preceu.ph.qbra %[t4], %[t8] \n\t"
315 "preceu.ph.qbla %[t5], %[t8] \n\t"
316 #endif
317 "addu.qb %[t0], %[t4], %[t1] \n\t"
318 "shra.ph %[t2], %[t4], 5 \n\t"
319 "subu.qb %[t3], %[t0], %[t2] \n\t"
320 "shra.ph %[t6], %[t3], 3 \n\t"
321 "addu.qb %[t0], %[t9], %[t1] \n\t"
322 "shra.ph %[t2], %[t9], 5 \n\t"
323 "subu.qb %[t3], %[t0], %[t2] \n\t"
324 "shra.ph %[t7], %[t3], 3 \n\t"
325 "shra.ph %[t0], %[t1], 1 \n\t"
326 "shra.ph %[t2], %[t5], 6 \n\t"
327 "addu.qb %[t3], %[t5], %[t0] \n\t"
328 "subu.qb %[t4], %[t3], %[t2] \n\t"
329 "shra.ph %[t8], %[t4], 2 \n\t"
330 "precrq.ph.w %[t0], %[t6], %[t7] \n\t"
331 #ifdef __MIPS_HAVE_DSPR2
332 "append %[t6], %[t7], 16 \n\t"
333 #else
334 "sll %[t6], %[t6], 16 \n\t"
335 "sll %[t2], %[t7], 16 \n\t"
336 "precrq.ph.w %[t6], %[t6], %[t2] \n\t"
337 #endif
338 "sra %[t4], %[t8], 16 \n\t"
339 "andi %[t5], %[t8], 0xFF \n\t"
340 "sll %[t7], %[t4], 5 \n\t"
341 "sra %[t8], %[t0], 5 \n\t"
342 "or %[t9], %[t7], %[t8] \n\t"
343 "or %[t3], %[t9], %[t0] \n\t"
344 "andi %[t4], %[t3], 0xFFFF \n\t"
345 "sll %[t7], %[t5], 5 \n\t"
346 "sra %[t8], %[t6], 5 \n\t"
347 "or %[t9], %[t7], %[t8] \n\t"
348 "or %[t3], %[t9], %[t6] \n\t"
349 "and %[t7], %[t3], 0xFFFF \n\t"
350 "sh %[t4], 0(%[dst]) \n\t"
351 "sh %[t7], 2(%[dst]) \n\t"
352 "addiu %[count], %[count], -2 \n\t"
353 "addiu %[src], %[src], 8 \n\t"
354 "b 2b \n\t"
355 " addiu %[dst], %[dst], 4 \n\t"
356 "1: \n\t"
357 ".set pop \n\t"
358 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
359 [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
360 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
361 [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)
362 : [dither] "r" (dither)
363 : "memory"
364 );
365
366 if (count == 1) {
367 SkPMColor c = *src++;
368 SkPMColorAssert(c); // only if DEBUG is turned on
369 SkASSERT(SkGetPackedA32(c) == 255);
370 unsigned dither = DITHER_VALUE(x);
371 *dst++ = SkDitherRGB32To565(c, dither);
372 }
373 }
374
375 static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,
376 const SkPMColor* src,
377 int count, U8CPU alpha, int x, int y) {
378 register int32_t t0, t1, t2, t3, t4, t5, t6;
379 register int32_t s0, s1, s2, s3;
380 register int x1 = 0;
381 register uint32_t sc_mul;
382 register uint32_t sc_add;
383 #ifdef ENABLE_DITHER_MATRIX_4X4
384 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
385 #else // ENABLE_DITHER_MATRIX_4X4
386 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
387 #endif // ENABLE_DITHER_MATRIX_4X4
388 int dither[4];
389
390 for (int i = 0; i < 4; i++) {
391 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
392 x += 1;
393 }
394 alpha += 1;
395 __asm__ volatile (
396 ".set push \n\t"
397 ".set noreorder \n\t"
398 "li %[t0], 0x100 \n\t"
399 "subu %[t0], %[t0], %[alpha] \n\t"
400 "replv.ph %[sc_mul], %[alpha] \n\t"
401 "beqz %[alpha], 1f \n\t"
402 " nop \n\t"
403 "replv.qb %[sc_add], %[t0] \n\t"
404 "b 2f \n\t"
405 " nop \n\t"
406 "1: \n\t"
407 "replv.qb %[sc_add], %[alpha] \n\t"
408 "2: \n\t"
409 "addiu %[t2], %[count], -1 \n\t"
410 "blez %[t2], 3f \n\t"
411 " nop \n\t"
412 "lw %[s0], 0(%[src]) \n\t"
413 "lw %[s1], 4(%[src]) \n\t"
414 "bnez %[x1], 4f \n\t"
415 " nop \n\t"
416 "lw %[t0], 0(%[dither]) \n\t"
417 "lw %[t1], 4(%[dither]) \n\t"
418 "li %[x1], 1 \n\t"
419 "b 5f \n\t"
420 " nop \n\t"
421 "4: \n\t"
422 "lw %[t0], 8(%[dither]) \n\t"
423 "lw %[t1], 12(%[dither]) \n\t"
424 "li %[x1], 0 \n\t"
425 "5: \n\t"
426 "sll %[t3], %[t0], 7 \n\t"
427 "sll %[t4], %[t1], 7 \n\t"
428 #ifdef __MIPS_HAVE_DSPR2
429 "append %[t0], %[t1], 16 \n\t"
430 #else
431 "sll %[t0], %[t0], 8 \n\t"
432 "sll %[t2], %[t1], 8 \n\t"
433 "precrq.qb.ph %[t0], %[t0], %[t2] \n\t"
434 #endif
435 "precrq.qb.ph %[t1], %[t3], %[t4] \n\t"
436 "sll %[t5], %[s0], 8 \n\t"
437 "sll %[t6], %[s1], 8 \n\t"
438 "precrq.qb.ph %[t4], %[t5], %[t6] \n\t"
439 "precrq.qb.ph %[t6], %[s0], %[s1] \n\t"
440 "preceu.ph.qbla %[t5], %[t4] \n\t"
441 "preceu.ph.qbra %[t4], %[t4] \n\t"
442 "preceu.ph.qbra %[t6], %[t6] \n\t"
443 "lh %[t2], 0(%[dst]) \n\t"
444 "lh %[s1], 2(%[dst]) \n\t"
445 #ifdef __MIPS_HAVE_DSPR2
446 "append %[t2], %[s1], 16 \n\t"
447 #else
448 "sll %[s1], %[s1], 16 \n\t"
449 "packrl.ph %[t2], %[t2], %[s1] \n\t"
450 #endif
451 "shra.ph %[s1], %[t2], 11 \n\t"
452 "and %[s1], %[s1], 0x1F001F \n\t"
453 "shra.ph %[s2], %[t2], 5 \n\t"
454 "and %[s2], %[s2], 0x3F003F \n\t"
455 "and %[s3], %[t2], 0x1F001F \n\t"
456 "shrl.qb %[t3], %[t4], 5 \n\t"
457 "addu.qb %[t4], %[t4], %[t0] \n\t"
458 "subu.qb %[t4], %[t4], %[t3] \n\t"
459 "shrl.qb %[t4], %[t4], 3 \n\t"
460 "shrl.qb %[t3], %[t5], 5 \n\t"
461 "addu.qb %[t5], %[t5], %[t0] \n\t"
462 "subu.qb %[t5], %[t5], %[t3] \n\t"
463 "shrl.qb %[t5], %[t5], 3 \n\t"
464 "shrl.qb %[t3], %[t6], 6 \n\t"
465 "addu.qb %[t6], %[t6], %[t1] \n\t"
466 "subu.qb %[t6], %[t6], %[t3] \n\t"
467 "shrl.qb %[t6], %[t6], 2 \n\t"
468 "cmpu.lt.qb %[t4], %[s1] \n\t"
469 "pick.qb %[s0], %[sc_add], $0 \n\t"
470 "addu.qb %[s0], %[s0], %[s1] \n\t"
471 "subu.qb %[t4], %[t4], %[s1] \n\t"
472 "muleu_s.ph.qbl %[t0], %[t4], %[sc_mul] \n\t"
473 "muleu_s.ph.qbr %[t1], %[t4], %[sc_mul] \n\t"
474 "precrq.qb.ph %[t4], %[t0], %[t1] \n\t"
475 "addu.qb %[t4], %[t4], %[s0] \n\t"
476 "cmpu.lt.qb %[t5], %[s3] \n\t"
477 "pick.qb %[s0], %[sc_add], $0 \n\t"
478 "addu.qb %[s0], %[s0], %[s3] \n\t"
479 "subu.qb %[t5], %[t5], %[s3] \n\t"
480 "muleu_s.ph.qbl %[t0], %[t5], %[sc_mul] \n\t"
481 "muleu_s.ph.qbr %[t1], %[t5], %[sc_mul] \n\t"
482 "precrq.qb.ph %[t5], %[t0], %[t1] \n\t"
483 "addu.qb %[t5], %[t5], %[s0] \n\t"
484 "cmpu.lt.qb %[t6], %[s2] \n\t"
485 "pick.qb %[s0], %[sc_add], $0 \n\t"
486 "addu.qb %[s0], %[s0], %[s2] \n\t"
487 "subu.qb %[t6], %[t6], %[s2] \n\t"
488 "muleu_s.ph.qbl %[t0], %[t6], %[sc_mul] \n\t"
489 "muleu_s.ph.qbr %[t1], %[t6], %[sc_mul] \n\t"
490 "precrq.qb.ph %[t6], %[t0], %[t1] \n\t"
491 "addu.qb %[t6], %[t6], %[s0] \n\t"
492 "shll.ph %[s1], %[t4], 11 \n\t"
493 "shll.ph %[t0], %[t6], 5 \n\t"
494 "or %[s0], %[s1], %[t0] \n\t"
495 "or %[s1], %[s0], %[t5] \n\t"
496 "srl %[t2], %[s1], 16 \n\t"
497 "and %[t3], %[s1], 0xFFFF \n\t"
498 "sh %[t2], 0(%[dst]) \n\t"
499 "sh %[t3], 2(%[dst]) \n\t"
500 "addiu %[src], %[src], 8 \n\t"
501 "addi %[count], %[count], -2 \n\t"
502 "b 2b \n\t"
503 " addu %[dst], %[dst], 4 \n\t"
504 "3: \n\t"
505 ".set pop \n\t"
506 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
507 [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),
508 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
509 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
510 [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
511 : [dither]"r"(dither), [alpha]"r"(alpha)
512 : "memory", "hi", "lo"
513 );
514
515 if(count == 1) {
516 SkPMColor c = *src++;
517 SkPMColorAssert(c);
518 SkASSERT(SkGetPackedA32(c) == 255);
519 DITHER_565_SCAN(y);
520 int dither = DITHER_VALUE(x);
521 int sr = SkGetPackedR32(c);
522 int sg = SkGetPackedG32(c);
523 int sb = SkGetPackedB32(c);
524 sr = SkDITHER_R32To565(sr, dither);
525 sg = SkDITHER_G32To565(sg, dither);
526 sb = SkDITHER_B32To565(sb, dither);
527
528 uint16_t d = *dst;
529 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),
530 SkAlphaBlend(sg, SkGetPackedG16(d), alpha),
531 SkAlphaBlend(sb, SkGetPackedB16(d), alpha));
532 DITHER_INC_X(x);
533 }
534 }
535
536 static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,
537 const SkPMColor* __restrict__ src,
538 int count, U8CPU alpha, int x, int y) {
539
540 __asm__ volatile (
541 "pref 0, 0(%[src]) \n\t"
542 "pref 1, 0(%[dst]) \n\t"
543 "pref 0, 32(%[src]) \n\t"
544 "pref 1, 32(%[dst]) \n\t"
545 :
546 : [src]"r"(src), [dst]"r"(dst)
547 : "memory"
548 );
549
550 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
551 register uint32_t t16;
552 register uint32_t add_x10 = 0x100010;
553 register uint32_t add_x20 = 0x200020;
554 register uint32_t sa = 0xff00ff;
555
556 __asm__ volatile (
557 ".set push \n\t"
558 ".set noreorder \n\t"
559 "blez %[count], 1f \n\t"
560 " nop \n\t"
561 "2: \n\t"
562 "beqz %[count], 1f \n\t"
563 " nop \n\t"
564 "addiu %[t0], %[count], -1 \n\t"
565 "beqz %[t0], 1f \n\t"
566 " nop \n\t"
567 "bnez %[t16], 3f \n\t"
568 " nop \n\t"
569 "li %[t16], 2 \n\t"
570 "pref 0, 64(%[src]) \n\t"
571 "pref 1, 64(%[dst]) \n\t"
572 "3: \n\t"
573 "addiu %[t16], %[t16], -1 \n\t"
574 "lw %[t0], 0(%[src]) \n\t"
575 "lw %[t1], 4(%[src]) \n\t"
576 "precrq.ph.w %[t2], %[t0], %[t1] \n\t"
577 "preceu.ph.qbra %[t8], %[t2] \n\t"
578 #ifdef __MIPS_HAVE_DSPR2
579 "append %[t0], %[t1], 16 \n\t"
580 #else
581 "sll %[t0], %[t0], 16 \n\t"
582 "sll %[t6], %[t1], 16 \n\t"
583 "precrq.ph.w %[t0], %[t0], %[t6] \n\t"
584 #endif
585 "preceu.ph.qbra %[t3], %[t0] \n\t"
586 "preceu.ph.qbla %[t4], %[t0] \n\t"
587 "preceu.ph.qbla %[t0], %[t2] \n\t"
588 "subq.ph %[t1], %[sa], %[t0] \n\t"
589 "sra %[t2], %[t1], 8 \n\t"
590 "or %[t5], %[t2], %[t1] \n\t"
591 "replv.ph %[t2], %[t5] \n\t"
592 "lh %[t0], 0(%[dst]) \n\t"
593 "lh %[t1], 2(%[dst]) \n\t"
594 "and %[t1], %[t1], 0xffff \n\t"
595 #ifdef __MIPS_HAVE_DSPR2
596 "append %[t0], %[t1], 16 \n\t"
597 #else
598 "sll %[t5], %[t0], 16 \n\t"
599 "or %[t0], %[t5], %[t1] \n\t"
600 #endif
601 "and %[t1], %[t0], 0x1f001f \n\t"
602 "shra.ph %[t6], %[t0], 11 \n\t"
603 "and %[t6], %[t6], 0x1f001f \n\t"
604 "and %[t7], %[t0], 0x7e007e0 \n\t"
605 "shra.ph %[t5], %[t7], 5 \n\t"
606 "muleu_s.ph.qbl %[t0], %[t2], %[t6] \n\t"
607 "addq.ph %[t7], %[t0], %[add_x10] \n\t"
608 "shra.ph %[t6], %[t7], 5 \n\t"
609 "addq.ph %[t6], %[t7], %[t6] \n\t"
610 "shra.ph %[t0], %[t6], 5 \n\t"
611 "addq.ph %[t7], %[t0], %[t3] \n\t"
612 "shra.ph %[t6], %[t7], 3 \n\t"
613 "muleu_s.ph.qbl %[t0], %[t2], %[t1] \n\t"
614 "addq.ph %[t7], %[t0], %[add_x10] \n\t"
615 "shra.ph %[t0], %[t7], 5 \n\t"
616 "addq.ph %[t7], %[t7], %[t0] \n\t"
617 "shra.ph %[t0], %[t7], 5 \n\t"
618 "addq.ph %[t7], %[t0], %[t8] \n\t"
619 "shra.ph %[t3], %[t7], 3 \n\t"
620 "muleu_s.ph.qbl %[t0], %[t2], %[t5] \n\t"
621 "addq.ph %[t7], %[t0], %[add_x20] \n\t"
622 "shra.ph %[t0], %[t7], 6 \n\t"
623 "addq.ph %[t8], %[t7], %[t0] \n\t"
624 "shra.ph %[t0], %[t8], 6 \n\t"
625 "addq.ph %[t7], %[t0], %[t4] \n\t"
626 "shra.ph %[t8], %[t7], 2 \n\t"
627 "shll.ph %[t0], %[t8], 5 \n\t"
628 "shll.ph %[t1], %[t6], 11 \n\t"
629 "or %[t2], %[t0], %[t1] \n\t"
630 "or %[t3], %[t2], %[t3] \n\t"
631 "sra %[t4], %[t3], 16 \n\t"
632 "sh %[t4], 0(%[dst]) \n\t"
633 "sh %[t3], 2(%[dst]) \n\t"
634 "addiu %[count], %[count], -2 \n\t"
635 "addiu %[src], %[src], 8 \n\t"
636 "b 2b \n\t"
637 " addiu %[dst], %[dst], 4 \n\t"
638 "1: \n\t"
639 ".set pop \n\t"
640 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
641 [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
642 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
643 [t7]"=&r"(t7), [t8]"=&r"(t8)
644 : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)
645 : "memory", "hi", "lo"
646 );
647
648 if (count == 1) {
649 SkPMColor c = *src++;
650 SkPMColorAssert(c);
651 if (c) {
652 *dst = SkSrcOver32To16(c, *dst);
653 }
654 dst += 1;
655 }
656 }
657
658 static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
659 const SkPMColor* SK_RESTRICT src, int count ,
660 U8CPU alpha, int /*x*/, int /*y*/) {
661 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
662 register uint32_t s0, s1, s2, s3;
663 register unsigned dst_scale = 0;
664
665 __asm__ volatile (
666 ".set push \n\t"
667 ".set noreorder \n\t"
668 "replv.qb %[t0], %[alpha] \n\t"
669 "repl.ph %[t6], 0x80 \n\t"
670 "repl.ph %[t7], 0xFF \n\t"
671 "1: \n\t"
672 "addiu %[t8], %[count], -1 \n\t"
673 "blez %[t8], 2f \n\t"
674 " nop \n\t"
675 "lw %[t8], 0(%[src]) \n\t"
676 "lw %[t9], 4(%[src]) \n\t"
677 "lh %[t4], 0(%[dst]) \n\t"
678 "lh %[t5], 2(%[dst]) \n\t"
679 "sll %[t5], %[t5], 16 \n\t"
680 "sll %[t2], %[t8], 8 \n\t"
681 "sll %[t3], %[t9], 8 \n\t"
682 "precrq.qb.ph %[t1], %[t2], %[t3] \n\t"
683 "precrq.qb.ph %[t3], %[t8], %[t9] \n\t"
684 "preceu.ph.qbla %[t8], %[t3] \n\t"
685 "muleu_s.ph.qbr %[s3], %[t0], %[t8] \n\t"
686 "preceu.ph.qbla %[t2], %[t1] \n\t"
687 "preceu.ph.qbra %[t1], %[t1] \n\t"
688 "preceu.ph.qbra %[t3], %[t3] \n\t"
689 "packrl.ph %[t9], %[t4], %[t5] \n\t"
690 "shra.ph %[s0], %[t9], 11 \n\t"
691 "and %[s0], %[s0], 0x1F001F \n\t"
692 "shra.ph %[s1], %[t9], 5 \n\t"
693 "and %[s1], %[s1], 0x3F003F \n\t"
694 "and %[s2], %[t9], 0x1F001F \n\t"
695 "addq.ph %[s3], %[s3], %[t6] \n\t"
696 "shra.ph %[t5], %[s3], 8 \n\t"
697 "and %[t5], %[t5], 0xFF00FF \n\t"
698 "addq.ph %[dst_scale], %[s3], %[t5] \n\t"
699 "shra.ph %[dst_scale], %[dst_scale], 8 \n\t"
700 "subq_s.ph %[dst_scale], %[t7], %[dst_scale] \n\t"
701 "sll %[dst_scale], %[dst_scale], 8 \n\t"
702 "precrq.qb.ph %[dst_scale], %[dst_scale], %[dst_scale] \n\t"
703 "shrl.qb %[t1], %[t1], 3 \n\t"
704 "shrl.qb %[t2], %[t2], 3 \n\t"
705 "shrl.qb %[t3], %[t3], 2 \n\t"
706 "muleu_s.ph.qbl %[t1], %[t0], %[t1] \n\t"
707 "muleu_s.ph.qbl %[t2], %[t0], %[t2] \n\t"
708 "muleu_s.ph.qbl %[t3], %[t0], %[t3] \n\t"
709 "muleu_s.ph.qbl %[t8], %[dst_scale], %[s0] \n\t"
710 "muleu_s.ph.qbl %[t9], %[dst_scale], %[s2] \n\t"
711 "muleu_s.ph.qbl %[t4], %[dst_scale], %[s1] \n\t"
712 "addq.ph %[t1], %[t1], %[t8] \n\t"
713 "addq.ph %[t2], %[t2], %[t9] \n\t"
714 "addq.ph %[t3], %[t3], %[t4] \n\t"
715 "addq.ph %[t8], %[t1], %[t6] \n\t"
716 "addq.ph %[t9], %[t2], %[t6] \n\t"
717 "addq.ph %[t4], %[t3], %[t6] \n\t"
718 "shra.ph %[t1], %[t8], 8 \n\t"
719 "addq.ph %[t1], %[t1], %[t8] \n\t"
720 "preceu.ph.qbla %[t1], %[t1] \n\t"
721 "shra.ph %[t2], %[t9], 8 \n\t"
722 "addq.ph %[t2], %[t2], %[t9] \n\t"
723 "preceu.ph.qbla %[t2], %[t2] \n\t"
724 "shra.ph %[t3], %[t4], 8 \n\t"
725 "addq.ph %[t3], %[t3], %[t4] \n\t"
726 "preceu.ph.qbla %[t3], %[t3] \n\t"
727 "shll.ph %[t8], %[t1], 11 \n\t"
728 "shll.ph %[t9], %[t3], 5 \n\t"
729 "or %[t8], %[t8], %[t9] \n\t"
730 "or %[s0], %[t8], %[t2] \n\t"
731 "srl %[t8], %[s0], 16 \n\t"
732 "and %[t9], %[s0], 0xFFFF \n\t"
733 "sh %[t8], 0(%[dst]) \n\t"
734 "sh %[t9], 2(%[dst]) \n\t"
735 "addiu %[src], %[src], 8 \n\t"
736 "addiu %[count], %[count], -2 \n\t"
737 "b 1b \n\t"
738 " addiu %[dst], %[dst], 4 \n\t"
739 "2: \n\t"
740 ".set pop \n\t"
741 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
742 [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),
743 [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),
744 [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),
745 [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)
746 : [alpha]"r"(alpha)
747 : "memory", "hi", "lo"
748 );
749
750 if (count == 1) {
751 SkPMColor sc = *src++;
752 SkPMColorAssert(sc);
753 if (sc) {
754 uint16_t dc = *dst;
755 unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alph a);
756 unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) +
757 SkMulS16(SkGetPackedR16(dc), dst_scale);
758 unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) +
759 SkMulS16(SkGetPackedG16(dc), dst_scale);
760 unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) +
761 SkMulS16(SkGetPackedB16(dc), dst_scale);
762 *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Rou nd(db));
763 }
764 dst += 1;
765 }
766 }
767
768 static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,
769 const SkPMColor* SK_RESTRICT src,
770 int count, U8CPU alpha) {
771 register int32_t t0, t1, t2, t3, t4, t5, t6, t7;
772
773 __asm__ volatile (
774 ".set push \n\t"
775 ".set noreorder \n\t"
776 "li %[t2], 0x100 \n\t"
777 "addiu %[t0], %[alpha], 1 \n\t"
778 "subu %[t1], %[t2], %[t0] \n\t"
779 "replv.qb %[t7], %[t0] \n\t"
780 "replv.qb %[t6], %[t1] \n\t"
781 "1: \n\t"
782 "blez %[count], 2f \n\t"
783 "lw %[t0], 0(%[src]) \n\t"
784 "lw %[t1], 0(%[dst]) \n\t"
785 "preceu.ph.qbr %[t2], %[t0] \n\t"
786 "preceu.ph.qbl %[t3], %[t0] \n\t"
787 "preceu.ph.qbr %[t4], %[t1] \n\t"
788 "preceu.ph.qbl %[t5], %[t1] \n\t"
789 "muleu_s.ph.qbr %[t2], %[t7], %[t2] \n\t"
790 "muleu_s.ph.qbr %[t3], %[t7], %[t3] \n\t"
791 "muleu_s.ph.qbr %[t4], %[t6], %[t4] \n\t"
792 "muleu_s.ph.qbr %[t5], %[t6], %[t5] \n\t"
793 "addiu %[src], %[src], 4 \n\t"
794 "addiu %[count], %[count], -1 \n\t"
795 "precrq.qb.ph %[t0], %[t3], %[t2] \n\t"
796 "precrq.qb.ph %[t2], %[t5], %[t4] \n\t"
797 "addu %[t1], %[t0], %[t2] \n\t"
798 "sw %[t1], 0(%[dst]) \n\t"
799 "b 1b \n\t"
800 " addi %[dst], %[dst], 4 \n\t"
801 "2: \n\t"
802 ".set pop \n\t"
803 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
804 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
805 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
806 : [alpha]"r"(alpha)
807 : "memory", "hi", "lo"
808 );
809 }
810
811 ///////////////////////////////////////////////////////////////////////////////////////////////////
812
813 const SkBlitRow::Proc platform_565_procs_mips_dsp[] = {
814 // no dither
815 NULL,
816 S32_D565_Blend_mips_dsp,
817 S32A_D565_Opaque_mips_dsp,
818 S32A_D565_Blend_mips_dsp,
819
820 // dither
821 S32_D565_Opaque_Dither_mips_dsp,
822 S32_D565_Blend_Dither_mips_dsp,
823 S32A_D565_Opaque_Dither_mips_dsp,
824 NULL,
825 };
826
827 static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {
828 NULL, // S32_Opaque,
829 S32_Blend_BlitRow32_mips_dsp, // S32_Blend,
830 NULL, // S32A_Opaque,
831 NULL, // S32A_Blend,
832 };
833
834 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
835 return platform_565_procs_mips_dsp[flags];
836 }
837
838 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
839 return platform_32_procs_mips_dsp[flags];
840 }
841
842 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
843 return NULL;
844 }
845
846 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
847 return NULL;
848 }
OLDNEW
« no previous file with comments | « gyp/opts.gyp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698