Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(795)

Side by Side Diff: src/opts/SkBlitRow_opts_arm.cpp

Issue 1277953002: Purge non-NEON ARM code. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBitmapProcState_opts_arm.cpp ('k') | src/opts/SkBlitRow_opts_arm_neon.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow.h" 8 #include "SkBlitRow.h"
9 #include "SkColorPriv.h"
10 #include "SkDither.h"
11 #include "SkMathPriv.h"
12 #include "SkUtils.h"
13 #include "SkUtilsArm.h" 9 #include "SkUtilsArm.h"
14 10
15 // Define USE_NEON_CODE to indicate that we need to build NEON routines 11 #include "SkBlitRow_opts_arm_neon.h"
16 #define USE_NEON_CODE (!SK_ARM_NEON_IS_NONE)
17
18 // Define USE_ARM_CODE to indicate that we need to build ARM routines
19 #define USE_ARM_CODE (!SK_ARM_NEON_IS_ALWAYS)
20
21 #if USE_NEON_CODE
22 #include "SkBlitRow_opts_arm_neon.h"
23 #endif
24
25 #if USE_ARM_CODE
26
27 static void S32A_D565_Opaque(uint16_t* SK_RESTRICT dst,
28 const SkPMColor* SK_RESTRICT src, int count,
29 U8CPU alpha, int /*x*/, int /*y*/) {
30 SkASSERT(255 == alpha);
31
32 asm volatile (
33 "1: \n\t"
34 "ldr r3, [%[src]], #4 \n\t"
35 "cmp r3, #0xff000000 \n\t"
36 "blo 2f \n\t"
37 "and r4, r3, #0x0000f8 \n\t"
38 "and r5, r3, #0x00fc00 \n\t"
39 "and r6, r3, #0xf80000 \n\t"
40 #ifdef SK_ARM_HAS_EDSP
41 "pld [r1, #32] \n\t"
42 #endif
43 "lsl r3, r4, #8 \n\t"
44 "orr r3, r3, r5, lsr #5 \n\t"
45 "orr r3, r3, r6, lsr #19 \n\t"
46 "subs %[count], %[count], #1 \n\t"
47 "strh r3, [%[dst]], #2 \n\t"
48 "bne 1b \n\t"
49 "b 4f \n\t"
50 "2: \n\t"
51 "lsrs r7, r3, #24 \n\t"
52 "beq 3f \n\t"
53 "ldrh r4, [%[dst]] \n\t"
54 "rsb r7, r7, #255 \n\t"
55 "and r6, r4, #0x001f \n\t"
56 #if SK_ARM_ARCH <= 6
57 "lsl r5, r4, #21 \n\t"
58 "lsr r5, r5, #26 \n\t"
59 #else
60 "ubfx r5, r4, #5, #6 \n\t"
61 #endif
62 #ifdef SK_ARM_HAS_EDSP
63 "pld [r0, #16] \n\t"
64 #endif
65 "lsr r4, r4, #11 \n\t"
66 #ifdef SK_ARM_HAS_EDSP
67 "smulbb r6, r6, r7 \n\t"
68 "smulbb r5, r5, r7 \n\t"
69 "smulbb r4, r4, r7 \n\t"
70 #else
71 "mul r6, r6, r7 \n\t"
72 "mul r5, r5, r7 \n\t"
73 "mul r4, r4, r7 \n\t"
74 #endif
75 #if SK_ARM_ARCH >= 6
76 "uxtb r7, r3, ROR #16 \n\t"
77 "uxtb ip, r3, ROR #8 \n\t"
78 #else
79 "mov ip, #0xff \n\t"
80 "and r7, ip, r3, ROR #16 \n\t"
81 "and ip, ip, r3, ROR #8 \n\t"
82 #endif
83 "and r3, r3, #0xff \n\t"
84 "add r6, r6, #16 \n\t"
85 "add r5, r5, #32 \n\t"
86 "add r4, r4, #16 \n\t"
87 "add r6, r6, r6, lsr #5 \n\t"
88 "add r5, r5, r5, lsr #6 \n\t"
89 "add r4, r4, r4, lsr #5 \n\t"
90 "add r6, r7, r6, lsr #5 \n\t"
91 "add r5, ip, r5, lsr #6 \n\t"
92 "add r4, r3, r4, lsr #5 \n\t"
93 "lsr r6, r6, #3 \n\t"
94 "and r5, r5, #0xfc \n\t"
95 "and r4, r4, #0xf8 \n\t"
96 "orr r6, r6, r5, lsl #3 \n\t"
97 "orr r4, r6, r4, lsl #8 \n\t"
98 "strh r4, [%[dst]], #2 \n\t"
99 #ifdef SK_ARM_HAS_EDSP
100 "pld [r1, #32] \n\t"
101 #endif
102 "subs %[count], %[count], #1 \n\t"
103 "bne 1b \n\t"
104 "b 4f \n\t"
105 "3: \n\t"
106 "subs %[count], %[count], #1 \n\t"
107 "add %[dst], %[dst], #2 \n\t"
108 "bne 1b \n\t"
109 "4: \n\t"
110 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
111 :
112 : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip"
113 );
114 }
115
116 static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
117 const SkPMColor* SK_RESTRICT src,
118 int count, U8CPU alpha) {
119
120 SkASSERT(255 == alpha);
121
122 asm volatile (
123 "cmp %[count], #0 \n\t" /* comparing count wi th 0 */
124 "beq 3f \n\t" /* if zero exit */
125
126 "mov ip, #0xff \n\t" /* load the 0xff mask in ip */
127 "orr ip, ip, ip, lsl #16 \n\t" /* convert it to 0xff 00ff in ip */
128
129 "cmp %[count], #2 \n\t" /* compare count with 2 */
130 "blt 2f \n\t" /* if less than 2 -> single loop */
131
132 /* Double Loop */
133 "1: \n\t" /* <double loop> */
134 "ldm %[src]!, {r5,r6} \n\t" /* load the src(s) at r5-r6 */
135 "ldm %[dst], {r7,r8} \n\t" /* loading dst(s) int o r7-r8 */
136 "lsr r4, r5, #24 \n\t" /* extracting the alp ha from source and storing it to r4 */
137
138 /* ----------- */
139 "and r9, ip, r7 \n\t" /* r9 = br masked by ip */
140 "rsb r4, r4, #256 \n\t" /* subtracting the al pha from 256 -> r4=scale */
141 "and r10, ip, r7, lsr #8 \n\t" /* r10 = ag masked by ip */
142
143 "mul r9, r9, r4 \n\t" /* br = br * scale */
144 "mul r10, r10, r4 \n\t" /* ag = ag * scale */
145 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and ma sk it */
146
147 "and r10, r10, ip, lsl #8 \n\t" /* mask ag with rever se mask */
148 "lsr r4, r6, #24 \n\t" /* extracting the alp ha from source and storing it to r4 */
149 "orr r7, r9, r10 \n\t" /* br | ag*/
150
151 "add r7, r5, r7 \n\t" /* dst = src + calc d est(r7) */
152 "rsb r4, r4, #256 \n\t" /* subtracting the al pha from 255 -> r4=scale */
153
154 /* ----------- */
155 "and r9, ip, r8 \n\t" /* r9 = br masked by ip */
156
157 "and r10, ip, r8, lsr #8 \n\t" /* r10 = ag masked by ip */
158 "mul r9, r9, r4 \n\t" /* br = br * scale */
159 "sub %[count], %[count], #2 \n\t"
160 "mul r10, r10, r4 \n\t" /* ag = ag * scale */
161
162 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and ma sk it */
163 "and r10, r10, ip, lsl #8 \n\t" /* mask ag with rever se mask */
164 "cmp %[count], #1 \n\t" /* comparing count wi th 1 */
165 "orr r8, r9, r10 \n\t" /* br | ag */
166
167 "add r8, r6, r8 \n\t" /* dst = src + calc d est(r8) */
168
169 /* ----------------- */
170 "stm %[dst]!, {r7,r8} \n\t" /* *dst = r7, increme nt dst by two (each times 4) */
171 /* ----------------- */
172
173 "bgt 1b \n\t" /* if greater than 1 -> reloop */
174 "blt 3f \n\t" /* if less than 1 -> exit */
175
176 /* Single Loop */
177 "2: \n\t" /* <single loop> */
178 "ldr r5, [%[src]], #4 \n\t" /* load the src point er into r5 r5=src */
179 "ldr r7, [%[dst]] \n\t" /* loading dst into r 7 */
180 "lsr r4, r5, #24 \n\t" /* extracting the alp ha from source and storing it to r4 */
181
182 /* ----------- */
183 "and r9, ip, r7 \n\t" /* r9 = br masked by ip */
184 "rsb r4, r4, #256 \n\t" /* subtracting the al pha from 256 -> r4=scale */
185
186 "and r10, ip, r7, lsr #8 \n\t" /* r10 = ag masked by ip */
187 "mul r9, r9, r4 \n\t" /* br = br * scale */
188 "mul r10, r10, r4 \n\t" /* ag = ag * scale */
189 "and r9, ip, r9, lsr #8 \n\t" /* lsr br by 8 and ma sk it */
190
191 "and r10, r10, ip, lsl #8 \n\t" /* mask ag */
192 "orr r7, r9, r10 \n\t" /* br | ag */
193
194 "add r7, r5, r7 \n\t" /* *dst = src + calc dest(r7) */
195
196 /* ----------------- */
197 "str r7, [%[dst]], #4 \n\t" /* *dst = r7, increme nt dst by one (times 4) */
198 /* ----------------- */
199
200 "3: \n\t" /* <exit> */
201 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
202 :
203 : "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "ip", "memo ry"
204 );
205 }
206
207 /*
208 * ARM asm version of S32A_Blend_BlitRow32
209 */
210 void S32A_Blend_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
211 const SkPMColor* SK_RESTRICT src,
212 int count, U8CPU alpha) {
213 asm volatile (
214 "cmp %[count], #0 \n\t" /* comparing count wi th 0 */
215 "beq 3f \n\t" /* if zero exit */
216
217 "mov r12, #0xff \n\t" /* load the 0xff mask in r12 */
218 "orr r12, r12, r12, lsl #16 \n\t" /* convert it to 0xff 00ff in r12 */
219
220 /* src1,2_scale */
221 "add %[alpha], %[alpha], #1 \n\t" /* loading %[alpha]=s rc_scale=alpha+1 */
222
223 "cmp %[count], #2 \n\t" /* comparing count wi th 2 */
224 "blt 2f \n\t" /* if less than 2 -> single loop */
225
226 /* Double Loop */
227 "1: \n\t" /* <double loop> */
228 "ldm %[src]!, {r5, r6} \n\t" /* loading src pointe rs into r5 and r6 */
229 "ldm %[dst], {r7, r8} \n\t" /* loading dst pointe rs into r7 and r8 */
230
231 /* dst1_scale and dst2_scale*/
232 "lsr r9, r5, #24 \n\t" /* src >> 24 */
233 "lsr r10, r6, #24 \n\t" /* src >> 24 */
234 #ifdef SK_ARM_HAS_EDSP
235 "smulbb r9, r9, %[alpha] \n\t" /* r9 = SkMulS16 r9 w ith src_scale */
236 "smulbb r10, r10, %[alpha] \n\t" /* r10 = SkMulS16 r10 with src_scale */
237 #else
238 "mul r9, r9, %[alpha] \n\t" /* r9 = SkMulS16 r9 w ith src_scale */
239 "mul r10, r10, %[alpha] \n\t" /* r10 = SkMulS16 r10 with src_scale */
240 #endif
241 "lsr r9, r9, #8 \n\t" /* r9 >> 8 */
242 "lsr r10, r10, #8 \n\t" /* r10 >> 8 */
243 "rsb r9, r9, #256 \n\t" /* dst1_scale = r9 = 255 - r9 + 1 */
244 "rsb r10, r10, #256 \n\t" /* dst2_scale = r10 = 255 - r10 + 1 */
245
246 /* ---------------------- */
247
248 /* src1, src1_scale */
249 "and r11, r12, r5, lsr #8 \n\t" /* ag = r11 = r5 mask ed by r12 lsr by #8 */
250 "and r4, r12, r5 \n\t" /* rb = r4 = r5 maske d by r12 */
251 "mul r11, r11, %[alpha] \n\t" /* ag = r11 times src _scale */
252 "mul r4, r4, %[alpha] \n\t" /* rb = r4 times src_ scale */
253 "and r11, r11, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
254 "and r4, r12, r4, lsr #8 \n\t" /* rb masked by mask (r12) */
255 "orr r5, r11, r4 \n\t" /* r5 = (src1, src_sc ale) */
256
257 /* dst1, dst1_scale */
258 "and r11, r12, r7, lsr #8 \n\t" /* ag = r11 = r7 mask ed by r12 lsr by #8 */
259 "and r4, r12, r7 \n\t" /* rb = r4 = r7 maske d by r12 */
260 "mul r11, r11, r9 \n\t" /* ag = r11 times dst _scale (r9) */
261 "mul r4, r4, r9 \n\t" /* rb = r4 times dst_ scale (r9) */
262 "and r11, r11, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
263 "and r4, r12, r4, lsr #8 \n\t" /* rb masked by mask (r12) */
264 "orr r9, r11, r4 \n\t" /* r9 = (dst1, dst_sc ale) */
265
266 /* ---------------------- */
267 "add r9, r5, r9 \n\t" /* *dst = src plus ds t both scaled */
268 /* ---------------------- */
269
270 /* ====================== */
271
272 /* src2, src2_scale */
273 "and r11, r12, r6, lsr #8 \n\t" /* ag = r11 = r6 mask ed by r12 lsr by #8 */
274 "and r4, r12, r6 \n\t" /* rb = r4 = r6 maske d by r12 */
275 "mul r11, r11, %[alpha] \n\t" /* ag = r11 times src _scale */
276 "mul r4, r4, %[alpha] \n\t" /* rb = r4 times src_ scale */
277 "and r11, r11, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
278 "and r4, r12, r4, lsr #8 \n\t" /* rb masked by mask (r12) */
279 "orr r6, r11, r4 \n\t" /* r6 = (src2, src_sc ale) */
280
281 /* dst2, dst2_scale */
282 "and r11, r12, r8, lsr #8 \n\t" /* ag = r11 = r8 mask ed by r12 lsr by #8 */
283 "and r4, r12, r8 \n\t" /* rb = r4 = r8 maske d by r12 */
284 "mul r11, r11, r10 \n\t" /* ag = r11 times dst _scale (r10) */
285 "mul r4, r4, r10 \n\t" /* rb = r4 times dst_ scale (r6) */
286 "and r11, r11, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
287 "and r4, r12, r4, lsr #8 \n\t" /* rb masked by mask (r12) */
288 "orr r10, r11, r4 \n\t" /* r10 = (dst2, dst_s cale) */
289
290 "sub %[count], %[count], #2 \n\t" /* decrease count by 2 */
291 /* ---------------------- */
292 "add r10, r6, r10 \n\t" /* *dst = src plus ds t both scaled */
293 /* ---------------------- */
294 "cmp %[count], #1 \n\t" /* compare count with 1 */
295 /* ----------------- */
296 "stm %[dst]!, {r9, r10} \n\t" /* copy r9 and r10 to r7 and r8 respectively */
297 /* ----------------- */
298
299 "bgt 1b \n\t" /* if %[count] greate r than 1 reloop */
300 "blt 3f \n\t" /* if %[count] less t han 1 exit */
301 /* else get into the single loop */
302 /* Single Loop */
303 "2: \n\t" /* <single loop> */
304 "ldr r5, [%[src]], #4 \n\t" /* loading src pointe r into r5: r5=src */
305 "ldr r7, [%[dst]] \n\t" /* loading dst pointe r into r7: r7=dst */
306
307 "lsr r6, r5, #24 \n\t" /* src >> 24 */
308 "and r8, r12, r5, lsr #8 \n\t" /* ag = r8 = r5 maske d by r12 lsr by #8 */
309 #ifdef SK_ARM_HAS_EDSP
310 "smulbb r6, r6, %[alpha] \n\t" /* r6 = SkMulS16 with src_scale */
311 #else
312 "mul r6, r6, %[alpha] \n\t" /* r6 = SkMulS16 with src_scale */
313 #endif
314 "and r9, r12, r5 \n\t" /* rb = r9 = r5 maske d by r12 */
315 "lsr r6, r6, #8 \n\t" /* r6 >> 8 */
316 "mul r8, r8, %[alpha] \n\t" /* ag = r8 times scal e */
317 "rsb r6, r6, #256 \n\t" /* r6 = 255 - r6 + 1 */
318
319 /* src, src_scale */
320 "mul r9, r9, %[alpha] \n\t" /* rb = r9 times scal e */
321 "and r8, r8, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
322 "and r9, r12, r9, lsr #8 \n\t" /* rb masked by mask (r12) */
323 "orr r10, r8, r9 \n\t" /* r10 = (scr, src_sc ale) */
324
325 /* dst, dst_scale */
326 "and r8, r12, r7, lsr #8 \n\t" /* ag = r8 = r7 maske d by r12 lsr by #8 */
327 "and r9, r12, r7 \n\t" /* rb = r9 = r7 maske d by r12 */
328 "mul r8, r8, r6 \n\t" /* ag = r8 times scal e (r6) */
329 "mul r9, r9, r6 \n\t" /* rb = r9 times scal e (r6) */
330 "and r8, r8, r12, lsl #8 \n\t" /* ag masked by rever se mask (r12) */
331 "and r9, r12, r9, lsr #8 \n\t" /* rb masked by mask (r12) */
332 "orr r7, r8, r9 \n\t" /* r7 = (dst, dst_sca le) */
333
334 "add r10, r7, r10 \n\t" /* *dst = src plus ds t both scaled */
335
336 /* ----------------- */
337 "str r10, [%[dst]], #4 \n\t" /* *dst = r10, postin crement dst by one (times 4) */
338 /* ----------------- */
339
340 "3: \n\t" /* <exit> */
341 : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count), [a lpha] "+r" (alpha)
342 :
343 : "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12 ", "memory"
344 );
345
346 }
347
348 ///////////////////////////////////////////////////////////////////////////////
349 12
350 static const SkBlitRow::Proc16 sk_blitrow_platform_565_procs_arm[] = { 13 static const SkBlitRow::Proc16 sk_blitrow_platform_565_procs_arm[] = {
351 // no dither 14 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
352 // NOTE: For the functions below, we don't have a special version
353 // that assumes that each source pixel is opaque. But our S32A is
354 // still faster than the default, so use it.
355 S32A_D565_Opaque, // S32_D565_Opaque
356 NULL, // S32_D565_Blend
357 S32A_D565_Opaque, // S32A_D565_Opaque
358 NULL, // S32A_D565_Blend
359
360 // dither
361 NULL, // S32_D565_Opaque_Dither
362 NULL, // S32_D565_Blend_Dither
363 NULL, // S32A_D565_Opaque_Dither
364 NULL, // S32A_D565_Blend_Dither
365 }; 15 };
366 16
367 static const SkBlitRow::ColorProc16 sk_blitrow_platform_565_colorprocs_arm[] = { 17 static const SkBlitRow::ColorProc16 sk_blitrow_platform_565_colorprocs_arm[] = {
368 NULL, // Color32A_D565, 18 NULL, NULL,
369 NULL, // Color32A_D565_Dither
370 }; 19 };
371 20
372 static const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm[] = { 21 static const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm[] = {
373 NULL, // S32_Opaque, 22 NULL, NULL, NULL, NULL,
374 NULL, // S32_Blend,
375 S32A_Opaque_BlitRow32_arm, // S32A_Opaque,
376 S32A_Blend_BlitRow32_arm // S32A_Blend
377 }; 23 };
378 24
379 #endif // USE_ARM_CODE
380
381 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { 25 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
382 return SK_ARM_NEON_WRAP(sk_blitrow_platform_565_procs_arm)[flags]; 26 return SK_ARM_NEON_WRAP(sk_blitrow_platform_565_procs_arm)[flags];
383 } 27 }
384 28
385 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { 29 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
386 return SK_ARM_NEON_WRAP(sk_blitrow_platform_565_colorprocs_arm)[flags]; 30 return SK_ARM_NEON_WRAP(sk_blitrow_platform_565_colorprocs_arm)[flags];
387 } 31 }
388 32
389 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 33 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
390 return SK_ARM_NEON_WRAP(sk_blitrow_platform_32_procs_arm)[flags]; 34 return SK_ARM_NEON_WRAP(sk_blitrow_platform_32_procs_arm)[flags];
391 } 35 }
392 36
393 SkBlitRow::Color32Proc SkBlitRow::PlatformColor32Proc() { 37 SkBlitRow::Color32Proc SkBlitRow::PlatformColor32Proc() {
394 #define sk_blitrow_color32_arm NULL 38 #define sk_blitrow_color32_arm NULL
395 return SK_ARM_NEON_WRAP(sk_blitrow_color32_arm); 39 return SK_ARM_NEON_WRAP(sk_blitrow_color32_arm);
396 } 40 }
OLDNEW
« no previous file with comments | « src/opts/SkBitmapProcState_opts_arm.cpp ('k') | src/opts/SkBlitRow_opts_arm_neon.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698