OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 | 8 |
9 #include "SkBitmapProcState.h" | 9 #include "SkBitmapProcState.h" |
10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
43 if (1 == s.fBitmap->width()) { | 43 if (1 == s.fBitmap->width()) { |
44 src = srcAddr[0]; | 44 src = srcAddr[0]; |
45 uint16_t dstValue = table[src]; | 45 uint16_t dstValue = table[src]; |
46 sk_memset16(colors, dstValue, count); | 46 sk_memset16(colors, dstValue, count); |
47 } else { | 47 } else { |
48 int i; | 48 int i; |
49 int count8 = count >> 3; | 49 int count8 = count >> 3; |
50 const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); | 50 const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); |
51 | 51 |
52 asm volatile ( | 52 asm volatile ( |
53 "cmp %[count8], #0 \n\t" // com
pare loop counter with 0 | 53 "cmp %[count8], #0 \n\t" // compare loop co
unter with 0 |
54 "beq 2f \n\t" // if
loop counter == 0, exit | 54 "beq 2f \n\t" // if loop counter
== 0, exit |
55 "1: \n\t" | 55 "1: \n\t" |
56 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // loa
d ptrs to pixels 0-7 | 56 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // load ptrs to pi
xels 0-7 |
57 "subs %[count8], %[count8], #1 \n\t" // dec
rement loop counter | 57 "subs %[count8], %[count8], #1 \n\t" // decrement loop
counter |
58 "uxth r4, r5 \n\t" // ext
ract ptr 0 | 58 "uxth r4, r5 \n\t" // extract ptr 0 |
59 "mov r5, r5, lsr #16 \n\t" // ext
ract ptr 1 | 59 "mov r5, r5, lsr #16 \n\t" // extract ptr 1 |
60 "uxth r6, r7 \n\t" // ext
ract ptr 2 | 60 "uxth r6, r7 \n\t" // extract ptr 2 |
61 "mov r7, r7, lsr #16 \n\t" // ext
ract ptr 3 | 61 "mov r7, r7, lsr #16 \n\t" // extract ptr 3 |
62 "ldrb r4, [%[srcAddr], r4] \n\t" // loa
d pixel 0 from image | 62 "ldrb r4, [%[srcAddr], r4] \n\t" // load pixel 0 fr
om image |
63 "uxth r8, r9 \n\t" // ext
ract ptr 4 | 63 "uxth r8, r9 \n\t" // extract ptr 4 |
64 "ldrb r5, [%[srcAddr], r5] \n\t" // loa
d pixel 1 from image | 64 "ldrb r5, [%[srcAddr], r5] \n\t" // load pixel 1 fr
om image |
65 "mov r9, r9, lsr #16 \n\t" // ext
ract ptr 5 | 65 "mov r9, r9, lsr #16 \n\t" // extract ptr 5 |
66 "ldrb r6, [%[srcAddr], r6] \n\t" // loa
d pixel 2 from image | 66 "ldrb r6, [%[srcAddr], r6] \n\t" // load pixel 2 fr
om image |
67 "uxth r10, r11 \n\t" // ext
ract ptr 6 | 67 "uxth r10, r11 \n\t" // extract ptr 6 |
68 "ldrb r7, [%[srcAddr], r7] \n\t" // loa
d pixel 3 from image | 68 "ldrb r7, [%[srcAddr], r7] \n\t" // load pixel 3 fr
om image |
69 "mov r11, r11, lsr #16 \n\t" // ext
ract ptr 7 | 69 "mov r11, r11, lsr #16 \n\t" // extract ptr 7 |
70 "ldrb r8, [%[srcAddr], r8] \n\t" // loa
d pixel 4 from image | 70 "ldrb r8, [%[srcAddr], r8] \n\t" // load pixel 4 fr
om image |
71 "add r4, r4, r4 \n\t" // dou
ble pixel 0 for RGB565 lookup | 71 "add r4, r4, r4 \n\t" // double pixel 0
for RGB565 lookup |
72 "ldrb r9, [%[srcAddr], r9] \n\t" // loa
d pixel 5 from image | 72 "ldrb r9, [%[srcAddr], r9] \n\t" // load pixel 5 fr
om image |
73 "add r5, r5, r5 \n\t" // dou
ble pixel 1 for RGB565 lookup | 73 "add r5, r5, r5 \n\t" // double pixel 1
for RGB565 lookup |
74 "ldrb r10, [%[srcAddr], r10] \n\t" // loa
d pixel 6 from image | 74 "ldrb r10, [%[srcAddr], r10] \n\t" // load pixel 6 fr
om image |
75 "add r6, r6, r6 \n\t" // dou
ble pixel 2 for RGB565 lookup | 75 "add r6, r6, r6 \n\t" // double pixel 2
for RGB565 lookup |
76 "ldrb r11, [%[srcAddr], r11] \n\t" // loa
d pixel 7 from image | 76 "ldrb r11, [%[srcAddr], r11] \n\t" // load pixel 7 fr
om image |
77 "add r7, r7, r7 \n\t" // dou
ble pixel 3 for RGB565 lookup | 77 "add r7, r7, r7 \n\t" // double pixel 3
for RGB565 lookup |
78 "ldrh r4, [%[table], r4] \n\t" // loa
d pixel 0 RGB565 from colmap | 78 "ldrh r4, [%[table], r4] \n\t" // load pixel 0 RG
B565 from colmap |
79 "add r8, r8, r8 \n\t" // dou
ble pixel 4 for RGB565 lookup | 79 "add r8, r8, r8 \n\t" // double pixel 4
for RGB565 lookup |
80 "ldrh r5, [%[table], r5] \n\t" // loa
d pixel 1 RGB565 from colmap | 80 "ldrh r5, [%[table], r5] \n\t" // load pixel 1 RG
B565 from colmap |
81 "add r9, r9, r9 \n\t" // dou
ble pixel 5 for RGB565 lookup | 81 "add r9, r9, r9 \n\t" // double pixel 5
for RGB565 lookup |
82 "ldrh r6, [%[table], r6] \n\t" // loa
d pixel 2 RGB565 from colmap | 82 "ldrh r6, [%[table], r6] \n\t" // load pixel 2 RG
B565 from colmap |
83 "add r10, r10, r10 \n\t" // dou
ble pixel 6 for RGB565 lookup | 83 "add r10, r10, r10 \n\t" // double pixel 6
for RGB565 lookup |
84 "ldrh r7, [%[table], r7] \n\t" // loa
d pixel 3 RGB565 from colmap | 84 "ldrh r7, [%[table], r7] \n\t" // load pixel 3 RG
B565 from colmap |
85 "add r11, r11, r11 \n\t" // dou
ble pixel 7 for RGB565 lookup | 85 "add r11, r11, r11 \n\t" // double pixel 7
for RGB565 lookup |
86 "ldrh r8, [%[table], r8] \n\t" // loa
d pixel 4 RGB565 from colmap | 86 "ldrh r8, [%[table], r8] \n\t" // load pixel 4 RG
B565 from colmap |
87 "ldrh r9, [%[table], r9] \n\t" // loa
d pixel 5 RGB565 from colmap | 87 "ldrh r9, [%[table], r9] \n\t" // load pixel 5 RG
B565 from colmap |
88 "ldrh r10, [%[table], r10] \n\t" // loa
d pixel 6 RGB565 from colmap | 88 "ldrh r10, [%[table], r10] \n\t" // load pixel 6 RG
B565 from colmap |
89 "ldrh r11, [%[table], r11] \n\t" // loa
d pixel 7 RGB565 from colmap | 89 "ldrh r11, [%[table], r11] \n\t" // load pixel 7 RG
B565 from colmap |
90 "pkhbt r5, r4, r5, lsl #16 \n\t" // pac
k pixels 0 and 1 | 90 "pkhbt r5, r4, r5, lsl #16 \n\t" // pack pixels 0 a
nd 1 |
91 "pkhbt r6, r6, r7, lsl #16 \n\t" // pac
k pixels 2 and 3 | 91 "pkhbt r6, r6, r7, lsl #16 \n\t" // pack pixels 2 a
nd 3 |
92 "pkhbt r8, r8, r9, lsl #16 \n\t" // pac
k pixels 4 and 5 | 92 "pkhbt r8, r8, r9, lsl #16 \n\t" // pack pixels 4 a
nd 5 |
93 "pkhbt r10, r10, r11, lsl #16 \n\t" // pac
k pixels 6 and 7 | 93 "pkhbt r10, r10, r11, lsl #16 \n\t" // pack pixels 6 a
nd 7 |
94 "stmia %[colors]!, {r5, r6, r8, r10} \n\t" // sto
re last 8 pixels | 94 "stmia %[colors]!, {r5, r6, r8, r10} \n\t" // store last 8 pi
xels |
95 "bgt 1b \n\t" // loo
p if counter > 0 | 95 "bgt 1b \n\t" // loop if counter
> 0 |
96 "2: \n\t" | 96 "2: \n\t" |
97 : [xx] "+r" (xx), [count8] "+r" (count8), [colors] "+r" (c
olors) | 97 : [xx] "+r" (xx), [count8] "+r" (count8), [colors] "+r" (colors) |
98 : [table] "r" (table), [srcAddr] "r" (srcAddr) | 98 : [table] "r" (table), [srcAddr] "r" (srcAddr) |
99 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10
", "r11" | 99 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" |
100 ); | 100 ); |
101 | 101 |
102 for (i = (count & 7); i > 0; --i) { | 102 for (i = (count & 7); i > 0; --i) { |
103 src = srcAddr[*xx++]; *colors++ = table[src]; | 103 src = srcAddr[*xx++]; *colors++ = table[src]; |
104 } | 104 } |
105 } | 105 } |
106 | 106 |
107 s.fBitmap->getColorTable()->unlock16BitCache(); | 107 s.fBitmap->getColorTable()->unlock16BitCache(); |
108 } | 108 } |
109 | 109 |
110 void SI8_opaque_D32_nofilter_DX_arm( | 110 void SI8_opaque_D32_nofilter_DX_arm( |
(...skipping 18 matching lines...) Expand all Loading... |
129 srcAddr = (const uint8_t*)((const char*)srcAddr + xy[0] * s.fBitmap->rowByte
s()); | 129 srcAddr = (const uint8_t*)((const char*)srcAddr + xy[0] * s.fBitmap->rowByte
s()); |
130 | 130 |
131 if (1 == s.fBitmap->width()) { | 131 if (1 == s.fBitmap->width()) { |
132 uint8_t src = srcAddr[0]; | 132 uint8_t src = srcAddr[0]; |
133 SkPMColor dstValue = table[src]; | 133 SkPMColor dstValue = table[src]; |
134 sk_memset32(colors, dstValue, count); | 134 sk_memset32(colors, dstValue, count); |
135 } else { | 135 } else { |
136 const uint16_t* xx = (const uint16_t*)(xy + 1); | 136 const uint16_t* xx = (const uint16_t*)(xy + 1); |
137 | 137 |
138 asm volatile ( | 138 asm volatile ( |
139 "subs %[count], %[count], #8 \n\t" // dec
rement count by 8, set flags | 139 "subs %[count], %[count], #8 \n\t" // decrement count
by 8, set flags |
140 "blt 2f \n\t" // if
count < 0, branch to singles | 140 "blt 2f \n\t" // if count < 0, b
ranch to singles |
141 "1: \n\t" //
eights loop | 141 "1: \n\t" // eights loop |
142 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // loa
d ptrs to pixels 0-7 | 142 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // load ptrs to pi
xels 0-7 |
143 "uxth r4, r5 \n\t" // ext
ract ptr 0 | 143 "uxth r4, r5 \n\t" // extract ptr 0 |
144 "mov r5, r5, lsr #16 \n\t" // ext
ract ptr 1 | 144 "mov r5, r5, lsr #16 \n\t" // extract ptr 1 |
145 "uxth r6, r7 \n\t" // ext
ract ptr 2 | 145 "uxth r6, r7 \n\t" // extract ptr 2 |
146 "mov r7, r7, lsr #16 \n\t" // ext
ract ptr 3 | 146 "mov r7, r7, lsr #16 \n\t" // extract ptr 3 |
147 "ldrb r4, [%[srcAddr], r4] \n\t" // loa
d pixel 0 from image | 147 "ldrb r4, [%[srcAddr], r4] \n\t" // load pixel 0 fr
om image |
148 "uxth r8, r9 \n\t" // ext
ract ptr 4 | 148 "uxth r8, r9 \n\t" // extract ptr 4 |
149 "ldrb r5, [%[srcAddr], r5] \n\t" // loa
d pixel 1 from image | 149 "ldrb r5, [%[srcAddr], r5] \n\t" // load pixel 1 fr
om image |
150 "mov r9, r9, lsr #16 \n\t" // ext
ract ptr 5 | 150 "mov r9, r9, lsr #16 \n\t" // extract ptr 5 |
151 "ldrb r6, [%[srcAddr], r6] \n\t" // loa
d pixel 2 from image | 151 "ldrb r6, [%[srcAddr], r6] \n\t" // load pixel 2 fr
om image |
152 "uxth r10, r11 \n\t" // ext
ract ptr 6 | 152 "uxth r10, r11 \n\t" // extract ptr 6 |
153 "ldrb r7, [%[srcAddr], r7] \n\t" // loa
d pixel 3 from image | 153 "ldrb r7, [%[srcAddr], r7] \n\t" // load pixel 3 fr
om image |
154 "mov r11, r11, lsr #16 \n\t" // ext
ract ptr 7 | 154 "mov r11, r11, lsr #16 \n\t" // extract ptr 7 |
155 "ldrb r8, [%[srcAddr], r8] \n\t" // loa
d pixel 4 from image | 155 "ldrb r8, [%[srcAddr], r8] \n\t" // load pixel 4 fr
om image |
156 "ldrb r9, [%[srcAddr], r9] \n\t" // loa
d pixel 5 from image | 156 "ldrb r9, [%[srcAddr], r9] \n\t" // load pixel 5 fr
om image |
157 "ldrb r10, [%[srcAddr], r10] \n\t" // loa
d pixel 6 from image | 157 "ldrb r10, [%[srcAddr], r10] \n\t" // load pixel 6 fr
om image |
158 "ldrb r11, [%[srcAddr], r11] \n\t" // loa
d pixel 7 from image | 158 "ldrb r11, [%[srcAddr], r11] \n\t" // load pixel 7 fr
om image |
159 "ldr r4, [%[table], r4, lsl #2] \n\t" // loa
d pixel 0 SkPMColor from colmap | 159 "ldr r4, [%[table], r4, lsl #2] \n\t" // load pixel 0 Sk
PMColor from colmap |
160 "ldr r5, [%[table], r5, lsl #2] \n\t" // loa
d pixel 1 SkPMColor from colmap | 160 "ldr r5, [%[table], r5, lsl #2] \n\t" // load pixel 1 Sk
PMColor from colmap |
161 "ldr r6, [%[table], r6, lsl #2] \n\t" // loa
d pixel 2 SkPMColor from colmap | 161 "ldr r6, [%[table], r6, lsl #2] \n\t" // load pixel 2 Sk
PMColor from colmap |
162 "ldr r7, [%[table], r7, lsl #2] \n\t" // loa
d pixel 3 SkPMColor from colmap | 162 "ldr r7, [%[table], r7, lsl #2] \n\t" // load pixel 3 Sk
PMColor from colmap |
163 "ldr r8, [%[table], r8, lsl #2] \n\t" // loa
d pixel 4 SkPMColor from colmap | 163 "ldr r8, [%[table], r8, lsl #2] \n\t" // load pixel 4 Sk
PMColor from colmap |
164 "ldr r9, [%[table], r9, lsl #2] \n\t" // loa
d pixel 5 SkPMColor from colmap | 164 "ldr r9, [%[table], r9, lsl #2] \n\t" // load pixel 5 Sk
PMColor from colmap |
165 "ldr r10, [%[table], r10, lsl #2] \n\t" // loa
d pixel 6 SkPMColor from colmap | 165 "ldr r10, [%[table], r10, lsl #2] \n\t" // load pixel 6 Sk
PMColor from colmap |
166 "ldr r11, [%[table], r11, lsl #2] \n\t" // loa
d pixel 7 SkPMColor from colmap | 166 "ldr r11, [%[table], r11, lsl #2] \n\t" // load pixel 7 Sk
PMColor from colmap |
167 "subs %[count], %[count], #8 \n\t" // dec
rement loop counter | 167 "subs %[count], %[count], #8 \n\t" // decrement loop
counter |
168 "stmia %[colors]!, {r4-r11} \n\t" // sto
re 8 pixels | 168 "stmia %[colors]!, {r4-r11} \n\t" // store 8 pixels |
169 "bge 1b \n\t" // loo
p if counter >= 0 | 169 "bge 1b \n\t" // loop if counter
>= 0 |
170 "2: \n\t" | 170 "2: \n\t" |
171 "adds %[count], %[count], #8 \n\t" // fix
up counter, set flags | 171 "adds %[count], %[count], #8 \n\t" // fix up counter,
set flags |
172 "beq 4f \n\t" // if
count == 0, branch to exit | 172 "beq 4f \n\t" // if count == 0,
branch to exit |
173 "3: \n\t" //
singles loop | 173 "3: \n\t" // singles loop |
174 "ldrh r4, [%[xx]], #2 \n\t" // loa
d pixel ptr | 174 "ldrh r4, [%[xx]], #2 \n\t" // load pixel ptr |
175 "subs %[count], %[count], #1 \n\t" // dec
rement loop counter | 175 "subs %[count], %[count], #1 \n\t" // decrement loop
counter |
176 "ldrb r5, [%[srcAddr], r4] \n\t" // loa
d pixel from image | 176 "ldrb r5, [%[srcAddr], r4] \n\t" // load pixel from
image |
177 "ldr r6, [%[table], r5, lsl #2] \n\t" // loa
d SkPMColor from colmap | 177 "ldr r6, [%[table], r5, lsl #2] \n\t" // load SkPMColor
from colmap |
178 "str r6, [%[colors]], #4 \n\t" // sto
re pixel, update ptr | 178 "str r6, [%[colors]], #4 \n\t" // store pixel, up
date ptr |
179 "bne 3b \n\t" // loo
p if counter != 0 | 179 "bne 3b \n\t" // loop if counter
!= 0 |
180 "4: \n\t" //
exit | 180 "4: \n\t" // exit |
181 : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (col
ors) | 181 : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (colors) |
182 : [table] "r" (table), [srcAddr] "r" (srcAddr) | 182 : [table] "r" (table), [srcAddr] "r" (srcAddr) |
183 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10
", "r11" | 183 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" |
184 ); | 184 ); |
185 } | 185 } |
186 | 186 |
187 s.fBitmap->getColorTable()->unlockColors(); | 187 s.fBitmap->getColorTable()->unlockColors(); |
188 } | 188 } |
189 #endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) | 189 #endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) |
190 | 190 |
191 /////////////////////////////////////////////////////////////////////////////// | 191 /////////////////////////////////////////////////////////////////////////////// |
192 | 192 |
193 /* If we replace a sampleproc, then we null-out the associated shaderproc, | 193 /* If we replace a sampleproc, then we null-out the associated shaderproc, |
194 otherwise the shader won't even look at the matrix/sampler | 194 otherwise the shader won't even look at the matrix/sampler |
(...skipping 20 matching lines...) Expand all Loading... |
215 fShaderProc32 = NULL; | 215 fShaderProc32 = NULL; |
216 } | 216 } |
217 } | 217 } |
218 #endif | 218 #endif |
219 break; | 219 break; |
220 default: | 220 default: |
221 break; | 221 break; |
222 } | 222 } |
223 } | 223 } |
224 | 224 |
225 ///////////////////////////////////// | 225 /////////////////////////////////////////////////////////////////////////////// |
226 | 226 |
227 /* FUNCTIONS BELOW ARE SCALAR STUBS INTENDED FOR ARM DEVELOPERS TO REPLACE */ | 227 extern void platformConvolutionProcs_arm_neon(SkConvolutionProcs* procs); |
228 | 228 |
229 ///////////////////////////////////// | 229 void platformConvolutionProcs_arm(SkConvolutionProcs* procs) { |
230 | |
231 | |
// Saturates a signed integer into the unsigned 8-bit range.
//
// Returns |a| unchanged when it lies in [0, 255], 0 when it is negative and
// 255 when it is greater than 255.
static inline unsigned char ClampTo8(int a) {
    // Viewing |a| as unsigned folds both range checks into one comparison:
    // negative inputs wrap to large values and fail it. This keeps the common
    // in-range case down to a single test.
    if (static_cast<unsigned>(a) < 256) {
        return static_cast<unsigned char>(a);
    }
    return (a < 0) ? 0 : 255;
}
241 | |
242 // Convolves horizontally along a single row. The row data is given in | |
243 // |srcData| and continues for the numValues() of the filter. | |
244 void convolveHorizontally_arm(const unsigned char* srcData, | |
245 const SkConvolutionFilter1D& filter, | |
246 unsigned char* outRow, | |
247 bool hasAlpha) { | |
248 // Loop over each pixel on this row in the output image. | |
249 int numValues = filter.numValues(); | |
250 for (int outX = 0; outX < numValues; outX++) { | |
251 // Get the filter that determines the current output pixel. | |
252 int filterOffset, filterLength; | |
253 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | |
254 filter.FilterForValue(outX, &filterOffset, &filterLength); | |
255 | |
256 // Compute the first pixel in this row that the filter affects. It will | |
257 // touch |filterLength| pixels (4 bytes each) after this. | |
258 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; | |
259 | |
260 // Apply the filter to the row to get the destination pixel in |accum|. | |
261 int accum[4] = {0}; | |
262 for (int filterX = 0; filterX < filterLength; filterX++) { | |
263 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[fil
terX]; | |
264 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; | |
265 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; | |
266 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; | |
267 if (hasAlpha) { | |
268 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; | |
269 } | |
270 } | |
271 | |
272 // Bring this value back in range. All of the filter scaling factors | |
273 // are in fixed point with kShiftBits bits of fractional part. | |
274 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
275 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
276 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
277 if (hasAlpha) { | |
278 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
279 } | |
280 | |
281 // Store the new pixel. | |
282 outRow[outX * 4 + 0] = ClampTo8(accum[0]); | |
283 outRow[outX * 4 + 1] = ClampTo8(accum[1]); | |
284 outRow[outX * 4 + 2] = ClampTo8(accum[2]); | |
285 if (hasAlpha) { | |
286 outRow[outX * 4 + 3] = ClampTo8(accum[3]); | |
287 } | |
288 } | |
289 } | |
290 | |
291 // Does vertical convolution to produce one output row. The filter values and | |
292 // length are given in the first two parameters. These are applied to each | |
293 // of the rows pointed to in the |sourceDataRows| array, with each row | |
294 // being |pixelWidth| wide. | |
295 // | |
296 // The output must have room for |pixelWidth * 4| bytes. | |
297 template<bool hasAlpha> | |
298 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* f
ilterValues, | |
299 int filterLength, | |
300 unsigned char* const* sourceDataRows, | |
301 int pixelWidth, | |
302 unsigned char* outRow) { | |
303 // We go through each column in the output and do a vertical convolution
, | |
304 // generating one output pixel each time. | |
305 for (int outX = 0; outX < pixelWidth; outX++) { | |
306 // Compute the number of bytes over in each row that the current col
umn | |
307 // we're convolving starts at. The pixel will cover the next 4 bytes
. | |
308 int byteOffset = outX * 4; | |
309 | |
310 // Apply the filter to one column of pixels. | |
311 int accum[4] = {0}; | |
312 for (int filterY = 0; filterY < filterLength; filterY++) { | |
313 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterY]; | |
314 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; | |
315 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; | |
316 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; | |
317 if (hasAlpha) { | |
318 accum[3] += curFilter * sourceDataRows[filterY][byteOffset +
3]; | |
319 } | |
320 } | |
321 | |
322 // Bring this value back in range. All of the filter scaling factors | |
323 // are in fixed point with kShiftBits bits of precision. | |
324 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
325 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
326 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
327 if (hasAlpha) { | |
328 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
329 } | |
330 | |
331 // Store the new pixel. | |
332 outRow[byteOffset + 0] = ClampTo8(accum[0]); | |
333 outRow[byteOffset + 1] = ClampTo8(accum[1]); | |
334 outRow[byteOffset + 2] = ClampTo8(accum[2]); | |
335 if (hasAlpha) { | |
336 unsigned char alpha = ClampTo8(accum[3]); | |
337 | |
338 // Make sure the alpha channel doesn't come out smaller than any
of the | |
339 // color channels. We use premultipled alpha channels, so this s
hould | |
340 // never happen, but rounding errors will cause this from time t
o time. | |
341 // These "impossible" colors will cause overflows (and hence ran
dom pixel | |
342 // values) when the resulting bitmap is drawn to the screen. | |
343 // | |
344 // We only need to do this when generating the final output row
(here). | |
345 int maxColorChannel = SkTMax(outRow[byteOffset + 0], | |
346 SkTMax(outRow[byteOffset + 1], | |
347 outRow[byteOffset + 2])); | |
348 if (alpha < maxColorChannel) { | |
349 outRow[byteOffset + 3] = maxColorChannel; | |
350 } else { | |
351 outRow[byteOffset + 3] = alpha; | |
352 } | |
353 } else { | |
354 // No alpha channel, the image is opaque. | |
355 outRow[byteOffset + 3] = 0xff; | |
356 } | |
357 } | |
358 } | |
359 | |
360 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
361 int filterLength, | |
362 unsigned char* const* sourceDataRows, | |
363 int pixelWidth, | |
364 unsigned char* outRow, | |
365 bool sourceHasAlpha) { | |
366 if (sourceHasAlpha) { | |
367 convolveVertically_arm<true>(filterValues, filterLength, | |
368 sourceDataRows, pixelWidth, | |
369 outRow); | |
370 } else { | |
371 convolveVertically_arm<false>(filterValues, filterLength, | |
372 sourceDataRows, pixelWidth, | |
373 outRow); | |
374 } | |
375 } | |
376 | |
377 // Convolves horizontally along four rows. The row data is given in | |
378 // |src_data| and continues for the num_values() of the filter. | |
379 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please | |
380 // refer to that function for detailed comments. | |
381 void convolve4RowsHorizontally_arm(const unsigned char* src_data[4], | |
382 const SkConvolutionFilter1D& filter, | |
383 unsigned char* out_row[4]) { | |
384 } | |
385 | |
386 /////////////////////////// | |
387 | |
388 /* STOP REWRITING FUNCTIONS HERE, BUT DON'T FORGET TO EDIT THE | |
389 PLATFORM CONVOLUTION PROCS BELOW */ | |
390 | |
391 /////////////////////////// | |
392 | |
// applySIMDPadding_arm (present in the OLD column only): appends eight
// zero-valued ConvolutionFixed coefficients to |filter|, one addFilterValue()
// call per coefficient.
// NOTE(review): the final row of this block also carries a closing brace in
// the NEW column ("230 }"); presumably it closes platformConvolutionProcs_arm,
// confirm against the full diff.
393 void applySIMDPadding_arm(SkConvolutionFilter1D *filter) { | |
394 // Padding |paddingCount| of more dummy coefficients after the coefficients | |
395 // of last filter to prevent SIMD instructions which load 8 or 16 bytes | |
396 // together to access invalid memory areas. We are not trying to align the | |
397 // coefficients right now due to the opaqueness of <vector> implementation. | |
398 // This has to be done after all |AddFilter| calls. | |
399 for (int i = 0; i < 8; ++i) { | |
400 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); | |
401 } | |
402 } | 230 } |
403 | 231 |
// SkBitmapProcState::platformConvolutionProcs, shown in both revisions of this
// side-by-side diff.
// OLD column: only when sk_cpu_arm_has_neon() returns true, sets
//   fExtraHorizontalReads to 3, points fConvolveVertically,
//   fConvolveHorizontally and fApplySIMDPadding at the *_arm routines, and sets
//   fConvolve4RowsHorizontally to NULL. Otherwise |procs| is not modified.
// NEW column: the body is the single call
//   SK_ARM_NEON_WRAP(platformConvolutionProcs_arm)(procs).
// NOTE(review): SK_ARM_NEON_WRAP is defined outside this view; presumably it
//   selects between the _arm and _arm_neon variants, confirm.
404 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { | 232 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { |
405 if (sk_cpu_arm_has_neon()) { | 233 SK_ARM_NEON_WRAP(platformConvolutionProcs_arm)(procs); |
406 procs->fExtraHorizontalReads = 3; | |
407 procs->fConvolveVertically = &convolveVertically_arm; | |
408 | |
409 // next line is commented out because the four-row convolution function
above is | |
410 // just a no-op. Please see the comment above its definition, and the S
SE implementation | |
411 // in SkBitmapProcState_opts_SSE2.cpp for guidance on its semantics. | |
412 // leaving it as NULL will just cause the convolution system to not atte
mpt | |
413 // to operate on four rows at once, which is correct but not performance
-optimal. | |
414 | |
415 // procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_arm; | |
416 | |
417 procs->fConvolve4RowsHorizontally = NULL; | |
418 | |
419 procs->fConvolveHorizontally = &convolveHorizontally_arm; | |
420 procs->fApplySIMDPadding = &applySIMDPadding_arm; | |
421 } | |
422 } | 234 } |
OLD | NEW |