| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 | 8 |
| 9 #include "SkBitmapProcState.h" | 9 #include "SkBitmapProcState.h" |
| 10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 43 if (1 == s.fBitmap->width()) { | 43 if (1 == s.fBitmap->width()) { |
| 44 src = srcAddr[0]; | 44 src = srcAddr[0]; |
| 45 uint16_t dstValue = table[src]; | 45 uint16_t dstValue = table[src]; |
| 46 sk_memset16(colors, dstValue, count); | 46 sk_memset16(colors, dstValue, count); |
| 47 } else { | 47 } else { |
| 48 int i; | 48 int i; |
| 49 int count8 = count >> 3; | 49 int count8 = count >> 3; |
| 50 const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); | 50 const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); |
| 51 | 51 |
| 52 asm volatile ( | 52 asm volatile ( |
| 53 "cmp %[count8], #0 \n\t" // com
pare loop counter with 0 | 53 "cmp %[count8], #0 \n\t" // compare loop co
unter with 0 |
| 54 "beq 2f \n\t" // if
loop counter == 0, exit | 54 "beq 2f \n\t" // if loop counter
== 0, exit |
| 55 "1: \n\t" | 55 "1: \n\t" |
| 56 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // loa
d ptrs to pixels 0-7 | 56 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // load ptrs to pi
xels 0-7 |
| 57 "subs %[count8], %[count8], #1 \n\t" // dec
rement loop counter | 57 "subs %[count8], %[count8], #1 \n\t" // decrement loop
counter |
| 58 "uxth r4, r5 \n\t" // ext
ract ptr 0 | 58 "uxth r4, r5 \n\t" // extract ptr 0 |
| 59 "mov r5, r5, lsr #16 \n\t" // ext
ract ptr 1 | 59 "mov r5, r5, lsr #16 \n\t" // extract ptr 1 |
| 60 "uxth r6, r7 \n\t" // ext
ract ptr 2 | 60 "uxth r6, r7 \n\t" // extract ptr 2 |
| 61 "mov r7, r7, lsr #16 \n\t" // ext
ract ptr 3 | 61 "mov r7, r7, lsr #16 \n\t" // extract ptr 3 |
| 62 "ldrb r4, [%[srcAddr], r4] \n\t" // loa
d pixel 0 from image | 62 "ldrb r4, [%[srcAddr], r4] \n\t" // load pixel 0 fr
om image |
| 63 "uxth r8, r9 \n\t" // ext
ract ptr 4 | 63 "uxth r8, r9 \n\t" // extract ptr 4 |
| 64 "ldrb r5, [%[srcAddr], r5] \n\t" // loa
d pixel 1 from image | 64 "ldrb r5, [%[srcAddr], r5] \n\t" // load pixel 1 fr
om image |
| 65 "mov r9, r9, lsr #16 \n\t" // ext
ract ptr 5 | 65 "mov r9, r9, lsr #16 \n\t" // extract ptr 5 |
| 66 "ldrb r6, [%[srcAddr], r6] \n\t" // loa
d pixel 2 from image | 66 "ldrb r6, [%[srcAddr], r6] \n\t" // load pixel 2 fr
om image |
| 67 "uxth r10, r11 \n\t" // ext
ract ptr 6 | 67 "uxth r10, r11 \n\t" // extract ptr 6 |
| 68 "ldrb r7, [%[srcAddr], r7] \n\t" // loa
d pixel 3 from image | 68 "ldrb r7, [%[srcAddr], r7] \n\t" // load pixel 3 fr
om image |
| 69 "mov r11, r11, lsr #16 \n\t" // ext
ract ptr 7 | 69 "mov r11, r11, lsr #16 \n\t" // extract ptr 7 |
| 70 "ldrb r8, [%[srcAddr], r8] \n\t" // loa
d pixel 4 from image | 70 "ldrb r8, [%[srcAddr], r8] \n\t" // load pixel 4 fr
om image |
| 71 "add r4, r4, r4 \n\t" // dou
ble pixel 0 for RGB565 lookup | 71 "add r4, r4, r4 \n\t" // double pixel 0
for RGB565 lookup |
| 72 "ldrb r9, [%[srcAddr], r9] \n\t" // loa
d pixel 5 from image | 72 "ldrb r9, [%[srcAddr], r9] \n\t" // load pixel 5 fr
om image |
| 73 "add r5, r5, r5 \n\t" // dou
ble pixel 1 for RGB565 lookup | 73 "add r5, r5, r5 \n\t" // double pixel 1
for RGB565 lookup |
| 74 "ldrb r10, [%[srcAddr], r10] \n\t" // loa
d pixel 6 from image | 74 "ldrb r10, [%[srcAddr], r10] \n\t" // load pixel 6 fr
om image |
| 75 "add r6, r6, r6 \n\t" // dou
ble pixel 2 for RGB565 lookup | 75 "add r6, r6, r6 \n\t" // double pixel 2
for RGB565 lookup |
| 76 "ldrb r11, [%[srcAddr], r11] \n\t" // loa
d pixel 7 from image | 76 "ldrb r11, [%[srcAddr], r11] \n\t" // load pixel 7 fr
om image |
| 77 "add r7, r7, r7 \n\t" // dou
ble pixel 3 for RGB565 lookup | 77 "add r7, r7, r7 \n\t" // double pixel 3
for RGB565 lookup |
| 78 "ldrh r4, [%[table], r4] \n\t" // loa
d pixel 0 RGB565 from colmap | 78 "ldrh r4, [%[table], r4] \n\t" // load pixel 0 RG
B565 from colmap |
| 79 "add r8, r8, r8 \n\t" // dou
ble pixel 4 for RGB565 lookup | 79 "add r8, r8, r8 \n\t" // double pixel 4
for RGB565 lookup |
| 80 "ldrh r5, [%[table], r5] \n\t" // loa
d pixel 1 RGB565 from colmap | 80 "ldrh r5, [%[table], r5] \n\t" // load pixel 1 RG
B565 from colmap |
| 81 "add r9, r9, r9 \n\t" // dou
ble pixel 5 for RGB565 lookup | 81 "add r9, r9, r9 \n\t" // double pixel 5
for RGB565 lookup |
| 82 "ldrh r6, [%[table], r6] \n\t" // loa
d pixel 2 RGB565 from colmap | 82 "ldrh r6, [%[table], r6] \n\t" // load pixel 2 RG
B565 from colmap |
| 83 "add r10, r10, r10 \n\t" // dou
ble pixel 6 for RGB565 lookup | 83 "add r10, r10, r10 \n\t" // double pixel 6
for RGB565 lookup |
| 84 "ldrh r7, [%[table], r7] \n\t" // loa
d pixel 3 RGB565 from colmap | 84 "ldrh r7, [%[table], r7] \n\t" // load pixel 3 RG
B565 from colmap |
| 85 "add r11, r11, r11 \n\t" // dou
ble pixel 7 for RGB565 lookup | 85 "add r11, r11, r11 \n\t" // double pixel 7
for RGB565 lookup |
| 86 "ldrh r8, [%[table], r8] \n\t" // loa
d pixel 4 RGB565 from colmap | 86 "ldrh r8, [%[table], r8] \n\t" // load pixel 4 RG
B565 from colmap |
| 87 "ldrh r9, [%[table], r9] \n\t" // loa
d pixel 5 RGB565 from colmap | 87 "ldrh r9, [%[table], r9] \n\t" // load pixel 5 RG
B565 from colmap |
| 88 "ldrh r10, [%[table], r10] \n\t" // loa
d pixel 6 RGB565 from colmap | 88 "ldrh r10, [%[table], r10] \n\t" // load pixel 6 RG
B565 from colmap |
| 89 "ldrh r11, [%[table], r11] \n\t" // loa
d pixel 7 RGB565 from colmap | 89 "ldrh r11, [%[table], r11] \n\t" // load pixel 7 RG
B565 from colmap |
| 90 "pkhbt r5, r4, r5, lsl #16 \n\t" // pac
k pixels 0 and 1 | 90 "pkhbt r5, r4, r5, lsl #16 \n\t" // pack pixels 0 a
nd 1 |
| 91 "pkhbt r6, r6, r7, lsl #16 \n\t" // pac
k pixels 2 and 3 | 91 "pkhbt r6, r6, r7, lsl #16 \n\t" // pack pixels 2 a
nd 3 |
| 92 "pkhbt r8, r8, r9, lsl #16 \n\t" // pac
k pixels 4 and 5 | 92 "pkhbt r8, r8, r9, lsl #16 \n\t" // pack pixels 4 a
nd 5 |
| 93 "pkhbt r10, r10, r11, lsl #16 \n\t" // pac
k pixels 6 and 7 | 93 "pkhbt r10, r10, r11, lsl #16 \n\t" // pack pixels 6 a
nd 7 |
| 94 "stmia %[colors]!, {r5, r6, r8, r10} \n\t" // sto
re last 8 pixels | 94 "stmia %[colors]!, {r5, r6, r8, r10} \n\t" // store last 8 pi
xels |
| 95 "bgt 1b \n\t" // loo
p if counter > 0 | 95 "bgt 1b \n\t" // loop if counter
> 0 |
| 96 "2: \n\t" | 96 "2: \n\t" |
| 97 : [xx] "+r" (xx), [count8] "+r" (count8), [colors] "+r" (c
olors) | 97 : [xx] "+r" (xx), [count8] "+r" (count8), [colors] "+r" (colors) |
| 98 : [table] "r" (table), [srcAddr] "r" (srcAddr) | 98 : [table] "r" (table), [srcAddr] "r" (srcAddr) |
| 99 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10
", "r11" | 99 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" |
| 100 ); | 100 ); |
| 101 | 101 |
| 102 for (i = (count & 7); i > 0; --i) { | 102 for (i = (count & 7); i > 0; --i) { |
| 103 src = srcAddr[*xx++]; *colors++ = table[src]; | 103 src = srcAddr[*xx++]; *colors++ = table[src]; |
| 104 } | 104 } |
| 105 } | 105 } |
| 106 | 106 |
| 107 s.fBitmap->getColorTable()->unlock16BitCache(); | 107 s.fBitmap->getColorTable()->unlock16BitCache(); |
| 108 } | 108 } |
| 109 | 109 |
| 110 void SI8_opaque_D32_nofilter_DX_arm( | 110 void SI8_opaque_D32_nofilter_DX_arm( |
| (...skipping 18 matching lines...) Expand all Loading... |
| 129 srcAddr = (const uint8_t*)((const char*)srcAddr + xy[0] * s.fBitmap->rowByte
s()); | 129 srcAddr = (const uint8_t*)((const char*)srcAddr + xy[0] * s.fBitmap->rowByte
s()); |
| 130 | 130 |
| 131 if (1 == s.fBitmap->width()) { | 131 if (1 == s.fBitmap->width()) { |
| 132 uint8_t src = srcAddr[0]; | 132 uint8_t src = srcAddr[0]; |
| 133 SkPMColor dstValue = table[src]; | 133 SkPMColor dstValue = table[src]; |
| 134 sk_memset32(colors, dstValue, count); | 134 sk_memset32(colors, dstValue, count); |
| 135 } else { | 135 } else { |
| 136 const uint16_t* xx = (const uint16_t*)(xy + 1); | 136 const uint16_t* xx = (const uint16_t*)(xy + 1); |
| 137 | 137 |
| 138 asm volatile ( | 138 asm volatile ( |
| 139 "subs %[count], %[count], #8 \n\t" // dec
rement count by 8, set flags | 139 "subs %[count], %[count], #8 \n\t" // decrement count
by 8, set flags |
| 140 "blt 2f \n\t" // if
count < 0, branch to singles | 140 "blt 2f \n\t" // if count < 0, b
ranch to singles |
| 141 "1: \n\t" //
eights loop | 141 "1: \n\t" // eights loop |
| 142 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // loa
d ptrs to pixels 0-7 | 142 "ldmia %[xx]!, {r5, r7, r9, r11} \n\t" // load ptrs to pi
xels 0-7 |
| 143 "uxth r4, r5 \n\t" // ext
ract ptr 0 | 143 "uxth r4, r5 \n\t" // extract ptr 0 |
| 144 "mov r5, r5, lsr #16 \n\t" // ext
ract ptr 1 | 144 "mov r5, r5, lsr #16 \n\t" // extract ptr 1 |
| 145 "uxth r6, r7 \n\t" // ext
ract ptr 2 | 145 "uxth r6, r7 \n\t" // extract ptr 2 |
| 146 "mov r7, r7, lsr #16 \n\t" // ext
ract ptr 3 | 146 "mov r7, r7, lsr #16 \n\t" // extract ptr 3 |
| 147 "ldrb r4, [%[srcAddr], r4] \n\t" // loa
d pixel 0 from image | 147 "ldrb r4, [%[srcAddr], r4] \n\t" // load pixel 0 fr
om image |
| 148 "uxth r8, r9 \n\t" // ext
ract ptr 4 | 148 "uxth r8, r9 \n\t" // extract ptr 4 |
| 149 "ldrb r5, [%[srcAddr], r5] \n\t" // loa
d pixel 1 from image | 149 "ldrb r5, [%[srcAddr], r5] \n\t" // load pixel 1 fr
om image |
| 150 "mov r9, r9, lsr #16 \n\t" // ext
ract ptr 5 | 150 "mov r9, r9, lsr #16 \n\t" // extract ptr 5 |
| 151 "ldrb r6, [%[srcAddr], r6] \n\t" // loa
d pixel 2 from image | 151 "ldrb r6, [%[srcAddr], r6] \n\t" // load pixel 2 fr
om image |
| 152 "uxth r10, r11 \n\t" // ext
ract ptr 6 | 152 "uxth r10, r11 \n\t" // extract ptr 6 |
| 153 "ldrb r7, [%[srcAddr], r7] \n\t" // loa
d pixel 3 from image | 153 "ldrb r7, [%[srcAddr], r7] \n\t" // load pixel 3 fr
om image |
| 154 "mov r11, r11, lsr #16 \n\t" // ext
ract ptr 7 | 154 "mov r11, r11, lsr #16 \n\t" // extract ptr 7 |
| 155 "ldrb r8, [%[srcAddr], r8] \n\t" // loa
d pixel 4 from image | 155 "ldrb r8, [%[srcAddr], r8] \n\t" // load pixel 4 fr
om image |
| 156 "ldrb r9, [%[srcAddr], r9] \n\t" // loa
d pixel 5 from image | 156 "ldrb r9, [%[srcAddr], r9] \n\t" // load pixel 5 fr
om image |
| 157 "ldrb r10, [%[srcAddr], r10] \n\t" // loa
d pixel 6 from image | 157 "ldrb r10, [%[srcAddr], r10] \n\t" // load pixel 6 fr
om image |
| 158 "ldrb r11, [%[srcAddr], r11] \n\t" // loa
d pixel 7 from image | 158 "ldrb r11, [%[srcAddr], r11] \n\t" // load pixel 7 fr
om image |
| 159 "ldr r4, [%[table], r4, lsl #2] \n\t" // loa
d pixel 0 SkPMColor from colmap | 159 "ldr r4, [%[table], r4, lsl #2] \n\t" // load pixel 0 Sk
PMColor from colmap |
| 160 "ldr r5, [%[table], r5, lsl #2] \n\t" // loa
d pixel 1 SkPMColor from colmap | 160 "ldr r5, [%[table], r5, lsl #2] \n\t" // load pixel 1 Sk
PMColor from colmap |
| 161 "ldr r6, [%[table], r6, lsl #2] \n\t" // loa
d pixel 2 SkPMColor from colmap | 161 "ldr r6, [%[table], r6, lsl #2] \n\t" // load pixel 2 Sk
PMColor from colmap |
| 162 "ldr r7, [%[table], r7, lsl #2] \n\t" // loa
d pixel 3 SkPMColor from colmap | 162 "ldr r7, [%[table], r7, lsl #2] \n\t" // load pixel 3 Sk
PMColor from colmap |
| 163 "ldr r8, [%[table], r8, lsl #2] \n\t" // loa
d pixel 4 SkPMColor from colmap | 163 "ldr r8, [%[table], r8, lsl #2] \n\t" // load pixel 4 Sk
PMColor from colmap |
| 164 "ldr r9, [%[table], r9, lsl #2] \n\t" // loa
d pixel 5 SkPMColor from colmap | 164 "ldr r9, [%[table], r9, lsl #2] \n\t" // load pixel 5 Sk
PMColor from colmap |
| 165 "ldr r10, [%[table], r10, lsl #2] \n\t" // loa
d pixel 6 SkPMColor from colmap | 165 "ldr r10, [%[table], r10, lsl #2] \n\t" // load pixel 6 Sk
PMColor from colmap |
| 166 "ldr r11, [%[table], r11, lsl #2] \n\t" // loa
d pixel 7 SkPMColor from colmap | 166 "ldr r11, [%[table], r11, lsl #2] \n\t" // load pixel 7 Sk
PMColor from colmap |
| 167 "subs %[count], %[count], #8 \n\t" // dec
rement loop counter | 167 "subs %[count], %[count], #8 \n\t" // decrement loop
counter |
| 168 "stmia %[colors]!, {r4-r11} \n\t" // sto
re 8 pixels | 168 "stmia %[colors]!, {r4-r11} \n\t" // store 8 pixels |
| 169 "bge 1b \n\t" // loo
p if counter >= 0 | 169 "bge 1b \n\t" // loop if counter
>= 0 |
| 170 "2: \n\t" | 170 "2: \n\t" |
| 171 "adds %[count], %[count], #8 \n\t" // fix
up counter, set flags | 171 "adds %[count], %[count], #8 \n\t" // fix up counter,
set flags |
| 172 "beq 4f \n\t" // if
count == 0, branch to exit | 172 "beq 4f \n\t" // if count == 0,
branch to exit |
| 173 "3: \n\t" //
singles loop | 173 "3: \n\t" // singles loop |
| 174 "ldrh r4, [%[xx]], #2 \n\t" // loa
d pixel ptr | 174 "ldrh r4, [%[xx]], #2 \n\t" // load pixel ptr |
| 175 "subs %[count], %[count], #1 \n\t" // dec
rement loop counter | 175 "subs %[count], %[count], #1 \n\t" // decrement loop
counter |
| 176 "ldrb r5, [%[srcAddr], r4] \n\t" // loa
d pixel from image | 176 "ldrb r5, [%[srcAddr], r4] \n\t" // load pixel from
image |
| 177 "ldr r6, [%[table], r5, lsl #2] \n\t" // loa
d SkPMColor from colmap | 177 "ldr r6, [%[table], r5, lsl #2] \n\t" // load SkPMColor
from colmap |
| 178 "str r6, [%[colors]], #4 \n\t" // sto
re pixel, update ptr | 178 "str r6, [%[colors]], #4 \n\t" // store pixel, up
date ptr |
| 179 "bne 3b \n\t" // loo
p if counter != 0 | 179 "bne 3b \n\t" // loop if counter
!= 0 |
| 180 "4: \n\t" //
exit | 180 "4: \n\t" // exit |
| 181 : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (col
ors) | 181 : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (colors) |
| 182 : [table] "r" (table), [srcAddr] "r" (srcAddr) | 182 : [table] "r" (table), [srcAddr] "r" (srcAddr) |
| 183 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10
", "r11" | 183 : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" |
| 184 ); | 184 ); |
| 185 } | 185 } |
| 186 | 186 |
| 187 s.fBitmap->getColorTable()->unlockColors(); | 187 s.fBitmap->getColorTable()->unlockColors(); |
| 188 } | 188 } |
| 189 #endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) | 189 #endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) |
| 190 | 190 |
| 191 /////////////////////////////////////////////////////////////////////////////// | 191 /////////////////////////////////////////////////////////////////////////////// |
| 192 | 192 |
| 193 /* If we replace a sampleproc, then we null-out the associated shaderproc, | 193 /* If we replace a sampleproc, then we null-out the associated shaderproc, |
| 194 otherwise the shader won't even look at the matrix/sampler | 194 otherwise the shader won't even look at the matrix/sampler |
| (...skipping 20 matching lines...) Expand all Loading... |
| 215 fShaderProc32 = NULL; | 215 fShaderProc32 = NULL; |
| 216 } | 216 } |
| 217 } | 217 } |
| 218 #endif | 218 #endif |
| 219 break; | 219 break; |
| 220 default: | 220 default: |
| 221 break; | 221 break; |
| 222 } | 222 } |
| 223 } | 223 } |
| 224 | 224 |
| 225 ///////////////////////////////////// | 225 /////////////////////////////////////////////////////////////////////////////// |
| 226 | 226 |
| 227 /* FUNCTIONS BELOW ARE SCALAR STUBS INTENDED FOR ARM DEVELOPERS TO REPLACE */ | 227 extern void platformConvolutionProcs_arm_neon(SkConvolutionProcs* procs); |
| 228 | 228 |
| 229 ///////////////////////////////////// | 229 void platformConvolutionProcs_arm(SkConvolutionProcs* procs) { |
| 230 | |
| 231 | |
| 232 static inline unsigned char ClampTo8(int a) { | |
| 233 if (static_cast<unsigned>(a) < 256) { | |
| 234 return a; // Avoid the extra check in the common case. | |
| 235 } | |
| 236 if (a < 0) { | |
| 237 return 0; | |
| 238 } | |
| 239 return 255; | |
| 240 } | |
| 241 | |
| 242 // Convolves horizontally along a single row. The row data is given in | |
| 243 // |srcData| and continues for the numValues() of the filter. | |
| 244 void convolveHorizontally_arm(const unsigned char* srcData, | |
| 245 const SkConvolutionFilter1D& filter, | |
| 246 unsigned char* outRow, | |
| 247 bool hasAlpha) { | |
| 248 // Loop over each pixel on this row in the output image. | |
| 249 int numValues = filter.numValues(); | |
| 250 for (int outX = 0; outX < numValues; outX++) { | |
| 251 // Get the filter that determines the current output pixel. | |
| 252 int filterOffset, filterLength; | |
| 253 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | |
| 254 filter.FilterForValue(outX, &filterOffset, &filterLength); | |
| 255 | |
| 256 // Compute the first pixel in this row that the filter affects. It will | |
| 257 // touch |filterLength| pixels (4 bytes each) after this. | |
| 258 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; | |
| 259 | |
| 260 // Apply the filter to the row to get the destination pixel in |accum|. | |
| 261 int accum[4] = {0}; | |
| 262 for (int filterX = 0; filterX < filterLength; filterX++) { | |
| 263 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[fil
terX]; | |
| 264 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; | |
| 265 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; | |
| 266 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; | |
| 267 if (hasAlpha) { | |
| 268 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 // Bring this value back in range. All of the filter scaling factors | |
| 273 // are in fixed point with kShiftBits bits of fractional part. | |
| 274 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
| 275 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
| 276 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
| 277 if (hasAlpha) { | |
| 278 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
| 279 } | |
| 280 | |
| 281 // Store the new pixel. | |
| 282 outRow[outX * 4 + 0] = ClampTo8(accum[0]); | |
| 283 outRow[outX * 4 + 1] = ClampTo8(accum[1]); | |
| 284 outRow[outX * 4 + 2] = ClampTo8(accum[2]); | |
| 285 if (hasAlpha) { | |
| 286 outRow[outX * 4 + 3] = ClampTo8(accum[3]); | |
| 287 } | |
| 288 } | |
| 289 } | |
| 290 | |
| 291 // Does vertical convolution to produce one output row. The filter values and | |
| 292 // length are given in the first two parameters. These are applied to each | |
| 293 // of the rows pointed to in the |sourceDataRows| array, with each row | |
| 294 // being |pixelWidth| wide. | |
| 295 // | |
| 296 // The output must have room for |pixelWidth * 4| bytes. | |
| 297 template<bool hasAlpha> | |
| 298 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* f
ilterValues, | |
| 299 int filterLength, | |
| 300 unsigned char* const* sourceDataRows, | |
| 301 int pixelWidth, | |
| 302 unsigned char* outRow) { | |
| 303 // We go through each column in the output and do a vertical convolution
, | |
| 304 // generating one output pixel each time. | |
| 305 for (int outX = 0; outX < pixelWidth; outX++) { | |
| 306 // Compute the number of bytes over in each row that the current col
umn | |
| 307 // we're convolving starts at. The pixel will cover the next 4 bytes
. | |
| 308 int byteOffset = outX * 4; | |
| 309 | |
| 310 // Apply the filter to one column of pixels. | |
| 311 int accum[4] = {0}; | |
| 312 for (int filterY = 0; filterY < filterLength; filterY++) { | |
| 313 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterY]; | |
| 314 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; | |
| 315 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; | |
| 316 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; | |
| 317 if (hasAlpha) { | |
| 318 accum[3] += curFilter * sourceDataRows[filterY][byteOffset +
3]; | |
| 319 } | |
| 320 } | |
| 321 | |
| 322 // Bring this value back in range. All of the filter scaling factors | |
| 323 // are in fixed point with kShiftBits bits of precision. | |
| 324 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
| 325 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
| 326 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
| 327 if (hasAlpha) { | |
| 328 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
| 329 } | |
| 330 | |
| 331 // Store the new pixel. | |
| 332 outRow[byteOffset + 0] = ClampTo8(accum[0]); | |
| 333 outRow[byteOffset + 1] = ClampTo8(accum[1]); | |
| 334 outRow[byteOffset + 2] = ClampTo8(accum[2]); | |
| 335 if (hasAlpha) { | |
| 336 unsigned char alpha = ClampTo8(accum[3]); | |
| 337 | |
| 338 // Make sure the alpha channel doesn't come out smaller than any
of the | |
| 339 // color channels. We use premultipled alpha channels, so this s
hould | |
| 340 // never happen, but rounding errors will cause this from time t
o time. | |
| 341 // These "impossible" colors will cause overflows (and hence ran
dom pixel | |
| 342 // values) when the resulting bitmap is drawn to the screen. | |
| 343 // | |
| 344 // We only need to do this when generating the final output row
(here). | |
| 345 int maxColorChannel = SkTMax(outRow[byteOffset + 0], | |
| 346 SkTMax(outRow[byteOffset + 1], | |
| 347 outRow[byteOffset + 2])); | |
| 348 if (alpha < maxColorChannel) { | |
| 349 outRow[byteOffset + 3] = maxColorChannel; | |
| 350 } else { | |
| 351 outRow[byteOffset + 3] = alpha; | |
| 352 } | |
| 353 } else { | |
| 354 // No alpha channel, the image is opaque. | |
| 355 outRow[byteOffset + 3] = 0xff; | |
| 356 } | |
| 357 } | |
| 358 } | |
| 359 | |
| 360 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
| 361 int filterLength, | |
| 362 unsigned char* const* sourceDataRows, | |
| 363 int pixelWidth, | |
| 364 unsigned char* outRow, | |
| 365 bool sourceHasAlpha) { | |
| 366 if (sourceHasAlpha) { | |
| 367 convolveVertically_arm<true>(filterValues, filterLength, | |
| 368 sourceDataRows, pixelWidth, | |
| 369 outRow); | |
| 370 } else { | |
| 371 convolveVertically_arm<false>(filterValues, filterLength, | |
| 372 sourceDataRows, pixelWidth, | |
| 373 outRow); | |
| 374 } | |
| 375 } | |
| 376 | |
| 377 // Convolves horizontally along four rows. The row data is given in | |
| 378 // |src_data| and continues for the num_values() of the filter. | |
| 379 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please | |
| 380 // refer to that function for detailed comments. | |
| 381 void convolve4RowsHorizontally_arm(const unsigned char* src_data[4], | |
| 382 const SkConvolutionFilter1D& filter, | |
| 383 unsigned char* out_row[4]) { | |
| 384 } | |
| 385 | |
| 386 /////////////////////////// | |
| 387 | |
| 388 /* STOP REWRITING FUNCTIONS HERE, BUT DON'T FORGET TO EDIT THE | |
| 389 PLATFORM CONVOLUTION PROCS BELOW */ | |
| 390 | |
| 391 /////////////////////////// | |
| 392 | |
| 393 void applySIMDPadding_arm(SkConvolutionFilter1D *filter) { | |
| 394 // Padding |paddingCount| of more dummy coefficients after the coefficients | |
| 395 // of last filter to prevent SIMD instructions which load 8 or 16 bytes | |
| 396 // together to access invalid memory areas. We are not trying to align the | |
| 397 // coefficients right now due to the opaqueness of <vector> implementation. | |
| 398 // This has to be done after all |AddFilter| calls. | |
| 399 for (int i = 0; i < 8; ++i) { | |
| 400 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); | |
| 401 } | |
| 402 } | 230 } |
| 403 | 231 |
| 404 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { | 232 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { |
| 405 if (sk_cpu_arm_has_neon()) { | 233 SK_ARM_NEON_WRAP(platformConvolutionProcs_arm)(procs); |
| 406 procs->fExtraHorizontalReads = 3; | |
| 407 procs->fConvolveVertically = &convolveVertically_arm; | |
| 408 | |
| 409 // next line is commented out because the four-row convolution function
above is | |
| 410 // just a no-op. Please see the comment above its definition, and the S
SE implementation | |
| 411 // in SkBitmapProcState_opts_SSE2.cpp for guidance on its semantics. | |
| 412 // leaving it as NULL will just cause the convolution system to not atte
mpt | |
| 413 // to operate on four rows at once, which is correct but not performance
-optimal. | |
| 414 | |
| 415 // procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_arm; | |
| 416 | |
| 417 procs->fConvolve4RowsHorizontally = NULL; | |
| 418 | |
| 419 procs->fConvolveHorizontally = &convolveHorizontally_arm; | |
| 420 procs->fApplySIMDPadding = &applySIMDPadding_arm; | |
| 421 } | |
| 422 } | 234 } |
| OLD | NEW |