Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1078)

Side by Side Diff: third_party/libpng/arm/filter_neon.S

Issue 2021403002: Update libpng to 1.6.22 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rearrange pnglibconf.h Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libpng/arm/arm_init.c ('k') | third_party/libpng/arm/filter_neon_intrinsics.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1
2 /* filter_neon.S - NEON optimised filter functions
3 *
4 * Copyright (c) 2014 Glenn Randers-Pehrson
5 * Written by Mans Rullgard, 2011.
6 * Last changed in libpng 1.6.16 [December 22, 2014]
7 *
8 * This code is released under the libpng license.
9 * For conditions of distribution and use, see the disclaimer
10 * and license in png.h
11 */
12
13 /* This is required to get the symbol renames, which are #defines, and the
14 * definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION.
15 */
16 #define PNG_VERSION_INFO_ONLY
17 #include "../pngpriv.h"
18
19 #if defined(__linux__) && defined(__ELF__)
20 .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
21 #endif
22
23 #ifdef PNG_READ_SUPPORTED
24
25 /* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
26 * ARM64). The code in arm/filter_neon_intrinsics.c supports ARM64, however it
27 * only works if -mfpu=neon is specified on the GCC command line. See pngpriv.h
28 * for the logic which sets PNG_USE_ARM_NEON_ASM:
29 */
30 #if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
31
32 #if PNG_ARM_NEON_OPT > 0
33
/* ELF is a line prefix for directives that only make sense in ELF output
 * (the func macro in this file puts it in front of .size and .type).
 * When __ELF__ is defined it expands to nothing, so the directive is
 * assembled; otherwise it expands to the "@" comment character, so the rest
 * of the line is ignored by the assembler.
 */
34 #ifdef __ELF__
35 # define ELF
36 #else
37 # define ELF @
38 #endif
39
/* Target selection for everything that follows in this file. */
40 .arch armv7-a @ assemble for the ARMv7-A architecture
41 .fpu neon @ enable the NEON instruction set
42
/* func name, export=0
 *
 * Opens a function called "name": switches to .text, aligns, optionally
 * makes the symbol global (when export is non-zero), marks it as a function
 * symbol on ELF, and emits the label.
 *
 * As a side effect it defines a one-shot macro "endfunc" that closes the same
 * function (symbol size on ELF plus the matching .endfunc) and then purges
 * itself, so that the next use of func can define a fresh endfunc bound to
 * the next name. Every func must therefore be paired with one endfunc.
 */
43 .macro func name, export=0
44 .macro endfunc
45 ELF .size \name, . - \name
46 .endfunc
47 .purgem endfunc
48 .endm
49 .text
50
51 /* Explicitly specifying alignment here because some versions of
52 * GAS don't align code correctly. This is harmless in correctly
53 * written versions of GAS.
54 */
55 .align 2 @ on ARM gas the operand is a power of two, i.e. 4-byte alignment
56
57 .if \export
58 .global \name
59 .endif
60 ELF .type \name, STT_FUNC
61 .func \name
62 \name:
63 .endm
64
/* png_read_filter_row_sub4_neon
 *
 * Reverses the "sub" filter in place for a row of 4-byte pixels: every
 * output pixel is the stored pixel plus the previously reconstructed pixel
 * (bytewise, modulo 256). The pixel before the first one is taken as zero.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm against
 *            the C prototype)
 *       r1 = row data, rewritten in place; accessed with a :128 qualifier,
 *            i.e. the address is asserted to be 16-byte aligned
 * Uses: r3, d0-d7, flags
 *
 * NOTE(review): the loop handles one whole 16-byte group (four pixels) per
 * pass and always runs at least once, so it touches bytes past rowbytes when
 * that is not a multiple of 16. The caller has to supply a buffer that
 * tolerates this - verify against the allocation code.
 */
65 func png_read_filter_row_sub4_neon, export=1
66 ldr r3, [r0, #4] @ rowbytes
67 vmov.i8 d3, #0 @ d3 = previous reconstructed pixel, zero before the first one
68 1:
69 vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] @ d4..d7 = stored pixels 0..3, each replicated to all lanes
70 vadd.u8 d0, d3, d4 @ pixel 0 = stored 0 + pixel carried from the previous pass
71 vadd.u8 d1, d0, d5 @ pixel 1 = stored 1 + pixel 0
72 vadd.u8 d2, d1, d6 @ pixel 2 = stored 2 + pixel 1
73 vadd.u8 d3, d2, d7 @ pixel 3 = stored 3 + pixel 2; stays in d3 for the next pass
74 vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! @ write the four pixels back, r1 += 16
75 subs r3, r3, #16 @ 16 bytes done
76 bgt 1b @ repeat while bytes remain (signed compare)
77
78 bx lr
79 endfunc
80
/* png_read_filter_row_sub3_neon
 *
 * Reverses the "sub" filter in place for a row of 3-byte pixels, four pixels
 * (12 bytes) per pass. Only the low three bytes of each working register are
 * meaningful.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place; the first store of every pass
 *            uses a :32 qualifier, i.e. asserts 4-byte alignment
 * Uses: r0 (becomes the read pointer), r2, r3, r12, d0-d7, q11, flags
 *
 * NOTE(review): each load fetches 16 bytes although only 12 are consumed,
 * and the next group is fetched before the current one is stored, so up to
 * 16 bytes beyond the last processed group are read. Each store writes 4
 * bytes while advancing 3, so the final store of the row writes one byte
 * past the last processed pixel. The loop also always runs at least once and
 * in whole 12-byte steps. The caller must provide buffers that tolerate all
 * of this - verify.
 */
81 func png_read_filter_row_sub3_neon, export=1
82 ldr r3, [r0, #4] @ rowbytes
83 vmov.i8 d3, #0 @ d3 = previous reconstructed pixel, zero before the first one
84 mov r0, r1 @ r0 = read pointer, runs ahead of the write pointer r1
85 mov r2, #3 @ store stride: one 3-byte pixel
86 mov r12, #12 @ load stride: four 3-byte pixels
87 vld1.8 {q11}, [r0], r12 @ q11 (d22,d23) = first 16 bytes, r0 += 12
88 1:
89 vext.8 d5, d22, d23, #3 @ d5 = bytes 3.. of q11: stored pixel 1
90 vadd.u8 d0, d3, d22 @ pixel 0 = stored 0 (low bytes of d22) + carried pixel
91 vext.8 d6, d22, d23, #6 @ d6 = bytes 6.. of q11: stored pixel 2
92 vadd.u8 d1, d0, d5 @ pixel 1 = stored 1 + pixel 0
93 vext.8 d7, d23, d23, #1 @ d7 = bytes 9.. of q11: stored pixel 3
94 vld1.8 {q11}, [r0], r12 @ fetch the next group before the stores below overwrite its first byte
95 vst1.32 {d0[0]}, [r1,:32], r2 @ write 4 bytes, advance 3; the extra byte is overwritten by the next store
96 vadd.u8 d2, d1, d6 @ pixel 2 = stored 2 + pixel 1
97 vst1.32 {d1[0]}, [r1], r2
98 vadd.u8 d3, d2, d7 @ pixel 3 = stored 3 + pixel 2; stays in d3 for the next pass
99 vst1.32 {d2[0]}, [r1], r2
100 vst1.32 {d3[0]}, [r1], r2 @ the 4th byte written here lands on the first byte of the next group
101 subs r3, r3, #12 @ 12 bytes done
102 bgt 1b @ repeat while bytes remain (signed compare)
103
104 bx lr
105 endfunc
106
/* png_read_filter_row_up_neon
 *
 * Reverses the "up" filter in place: adds the bytes addressed by r2 to the
 * bytes addressed by r1 (bytewise, modulo 256), 16 bytes per pass. The pixel
 * size does not matter here.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place
 *       r2 = bytes to add (presumably the previous row - confirm)
 *       Both r1 and r2 are accessed with a :128 qualifier, i.e. asserted to
 *       be 16-byte aligned.
 * Uses: r3, q0, q1, flags; r1 and r2 are advanced
 *
 * NOTE(review): the loop runs at least once and in whole 16-byte steps, so it
 * reads and writes past rowbytes when that is not a multiple of 16 - the
 * caller must provide buffers that tolerate this.
 */
107 func png_read_filter_row_up_neon, export=1
108 ldr r3, [r0, #4] @ rowbytes
109 1:
110 vld1.8 {q0}, [r1,:128] @ q0 = 16 stored bytes of this row (r1 not advanced yet)
111 vld1.8 {q1}, [r2,:128]! @ q1 = the 16 bytes at r2, r2 += 16
112 vadd.u8 q0, q0, q1 @ bytewise sum, wraps modulo 256
113 vst1.8 {q0}, [r1,:128]! @ write back over the input, r1 += 16
114 subs r3, r3, #16 @ 16 bytes done
115 bgt 1b @ repeat while bytes remain (signed compare)
116
117 bx lr
118 endfunc
119
/* png_read_filter_row_avg4_neon
 *
 * Reverses the "average" filter in place for 4-byte pixels: every output
 * pixel is the stored pixel plus the halved sum (vhadd, rounds down) of the
 * previously reconstructed pixel and the pixel at the same position in the
 * data addressed by r2. The pixel before the first one is taken as zero.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place
 *       r2 = pixels above (presumably the previous row - confirm)
 *       Both r1 and r2 are accessed with a :128 qualifier, i.e. asserted to
 *       be 16-byte aligned.
 * Uses: r12, d0-d7, d16-d19, flags; r1 and r2 are advanced
 *
 * NOTE(review): runs at least once and in whole 16-byte steps, so it touches
 * bytes past rowbytes when that is not a multiple of 16.
 */
120 func png_read_filter_row_avg4_neon, export=1
121 ldr r12, [r0, #4] @ rowbytes
122 vmov.i8 d3, #0 @ d3 = previous reconstructed pixel, zero before the first one
123 1:
124 vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] @ d4..d7 = stored pixels 0..3, replicated to all lanes
125 vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]! @ d16..d19 = pixels 0..3 above, r2 += 16
126 vhadd.u8 d0, d3, d16 @ (left + above) >> 1 for pixel 0, no intermediate overflow
127 vadd.u8 d0, d0, d4 @ pixel 0 = stored 0 + average
128 vhadd.u8 d1, d0, d17 @ pixel 0 is the left neighbour of pixel 1
129 vadd.u8 d1, d1, d5 @ pixel 1
130 vhadd.u8 d2, d1, d18
131 vadd.u8 d2, d2, d6 @ pixel 2
132 vhadd.u8 d3, d2, d19
133 vadd.u8 d3, d3, d7 @ pixel 3; stays in d3 as the left neighbour for the next pass
134 vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! @ write the four pixels back, r1 += 16
135 subs r12, r12, #16 @ 16 bytes done
136 bgt 1b @ repeat while bytes remain (signed compare)
137
138 bx lr
139 endfunc
140
/* png_read_filter_row_avg3_neon
 *
 * Reverses the "average" filter in place for 3-byte pixels, four pixels
 * (12 bytes) per pass: every output pixel is the stored pixel plus the
 * halved sum (vhadd, rounds down) of the previously reconstructed pixel and
 * the pixel above. Only the low three bytes of each working register are
 * meaningful.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place; the first store of every pass
 *            asserts 4-byte alignment (:32)
 *       r2 = pixels above (presumably the previous row - confirm); read
 *            without an alignment qualifier
 * Uses: r0 (becomes the read pointer), r12, d0-d7, d16-d19, q10, q11, flags.
 *       r4 and lr are used as stride registers and are saved and restored on
 *       the stack; the return is done by popping the saved lr into pc.
 *
 * NOTE(review): both inputs are loaded 16 bytes at a time with a 12-byte
 * stride and the row is fetched one group ahead, so bytes beyond the last
 * processed group are read from both buffers; the last store writes one byte
 * past the last processed pixel. The loop runs at least once and in whole
 * 12-byte steps. The caller must provide buffers that tolerate this.
 */
141 func png_read_filter_row_avg3_neon, export=1
142 push {r4,lr}
143 ldr r12, [r0, #4] @ rowbytes
144 vmov.i8 d3, #0 @ d3 = previous reconstructed pixel, zero before the first one
145 mov r0, r1 @ r0 = read pointer, runs ahead of the write pointer r1
146 mov r4, #3 @ store stride: one 3-byte pixel
147 mov lr, #12 @ load stride: four 3-byte pixels
148 vld1.8 {q11}, [r0], lr @ q11 (d22,d23) = first 16 bytes of the row, r0 += 12
149 1:
150 vld1.8 {q10}, [r2], lr @ q10 (d20,d21) = 16 bytes above, r2 += 12
151 vext.8 d5, d22, d23, #3 @ d5 = stored pixel 1
152 vhadd.u8 d0, d3, d20 @ (left + above) >> 1 for pixel 0
153 vext.8 d17, d20, d21, #3 @ d17 = pixel 1 above
154 vadd.u8 d0, d0, d22 @ pixel 0 = stored 0 + average
155 vext.8 d6, d22, d23, #6 @ d6 = stored pixel 2
156 vhadd.u8 d1, d0, d17
157 vext.8 d18, d20, d21, #6 @ d18 = pixel 2 above
158 vadd.u8 d1, d1, d5 @ pixel 1
159 vext.8 d7, d23, d23, #1 @ d7 = stored pixel 3 (bytes 9.. of q11)
160 vld1.8 {q11}, [r0], lr @ fetch the next group before the stores below overwrite its first byte
161 vst1.32 {d0[0]}, [r1,:32], r4 @ write 4 bytes, advance 3; the extra byte is overwritten by the next store
162 vhadd.u8 d2, d1, d18
163 vst1.32 {d1[0]}, [r1], r4
164 vext.8 d19, d21, d21, #1 @ d19 = pixel 3 above (bytes 9.. of q10)
165 vadd.u8 d2, d2, d6 @ pixel 2
166 vhadd.u8 d3, d2, d19
167 vst1.32 {d2[0]}, [r1], r4
168 vadd.u8 d3, d3, d7 @ pixel 3; stays in d3 as the left neighbour for the next pass
169 vst1.32 {d3[0]}, [r1], r4 @ the 4th byte written here lands on the first byte of the next group
170 subs r12, r12, #12 @ 12 bytes done
171 bgt 1b @ repeat while bytes remain (signed compare)
172
173 pop {r4,pc}
174 endfunc
175
/* paeth rx, ra, rb, rc
 *
 * Computes the Paeth predictor independently on each of the 8 byte lanes of
 * the D registers passed in:
 *   ra = a (left), rb = b (above), rc = c (above left), result in rx.
 * With p = a + b - c the three distances are
 *   pa = |p - a| = |b - c|,  pb = |p - b| = |a - c|,  pc = |p - c| = |a + b - 2c|
 * and the result is a if pa <= pb and pa <= pc, otherwise b if pb <= pc,
 * otherwise c. The distances are computed in 16-bit lanes so that they
 * cannot overflow.
 *
 * Clobbers q12-q15 (d28 is the low half of q14). rx is written before ra,
 * rb and rc are last read, so rx must be a different register from all
 * three, and none of the operands may lie in q12-q15.
 */
176 .macro paeth rx, ra, rb, rc
177 vaddl.u8 q12, \ra, \rb @ a + b
178 vaddl.u8 q15, \rc, \rc @ 2*c
179 vabdl.u8 q13, \rb, \rc @ pa
180 vabdl.u8 q14, \ra, \rc @ pb
181 vabd.u16 q15, q12, q15 @ pc
182 vcle.u16 q12, q13, q14 @ pa <= pb
183 vcle.u16 q13, q13, q15 @ pa <= pc
184 vcle.u16 q14, q14, q15 @ pb <= pc
185 vand q12, q12, q13 @ pa <= pb && pa <= pc
186 vmovn.u16 d28, q14 @ narrow the (pb <= pc) mask to one byte per lane
187 vmovn.u16 \rx, q12 @ narrow the "choose a" mask to one byte per lane
188 vbsl d28, \rb, \rc @ d28 = (pb <= pc) ? b : c
189 vbsl \rx, \ra, d28 @ rx = (pa <= pb && pa <= pc) ? a : d28
190 .endm
191
/* png_read_filter_row_paeth4_neon
 *
 * Reverses the "Paeth" filter in place for 4-byte pixels, four pixels
 * (16 bytes) per pass: every output pixel is the stored pixel plus the
 * result of the paeth macro applied to the left, above and above-left
 * pixels. Left and above-left are taken as zero for the first pixel.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place
 *       r2 = pixels above (presumably the previous row - confirm)
 *       Both r1 and r2 are accessed with a :128 qualifier, i.e. asserted to
 *       be 16-byte aligned.
 * Uses: r12, d0-d7, d16-d20, q12-q15 (through the paeth macro), flags;
 *       r1 and r2 are advanced
 *
 * NOTE(review): runs at least once and in whole 16-byte steps, so it touches
 * bytes past rowbytes when that is not a multiple of 16.
 */
192 func png_read_filter_row_paeth4_neon, export=1
193 ldr r12, [r0, #4] @ rowbytes
194 vmov.i8 d3, #0 @ d3 = a: previous reconstructed pixel, zero at the row start
195 vmov.i8 d20, #0 @ d20 = c: pixel above left, zero at the row start
196 1:
197 vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] @ d4..d7 = stored pixels 0..3, replicated to all lanes
198 vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]! @ d16..d19 = pixels 0..3 above, r2 += 16
199 paeth d0, d3, d16, d20 @ predictor for pixel 0: a = carried pixel, b = above 0, c = carried above
200 vadd.u8 d0, d0, d4 @ pixel 0 = stored 0 + predictor
201 paeth d1, d0, d17, d16 @ pixel 1: a = pixel 0, b = above 1, c = above 0
202 vadd.u8 d1, d1, d5
203 paeth d2, d1, d18, d17 @ pixel 2: a = pixel 1, b = above 2, c = above 1
204 vadd.u8 d2, d2, d6
205 paeth d3, d2, d19, d18 @ pixel 3: a = pixel 2, b = above 3, c = above 2
206 vmov d20, d19 @ above 3 becomes the above-left pixel of the next pass
207 vadd.u8 d3, d3, d7 @ pixel 3; stays in d3 as the left pixel of the next pass
208 vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! @ write the four pixels back, r1 += 16
209 subs r12, r12, #16 @ 16 bytes done
210 bgt 1b @ repeat while bytes remain (signed compare)
211
212 bx lr
213 endfunc
214
/* png_read_filter_row_paeth3_neon
 *
 * Reverses the "Paeth" filter in place for 3-byte pixels, four pixels
 * (12 bytes) per pass: every output pixel is the stored pixel plus the
 * result of the paeth macro applied to the left, above and above-left
 * pixels. Left and above-left are taken as zero for the first pixel. Only
 * the low three bytes of each working register are meaningful.
 *
 * In:   r0 = pointer to a structure whose 32-bit word at offset 4 is the
 *            row length in bytes (presumably png_row_info - confirm)
 *       r1 = row data, rewritten in place; the first store of every pass
 *            asserts 4-byte alignment (:32)
 *       r2 = pixels above (presumably the previous row - confirm); read
 *            without an alignment qualifier
 * Uses: r0 (becomes the read pointer), r12, d0-d7, d17-d19, q10, q11,
 *       q12-q15 (through the paeth macro), flags. r4 and lr are used as
 *       stride registers and are saved and restored on the stack; the return
 *       is done by popping the saved lr into pc.
 *
 * NOTE(review): both inputs are loaded 16 bytes at a time with a 12-byte
 * stride and the row is fetched one group ahead, so bytes beyond the last
 * processed group are read from both buffers; the last store writes one byte
 * past the last processed pixel. The loop runs at least once and in whole
 * 12-byte steps. The caller must provide buffers that tolerate this.
 */
215 func png_read_filter_row_paeth3_neon, export=1
216 push {r4,lr}
217 ldr r12, [r0, #4] @ rowbytes
218 vmov.i8 d3, #0 @ d3 = a: previous reconstructed pixel, zero at the row start
219 vmov.i8 d4, #0 @ d4 = c: pixel above left, zero at the row start
220 mov r0, r1 @ r0 = read pointer, runs ahead of the write pointer r1
221 mov r4, #3 @ store stride: one 3-byte pixel
222 mov lr, #12 @ load stride: four 3-byte pixels
223 vld1.8 {q11}, [r0], lr @ q11 (d22,d23) = first 16 bytes of the row, r0 += 12
224 1:
225 vld1.8 {q10}, [r2], lr @ q10 (d20,d21) = 16 bytes above, r2 += 12
226 paeth d0, d3, d20, d4 @ predictor for pixel 0: a = carried pixel, b = above 0, c = carried above
227 vext.8 d5, d22, d23, #3 @ d5 = stored pixel 1
228 vadd.u8 d0, d0, d22 @ pixel 0 = stored 0 + predictor
229 vext.8 d17, d20, d21, #3 @ d17 = pixel 1 above
230 paeth d1, d0, d17, d20 @ pixel 1: a = pixel 0, b = above 1, c = above 0
231 vst1.32 {d0[0]}, [r1,:32], r4 @ write 4 bytes, advance 3; the extra byte is overwritten by the next store
232 vext.8 d6, d22, d23, #6 @ d6 = stored pixel 2
233 vadd.u8 d1, d1, d5 @ pixel 1
234 vext.8 d18, d20, d21, #6 @ d18 = pixel 2 above
235 paeth d2, d1, d18, d17 @ pixel 2: a = pixel 1, b = above 2, c = above 1
236 vext.8 d7, d23, d23, #1 @ d7 = stored pixel 3 (bytes 9.. of q11)
237 vld1.8 {q11}, [r0], lr @ fetch the next group before the last store overwrites its first byte
238 vst1.32 {d1[0]}, [r1], r4
239 vadd.u8 d2, d2, d6 @ pixel 2
240 vext.8 d19, d21, d21, #1 @ d19 = pixel 3 above (bytes 9.. of q10)
241 paeth d3, d2, d19, d18 @ pixel 3: a = pixel 2, b = above 3, c = above 2
242 vst1.32 {d2[0]}, [r1], r4
243 vmov d4, d19 @ above 3 becomes the above-left pixel of the next pass
244 vadd.u8 d3, d3, d7 @ pixel 3; stays in d3 as the left pixel of the next pass
245 vst1.32 {d3[0]}, [r1], r4 @ the 4th byte written here lands on the first byte of the next group
246 subs r12, r12, #12 @ 12 bytes done
247 bgt 1b @ repeat while bytes remain (signed compare)
248
249 pop {r4,pc}
250 endfunc
251 #endif /* PNG_ARM_NEON_OPT > 0 */
252 #endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
253 #endif /* READ */
OLDNEW
« no previous file with comments | « third_party/libpng/arm/arm_init.c ('k') | third_party/libpng/arm/filter_neon_intrinsics.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698