OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // NEON version of YUV to RGB upsampling functions. | 10 // NEON version of YUV to RGB upsampling functions. |
11 // | 11 // |
12 // Author: mans@mansr.com (Mans Rullgard) | 12 // Author: mans@mansr.com (Mans Rullgard) |
13 // Based on SSE code by: somnath@google.com (Somnath Banerjee) | 13 // Based on SSE code by: somnath@google.com (Somnath Banerjee) |
14 | 14 |
15 #include "./dsp.h" | 15 #include "./dsp.h" |
16 | 16 |
17 #if defined(__cplusplus) || defined(c_plusplus) | |
18 extern "C" { | |
19 #endif | |
20 | |
21 #if defined(WEBP_USE_NEON) | 17 #if defined(WEBP_USE_NEON) |
22 | 18 |
23 #include <assert.h> | 19 #include <assert.h> |
24 #include <arm_neon.h> | 20 #include <arm_neon.h> |
25 #include <string.h> | 21 #include <string.h> |
26 #include "./yuv.h" | 22 #include "./yuv.h" |
27 | 23 |
28 #ifdef FANCY_UPSAMPLING | 24 #ifdef FANCY_UPSAMPLING |
29 | 25 |
| 26 //----------------------------------------------------------------------------- |
| 27 // U/V upsampling |
| 28 |
30 // Loads 9 pixels each from rows r1 and r2 and generates 16 pixels. | 29 // Loads 9 pixels each from rows r1 and r2 and generates 16 pixels. |
31 #define UPSAMPLE_16PIXELS(r1, r2, out) { \ | 30 #define UPSAMPLE_16PIXELS(r1, r2, out) { \ |
32 uint8x8_t a = vld1_u8(r1); \ | 31 uint8x8_t a = vld1_u8(r1); \ |
33 uint8x8_t b = vld1_u8(r1 + 1); \ | 32 uint8x8_t b = vld1_u8(r1 + 1); \ |
34 uint8x8_t c = vld1_u8(r2); \ | 33 uint8x8_t c = vld1_u8(r2); \ |
35 uint8x8_t d = vld1_u8(r2 + 1); \ | 34 uint8x8_t d = vld1_u8(r2 + 1); \ |
36 \ | 35 \ |
37 uint16x8_t al = vshll_n_u8(a, 1); \ | 36 uint16x8_t al = vshll_n_u8(a, 1); \ |
38 uint16x8_t bl = vshll_n_u8(b, 1); \ | 37 uint16x8_t bl = vshll_n_u8(b, 1); \ |
39 uint16x8_t cl = vshll_n_u8(c, 1); \ | 38 uint16x8_t cl = vshll_n_u8(c, 1); \ |
(...skipping 38 matching lines...)
78 #define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ | 77 #define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ |
79 uint8_t r1[9], r2[9]; \ | 78 uint8_t r1[9], r2[9]; \ |
80 memcpy(r1, (tb), (num_pixels)); \ | 79 memcpy(r1, (tb), (num_pixels)); \ |
81 memcpy(r2, (bb), (num_pixels)); \ | 80 memcpy(r2, (bb), (num_pixels)); \ |
82 /* replicate last byte */ \ | 81 /* replicate last byte */ \ |
83 memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \ | 82 memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \ |
84 memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \ | 83 memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \ |
85 Upsample16Pixels(r1, r2, out); \ | 84 Upsample16Pixels(r1, r2, out); \ |
86 } | 85 } |
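For context, the arithmetic elided above implements WebP's "fancy" upsampling filter: each 2x2 block of chroma samples (a, b / c, d) expands to four outputs, each weighted 9:3:3:1 toward its nearest input. A minimal scalar sketch of one block, assuming the 9/3/3/1 weights and +8 rounding bias of libwebp's C reference upsampler (the NEON macro computes 16 such outputs at once); UpsampleOneBlock is a hypothetical helper name:

/* Scalar sketch of the fancy-upsampling filter for one 2x2 chroma
 * block.  Weights (9, 3, 3, 1) and the +8 rounding bias are assumed
 * from libwebp's C reference code. */
static void UpsampleOneBlock(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
                             uint8_t out_top[2], uint8_t out_bot[2]) {
  out_top[0] = (9 * a + 3 * b + 3 * c +     d + 8) >> 4;  /* nearest a */
  out_top[1] = (3 * a + 9 * b +     c + 3 * d + 8) >> 4;  /* nearest b */
  out_bot[0] = (3 * a +     b + 9 * c + 3 * d + 8) >> 4;  /* nearest c */
  out_bot[1] = (    a + 3 * b + 3 * c + 9 * d + 8) >> 4;  /* nearest d */
}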
87 | 86 |
88 #define CY 76283 | 87 //----------------------------------------------------------------------------- |
89 #define CVR 89858 | 88 // YUV->RGB conversion |
90 #define CUG 22014 | |
91 #define CVG 45773 | |
92 #define CUB 113618 | |
93 | 89 |
94 static const int16_t coef[4] = { CVR / 4, CUG, CVG / 2, CUB / 4 }; | 90 static const int16_t kCoeffs[4] = { kYScale, kVToR, kUToG, kVToG }; |
| 91 |
| 92 #define v255 vmov_n_u8(255) |
| 93 |
| 94 #define STORE_Rgb(out, r, g, b) do { \ |
| 95 const uint8x8x3_t r_g_b = {{ r, g, b }}; \ |
| 96 vst3_u8(out, r_g_b); \ |
| 97 } while (0) |
| 98 |
| 99 #define STORE_Bgr(out, r, g, b) do { \ |
| 100 const uint8x8x3_t b_g_r = {{ b, g, r }}; \ |
| 101 vst3_u8(out, b_g_r); \ |
| 102 } while (0) |
| 103 |
| 104 #define STORE_Rgba(out, r, g, b) do { \ |
| 105 const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \ |
| 106 vst4_u8(out, r_g_b_v255); \ |
| 107 } while (0) |
| 108 |
| 109 #define STORE_Bgra(out, r, g, b) do { \ |
| 110 const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \ |
| 111 vst4_u8(out, b_g_r_v255); \ |
| 112 } while (0) |
95 | 113 |
96 #define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \ | 114 #define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \ |
97 int i; \ | 115 int i; \ |
98 for (i = 0; i < N; i += 8) { \ | 116 for (i = 0; i < N; i += 8) { \ |
99 int off = ((cur_x) + i) * XSTEP; \ | 117 const int off = ((cur_x) + i) * XSTEP; \ |
100 uint8x8_t y = vld1_u8(src_y + (cur_x) + i); \ | 118 uint8x8_t y = vld1_u8((src_y) + (cur_x) + i); \ |
101 uint8x8_t u = vld1_u8((src_uv) + i); \ | 119 uint8x8_t u = vld1_u8((src_uv) + i); \ |
102 uint8x8_t v = vld1_u8((src_uv) + i + 16); \ | 120 uint8x8_t v = vld1_u8((src_uv) + i + 16); \ |
103 int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \ | 121 const int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \ |
104 int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \ | 122 const int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \ |
105 int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \ | 123 const int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \ |
106 \ | 124 int32x4_t yl = vmull_lane_s16(vget_low_s16(yy), cf16, 0); \ |
107 int16x8_t ud = vshlq_n_s16(uu, 1); \ | 125 int32x4_t yh = vmull_lane_s16(vget_high_s16(yy), cf16, 0); \ |
108 int16x8_t vd = vshlq_n_s16(vv, 1); \ | 126 const int32x4_t rl = vmlal_lane_s16(yl, vget_low_s16(vv), cf16, 1);\ |
109 \ | 127 const int32x4_t rh = vmlal_lane_s16(yh, vget_high_s16(vv), cf16, 1);\ |
110 int32x4_t vrl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(vv), 1), \ | 128 int32x4_t gl = vmlsl_lane_s16(yl, vget_low_s16(uu), cf16, 2); \ |
111 vget_low_s16(vd), cf16, 0); \ | 129 int32x4_t gh = vmlsl_lane_s16(yh, vget_high_s16(uu), cf16, 2); \ |
112 int32x4_t vrh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(vv), 1), \ | 130 const int32x4_t bl = vmovl_s16(vget_low_s16(uu)); \ |
113 vget_high_s16(vd), cf16, 0); \ | 131 const int32x4_t bh = vmovl_s16(vget_high_s16(uu)); \ |
114 int16x8_t vr = vcombine_s16(vrshrn_n_s32(vrl, 16), \ | 132 gl = vmlsl_lane_s16(gl, vget_low_s16(vv), cf16, 3); \ |
115 vrshrn_n_s32(vrh, 16)); \ | 133 gh = vmlsl_lane_s16(gh, vget_high_s16(vv), cf16, 3); \ |
116 \ | 134 yl = vmlaq_lane_s32(yl, bl, cf32, 0); \ |
117 int32x4_t vl = vmovl_s16(vget_low_s16(vv)); \ | 135 yh = vmlaq_lane_s32(yh, bh, cf32, 0); \ |
118 int32x4_t vh = vmovl_s16(vget_high_s16(vv)); \ | 136 /* vrshrn_n_s32() already incorporates the rounding constant */ \ |
119 int32x4_t ugl = vmlal_lane_s16(vl, vget_low_s16(uu), cf16, 1); \ | 137 y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, YUV_FIX2), \ |
120 int32x4_t ugh = vmlal_lane_s16(vh, vget_high_s16(uu), cf16, 1); \ | 138 vrshrn_n_s32(rh, YUV_FIX2))); \ |
121 int32x4_t gcl = vqdmlal_lane_s16(ugl, vget_low_s16(vv), cf16, 2); \ | 139 u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, YUV_FIX2), \ |
122 int32x4_t gch = vqdmlal_lane_s16(ugh, vget_high_s16(vv), cf16, 2); \ | 140 vrshrn_n_s32(gh, YUV_FIX2))); \ |
123 int16x8_t gc = vcombine_s16(vrshrn_n_s32(gcl, 16), \ | 141 v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(yl, YUV_FIX2), \ |
124 vrshrn_n_s32(gch, 16)); \ | 142 vrshrn_n_s32(yh, YUV_FIX2))); \ |
125 \ | 143 STORE_ ## FMT(out + off, y, u, v); \ |
126 int32x4_t ubl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(uu), 1), \ | |
127 vget_low_s16(ud), cf16, 3); \ | |
128 int32x4_t ubh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(uu), 1), \ | |
129 vget_high_s16(ud), cf16, 3); \ | |
130 int16x8_t ub = vcombine_s16(vrshrn_n_s32(ubl, 16), \ | |
131 vrshrn_n_s32(ubh, 16)); \ | |
132 \ | |
133 int32x4_t rl = vaddl_s16(vget_low_s16(yy), vget_low_s16(vr)); \ | |
134 int32x4_t rh = vaddl_s16(vget_high_s16(yy), vget_high_s16(vr)); \ | |
135 int32x4_t gl = vsubl_s16(vget_low_s16(yy), vget_low_s16(gc)); \ | |
136 int32x4_t gh = vsubl_s16(vget_high_s16(yy), vget_high_s16(gc)); \ | |
137 int32x4_t bl = vaddl_s16(vget_low_s16(yy), vget_low_s16(ub)); \ | |
138 int32x4_t bh = vaddl_s16(vget_high_s16(yy), vget_high_s16(ub)); \ | |
139 \ | |
140 rl = vmulq_lane_s32(rl, cf32, 0); \ | |
141 rh = vmulq_lane_s32(rh, cf32, 0); \ | |
142 gl = vmulq_lane_s32(gl, cf32, 0); \ | |
143 gh = vmulq_lane_s32(gh, cf32, 0); \ | |
144 bl = vmulq_lane_s32(bl, cf32, 0); \ | |
145 bh = vmulq_lane_s32(bh, cf32, 0); \ | |
146 \ | |
147 y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, 16), \ | |
148 vrshrn_n_s32(rh, 16))); \ | |
149 u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, 16), \ | |
150 vrshrn_n_s32(gh, 16))); \ | |
151 v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(bl, 16), \ | |
152 vrshrn_n_s32(bh, 16))); \ | |
153 STR_ ## FMT(out + off, y, u, v); \ | |
154 } \ | 144 } \ |
155 } | 145 } |
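The NEW version of CONVERT8 maps directly onto the fixed-point BT.601 formulas: R = Y'*kYScale + V'*kVToR, G = Y'*kYScale - U'*kUToG - V'*kVToG, B = Y'*kYScale + U'*kUToB, each rounded and narrowed by vrshrn_n_s32 with shift YUV_FIX2 (kUToB lives in the 32-bit cf32 register because it does not fit in an int16 lane). A scalar sketch of the same computation, assuming the constants and YUV_FIX2 are as defined in ./yuv.h; Clip8 and YuvToRgbScalar are hypothetical names:

/* Scalar equivalent of one pixel of CONVERT8, under the assumption
 * that kYScale, kVToR, kUToG, kVToG, kUToB and YUV_FIX2 come from
 * ./yuv.h.  Clip8() clamps to [0, 255], matching vqmovun_s16. */
static uint8_t Clip8(int v) {
  return (v < 0) ? 0 : (v > 255) ? 255 : (uint8_t)v;
}
static void YuvToRgbScalar(int y, int u, int v, uint8_t* const rgb) {
  const int yy = (y - 16) * kYScale;        /* luma term, shared by R/G/B */
  const int round = 1 << (YUV_FIX2 - 1);    /* bias that vrshrn_n_s32 adds */
  u -= 128;
  v -= 128;
  rgb[0] = Clip8((yy + v * kVToR + round) >> YUV_FIX2);              /* R */
  rgb[1] = Clip8((yy - u * kUToG - v * kVToG + round) >> YUV_FIX2);  /* G */
  rgb[2] = Clip8((yy + u * kUToB + round) >> YUV_FIX2);              /* B */
}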
156 | 146 |
157 #define v255 vmov_n_u8(255) | 147 #define CONVERT1(FUNC, XSTEP, N, src_y, src_uv, rgb, cur_x) { \ |
158 | |
159 #define STR_Rgb(out, r, g, b) do { \ | |
160 const uint8x8x3_t r_g_b = {{ r, g, b }}; \ | |
161 vst3_u8(out, r_g_b); \ | |
162 } while (0) | |
163 | |
164 #define STR_Bgr(out, r, g, b) do { \ | |
165 const uint8x8x3_t b_g_r = {{ b, g, r }}; \ | |
166 vst3_u8(out, b_g_r); \ | |
167 } while (0) | |
168 | |
169 #define STR_Rgba(out, r, g, b) do { \ | |
170 const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \ | |
171 vst4_u8(out, r_g_b_v255); \ | |
172 } while (0) | |
173 | |
174 #define STR_Bgra(out, r, g, b) do { \ | |
175 const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \ | |
176 vst4_u8(out, b_g_r_v255); \ | |
177 } while (0) | |
178 | |
179 #define CONVERT1(FMT, XSTEP, N, src_y, src_uv, rgb, cur_x) { \ | |
180 int i; \ | 148 int i; \ |
181 for (i = 0; i < N; i++) { \ | 149 for (i = 0; i < N; i++) { \ |
182 int off = ((cur_x) + i) * XSTEP; \ | 150 const int off = ((cur_x) + i) * XSTEP; \ |
183 int y = src_y[(cur_x) + i]; \ | 151 const int y = src_y[(cur_x) + i]; \ |
184 int u = (src_uv)[i]; \ | 152 const int u = (src_uv)[i]; \ |
185 int v = (src_uv)[i + 16]; \ | 153 const int v = (src_uv)[i + 16]; \ |
186 VP8YuvTo ## FMT(y, u, v, rgb + off); \ | 154 FUNC(y, u, v, rgb + off); \ |
187 } \ | 155 } \ |
188 } | 156 } |
189 | 157 |
190 #define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \ | 158 #define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \ |
191 top_dst, bottom_dst, cur_x, len) { \ | 159 top_dst, bottom_dst, cur_x, len) { \ |
192 if (top_y) { \ | 160 CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \ |
193 CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \ | 161 if (bottom_y != NULL) { \ |
194 } \ | |
195 if (bottom_y) { \ | |
196 CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \ | 162 CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \ |
197 } \ | 163 } \ |
198 } | 164 } |
199 | 165 |
200 #define CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, uv, \ | 166 #define CONVERT2RGB_1(FUNC, XSTEP, top_y, bottom_y, uv, \ |
201 top_dst, bottom_dst, cur_x, len) { \ | 167 top_dst, bottom_dst, cur_x, len) { \ |
202 if (top_y) { \ | 168 CONVERT1(FUNC, XSTEP, len, top_y, uv, top_dst, cur_x); \ |
203 CONVERT1(FMT, XSTEP, len, top_y, uv, top_dst, cur_x); \ | 169 if (bottom_y != NULL) { \ |
204 } \ | 170 CONVERT1(FUNC, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \ |
205 if (bottom_y) { \ | |
206 CONVERT1(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \ | |
207 } \ | 171 } \ |
208 } | 172 } |
209 | 173 |
210 #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ | 174 #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ |
211 static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ | 175 static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ |
212 const uint8_t *top_u, const uint8_t *top_v, \ | 176 const uint8_t *top_u, const uint8_t *top_v, \ |
213 const uint8_t *cur_u, const uint8_t *cur_v, \ | 177 const uint8_t *cur_u, const uint8_t *cur_v, \ |
214 uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ | 178 uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ |
215 int block; \ | 179 int block; \ |
216 /* 16-byte aligned array to cache reconstructed u and v */ \ | 180 /* 16-byte aligned array to cache reconstructed u and v */ \ |
217 uint8_t uv_buf[2 * 32 + 15]; \ | 181 uint8_t uv_buf[2 * 32 + 15]; \ |
218 uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ | 182 uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ |
219 const int uv_len = (len + 1) >> 1; \ | 183 const int uv_len = (len + 1) >> 1; \ |
220 /* 9 pixels must be readable for each block */ \ | 184 /* 9 pixels must be readable for each block */ \ |
221 const int num_blocks = (uv_len - 1) >> 3; \ | 185 const int num_blocks = (uv_len - 1) >> 3; \ |
222 const int leftover = uv_len - num_blocks * 8; \ | 186 const int leftover = uv_len - num_blocks * 8; \ |
223 const int last_pos = 1 + 16 * num_blocks; \ | 187 const int last_pos = 1 + 16 * num_blocks; \ |
224 \ | 188 \ |
225 const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \ | 189 const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \ |
226 const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \ | 190 const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \ |
227 \ | 191 \ |
228 const int16x4_t cf16 = vld1_s16(coef); \ | 192 const int16x4_t cf16 = vld1_s16(kCoeffs); \ |
229 const int32x2_t cf32 = vmov_n_s32(CY); \ | 193 const int32x2_t cf32 = vmov_n_s32(kUToB); \ |
230 const uint8x8_t u16 = vmov_n_u8(16); \ | 194 const uint8x8_t u16 = vmov_n_u8(16); \ |
231 const uint8x8_t u128 = vmov_n_u8(128); \ | 195 const uint8x8_t u128 = vmov_n_u8(128); \ |
232 \ | 196 \ |
233 /* Treat the first pixel in the regular way */ \ | 197 /* Treat the first pixel in the regular way */ \ |
234 if (top_y) { \ | 198 assert(top_y != NULL); \ |
| 199 { \ |
235 const int u0 = (top_u[0] + u_diag) >> 1; \ | 200 const int u0 = (top_u[0] + u_diag) >> 1; \ |
236 const int v0 = (top_v[0] + v_diag) >> 1; \ | 201 const int v0 = (top_v[0] + v_diag) >> 1; \ |
237 VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \ | 202 VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \ |
238 } \ | 203 } \ |
239 if (bottom_y) { \ | 204 if (bottom_y != NULL) { \ |
240 const int u0 = (cur_u[0] + u_diag) >> 1; \ | 205 const int u0 = (cur_u[0] + u_diag) >> 1; \ |
241 const int v0 = (cur_v[0] + v_diag) >> 1; \ | 206 const int v0 = (cur_v[0] + v_diag) >> 1; \ |
242 VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \ | 207 VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \ |
243 } \ | 208 } \ |
244 \ | 209 \ |
245 for (block = 0; block < num_blocks; ++block) { \ | 210 for (block = 0; block < num_blocks; ++block) { \ |
246 UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \ | 211 UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \ |
247 UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \ | 212 UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \ |
248 CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \ | 213 CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \ |
249 top_dst, bottom_dst, 16 * block + 1, 16); \ | 214 top_dst, bottom_dst, 16 * block + 1, 16); \ |
250 top_u += 8; \ | 215 top_u += 8; \ |
251 cur_u += 8; \ | 216 cur_u += 8; \ |
252 top_v += 8; \ | 217 top_v += 8; \ |
253 cur_v += 8; \ | 218 cur_v += 8; \ |
254 } \ | 219 } \ |
255 \ | 220 \ |
256 UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \ | 221 UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \ |
257 UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \ | 222 UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \ |
258 CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, r_uv, \ | 223 CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv, \ |
259 top_dst, bottom_dst, last_pos, len - last_pos); \ | 224 top_dst, bottom_dst, last_pos, len - last_pos); \ |
260 } | 225 } |
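A side note on the scratch buffer above: portable C offers no aligned stack allocation, so r_uv is aligned to 16 bytes by hand, over-allocating by 15 bytes and rounding the address up. A standalone illustration of the idiom, with hypothetical names scratch and aligned:

/* Manual 16-byte alignment: over-allocate by 15 bytes, then round
 * the address up to the next multiple of 16. */
uint8_t scratch[2 * 32 + 15];
uint8_t* const aligned =
    (uint8_t*)(((uintptr_t)scratch + 15) & ~(uintptr_t)15);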
261 | 226 |
262 // NEON variants of the fancy upsampler. | 227 // NEON variants of the fancy upsampler. |
263 NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3) | 228 NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3) |
264 NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3) | 229 NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3) |
265 NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4) | 230 NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4) |
266 NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4) | 231 NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4) |
267 | 232 |
268 #endif // FANCY_UPSAMPLING | 233 #endif // FANCY_UPSAMPLING |
269 | 234 |
270 #endif // WEBP_USE_NEON | 235 #endif // WEBP_USE_NEON |
271 | 236 |
272 //------------------------------------------------------------------------------ | 237 //------------------------------------------------------------------------------ |
273 | 238 |
| 239 #ifdef FANCY_UPSAMPLING |
| 240 |
274 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; | 241 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; |
275 | 242 |
276 void WebPInitUpsamplersNEON(void) { | 243 void WebPInitUpsamplersNEON(void) { |
277 #if defined(WEBP_USE_NEON) | 244 #if defined(WEBP_USE_NEON) |
278 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON; | 245 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON; |
279 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON; | 246 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON; |
280 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON; | 247 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON; |
281 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON; | 248 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON; |
282 #endif // WEBP_USE_NEON | 249 #endif // WEBP_USE_NEON |
283 } | 250 } |
284 | 251 |
285 void WebPInitPremultiplyNEON(void) { | 252 void WebPInitPremultiplyNEON(void) { |
286 #if defined(WEBP_USE_NEON) | 253 #if defined(WEBP_USE_NEON) |
287 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON; | 254 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON; |
288 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON; | 255 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON; |
289 #endif // WEBP_USE_NEON | 256 #endif // WEBP_USE_NEON |
290 } | 257 } |
291 | 258 |
292 #if defined(__cplusplus) || defined(c_plusplus) | 259 #else |
293 } // extern "C" | 260 |
294 #endif | 261 // This empty function exists to avoid an empty .o |
| 262 void WebPInitPremultiplyNEON(void) {} |
| 263 |
| 264 #endif // FANCY_UPSAMPLING |
| 265 |