| OLD | NEW |
| 1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 9 // | 9 // |
| 10 // NEON version of YUV to RGB upsampling functions. | 10 // NEON version of YUV to RGB upsampling functions. |
| 11 // | 11 // |
| 12 // Author: mans@mansr.com (Mans Rullgard) | 12 // Author: mans@mansr.com (Mans Rullgard) |
| 13 // Based on SSE code by: somnath@google.com (Somnath Banerjee) | 13 // Based on SSE code by: somnath@google.com (Somnath Banerjee) |
| 14 | 14 |
| 15 #include "./dsp.h" | 15 #include "./dsp.h" |
| 16 | 16 |
| 17 #if defined(__cplusplus) || defined(c_plusplus) | |
| 18 extern "C" { | |
| 19 #endif | |
| 20 | |
| 21 #if defined(WEBP_USE_NEON) | 17 #if defined(WEBP_USE_NEON) |
| 22 | 18 |
| 23 #include <assert.h> | 19 #include <assert.h> |
| 24 #include <arm_neon.h> | 20 #include <arm_neon.h> |
| 25 #include <string.h> | 21 #include <string.h> |
| 26 #include "./yuv.h" | 22 #include "./yuv.h" |
| 27 | 23 |
| 28 #ifdef FANCY_UPSAMPLING | 24 #ifdef FANCY_UPSAMPLING |
| 29 | 25 |
| 26 //----------------------------------------------------------------------------- |
| 27 // U/V upsampling |
| 28 |
| 30 // Loads 9 pixels each from rows r1 and r2 and generates 16 pixels. | 29 // Loads 9 pixels each from rows r1 and r2 and generates 16 pixels. |
| 31 #define UPSAMPLE_16PIXELS(r1, r2, out) { \ | 30 #define UPSAMPLE_16PIXELS(r1, r2, out) { \ |
| 32 uint8x8_t a = vld1_u8(r1); \ | 31 uint8x8_t a = vld1_u8(r1); \ |
| 33 uint8x8_t b = vld1_u8(r1 + 1); \ | 32 uint8x8_t b = vld1_u8(r1 + 1); \ |
| 34 uint8x8_t c = vld1_u8(r2); \ | 33 uint8x8_t c = vld1_u8(r2); \ |
| 35 uint8x8_t d = vld1_u8(r2 + 1); \ | 34 uint8x8_t d = vld1_u8(r2 + 1); \ |
| 36 \ | 35 \ |
| 37 uint16x8_t al = vshll_n_u8(a, 1); \ | 36 uint16x8_t al = vshll_n_u8(a, 1); \ |
| 38 uint16x8_t bl = vshll_n_u8(b, 1); \ | 37 uint16x8_t bl = vshll_n_u8(b, 1); \ |
| 39 uint16x8_t cl = vshll_n_u8(c, 1); \ | 38 uint16x8_t cl = vshll_n_u8(c, 1); \ |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 78 #define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ | 77 #define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ |
| 79 uint8_t r1[9], r2[9]; \ | 78 uint8_t r1[9], r2[9]; \ |
| 80 memcpy(r1, (tb), (num_pixels)); \ | 79 memcpy(r1, (tb), (num_pixels)); \ |
| 81 memcpy(r2, (bb), (num_pixels)); \ | 80 memcpy(r2, (bb), (num_pixels)); \ |
| 82 /* replicate last byte */ \ | 81 /* replicate last byte */ \ |
| 83 memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \ | 82 memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \ |
| 84 memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \ | 83 memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \ |
| 85 Upsample16Pixels(r1, r2, out); \ | 84 Upsample16Pixels(r1, r2, out); \ |
| 86 } | 85 } |
| 87 | 86 |
| 88 #define CY 76283 | 87 //----------------------------------------------------------------------------- |
| 89 #define CVR 89858 | 88 // YUV->RGB conversion |
| 90 #define CUG 22014 | |
| 91 #define CVG 45773 | |
| 92 #define CUB 113618 | |
| 93 | 89 |
| 94 static const int16_t coef[4] = { CVR / 4, CUG, CVG / 2, CUB / 4 }; | 90 static const int16_t kCoeffs[4] = { kYScale, kVToR, kUToG, kVToG }; |
| 91 |
| 92 #define v255 vmov_n_u8(255) |
| 93 |
| 94 #define STORE_Rgb(out, r, g, b) do { \ |
| 95 const uint8x8x3_t r_g_b = {{ r, g, b }}; \ |
| 96 vst3_u8(out, r_g_b); \ |
| 97 } while (0) |
| 98 |
| 99 #define STORE_Bgr(out, r, g, b) do { \ |
| 100 const uint8x8x3_t b_g_r = {{ b, g, r }}; \ |
| 101 vst3_u8(out, b_g_r); \ |
| 102 } while (0) |
| 103 |
| 104 #define STORE_Rgba(out, r, g, b) do { \ |
| 105 const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \ |
| 106 vst4_u8(out, r_g_b_v255); \ |
| 107 } while (0) |
| 108 |
| 109 #define STORE_Bgra(out, r, g, b) do { \ |
| 110 const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \ |
| 111 vst4_u8(out, b_g_r_v255); \ |
| 112 } while (0) |
| 95 | 113 |
| 96 #define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \ | 114 #define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \ |
| 97 int i; \ | 115 int i; \ |
| 98 for (i = 0; i < N; i += 8) { \ | 116 for (i = 0; i < N; i += 8) { \ |
| 99 int off = ((cur_x) + i) * XSTEP; \ | 117 const int off = ((cur_x) + i) * XSTEP; \ |
| 100 uint8x8_t y = vld1_u8(src_y + (cur_x) + i); \ | 118 uint8x8_t y = vld1_u8((src_y) + (cur_x) + i); \ |
| 101 uint8x8_t u = vld1_u8((src_uv) + i); \ | 119 uint8x8_t u = vld1_u8((src_uv) + i); \ |
| 102 uint8x8_t v = vld1_u8((src_uv) + i + 16); \ | 120 uint8x8_t v = vld1_u8((src_uv) + i + 16); \ |
| 103 int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \ | 121 const int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \ |
| 104 int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \ | 122 const int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \ |
| 105 int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \ | 123 const int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \ |
| 106 \ | 124 int32x4_t yl = vmull_lane_s16(vget_low_s16(yy), cf16, 0); \ |
| 107 int16x8_t ud = vshlq_n_s16(uu, 1); \ | 125 int32x4_t yh = vmull_lane_s16(vget_high_s16(yy), cf16, 0); \ |
| 108 int16x8_t vd = vshlq_n_s16(vv, 1); \ | 126 const int32x4_t rl = vmlal_lane_s16(yl, vget_low_s16(vv), cf16, 1);\ |
| 109 \ | 127 const int32x4_t rh = vmlal_lane_s16(yh, vget_high_s16(vv), cf16, 1);\ |
| 110 int32x4_t vrl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(vv), 1), \ | 128 int32x4_t gl = vmlsl_lane_s16(yl, vget_low_s16(uu), cf16, 2); \ |
| 111 vget_low_s16(vd), cf16, 0); \ | 129 int32x4_t gh = vmlsl_lane_s16(yh, vget_high_s16(uu), cf16, 2); \ |
| 112 int32x4_t vrh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(vv), 1), \ | 130 const int32x4_t bl = vmovl_s16(vget_low_s16(uu)); \ |
| 113 vget_high_s16(vd), cf16, 0); \ | 131 const int32x4_t bh = vmovl_s16(vget_high_s16(uu)); \ |
| 114 int16x8_t vr = vcombine_s16(vrshrn_n_s32(vrl, 16), \ | 132 gl = vmlsl_lane_s16(gl, vget_low_s16(vv), cf16, 3); \ |
| 115 vrshrn_n_s32(vrh, 16)); \ | 133 gh = vmlsl_lane_s16(gh, vget_high_s16(vv), cf16, 3); \ |
| 116 \ | 134 yl = vmlaq_lane_s32(yl, bl, cf32, 0); \ |
| 117 int32x4_t vl = vmovl_s16(vget_low_s16(vv)); \ | 135 yh = vmlaq_lane_s32(yh, bh, cf32, 0); \ |
| 118 int32x4_t vh = vmovl_s16(vget_high_s16(vv)); \ | 136 /* vrshrn_n_s32() already incorporates the rounding constant */ \ |
| 119 int32x4_t ugl = vmlal_lane_s16(vl, vget_low_s16(uu), cf16, 1); \ | 137 y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, YUV_FIX2), \ |
| 120 int32x4_t ugh = vmlal_lane_s16(vh, vget_high_s16(uu), cf16, 1); \ | 138 vrshrn_n_s32(rh, YUV_FIX2))); \ |
| 121 int32x4_t gcl = vqdmlal_lane_s16(ugl, vget_low_s16(vv), cf16, 2); \ | 139 u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, YUV_FIX2), \ |
| 122 int32x4_t gch = vqdmlal_lane_s16(ugh, vget_high_s16(vv), cf16, 2); \ | 140 vrshrn_n_s32(gh, YUV_FIX2))); \ |
| 123 int16x8_t gc = vcombine_s16(vrshrn_n_s32(gcl, 16), \ | 141 v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(yl, YUV_FIX2), \ |
| 124 vrshrn_n_s32(gch, 16)); \ | 142 vrshrn_n_s32(yh, YUV_FIX2))); \ |
| 125 \ | 143 STORE_ ## FMT(out + off, y, u, v); \ |
| 126 int32x4_t ubl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(uu), 1), \ | |
| 127 vget_low_s16(ud), cf16, 3); \ | |
| 128 int32x4_t ubh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(uu), 1), \ | |
| 129 vget_high_s16(ud), cf16, 3); \ | |
| 130 int16x8_t ub = vcombine_s16(vrshrn_n_s32(ubl, 16), \ | |
| 131 vrshrn_n_s32(ubh, 16)); \ | |
| 132 \ | |
| 133 int32x4_t rl = vaddl_s16(vget_low_s16(yy), vget_low_s16(vr)); \ | |
| 134 int32x4_t rh = vaddl_s16(vget_high_s16(yy), vget_high_s16(vr)); \ | |
| 135 int32x4_t gl = vsubl_s16(vget_low_s16(yy), vget_low_s16(gc)); \ | |
| 136 int32x4_t gh = vsubl_s16(vget_high_s16(yy), vget_high_s16(gc)); \ | |
| 137 int32x4_t bl = vaddl_s16(vget_low_s16(yy), vget_low_s16(ub)); \ | |
| 138 int32x4_t bh = vaddl_s16(vget_high_s16(yy), vget_high_s16(ub)); \ | |
| 139 \ | |
| 140 rl = vmulq_lane_s32(rl, cf32, 0); \ | |
| 141 rh = vmulq_lane_s32(rh, cf32, 0); \ | |
| 142 gl = vmulq_lane_s32(gl, cf32, 0); \ | |
| 143 gh = vmulq_lane_s32(gh, cf32, 0); \ | |
| 144 bl = vmulq_lane_s32(bl, cf32, 0); \ | |
| 145 bh = vmulq_lane_s32(bh, cf32, 0); \ | |
| 146 \ | |
| 147 y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, 16), \ | |
| 148 vrshrn_n_s32(rh, 16))); \ | |
| 149 u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, 16), \ | |
| 150 vrshrn_n_s32(gh, 16))); \ | |
| 151 v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(bl, 16), \ | |
| 152 vrshrn_n_s32(bh, 16))); \ | |
| 153 STR_ ## FMT(out + off, y, u, v); \ | |
| 154 } \ | 144 } \ |
| 155 } | 145 } |
| 156 | 146 |
| 157 #define v255 vmov_n_u8(255) | 147 #define CONVERT1(FUNC, XSTEP, N, src_y, src_uv, rgb, cur_x) { \ |
| 158 | |
| 159 #define STR_Rgb(out, r, g, b) do { \ | |
| 160 const uint8x8x3_t r_g_b = {{ r, g, b }}; \ | |
| 161 vst3_u8(out, r_g_b); \ | |
| 162 } while (0) | |
| 163 | |
| 164 #define STR_Bgr(out, r, g, b) do { \ | |
| 165 const uint8x8x3_t b_g_r = {{ b, g, r }}; \ | |
| 166 vst3_u8(out, b_g_r); \ | |
| 167 } while (0) | |
| 168 | |
| 169 #define STR_Rgba(out, r, g, b) do { \ | |
| 170 const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \ | |
| 171 vst4_u8(out, r_g_b_v255); \ | |
| 172 } while (0) | |
| 173 | |
| 174 #define STR_Bgra(out, r, g, b) do { \ | |
| 175 const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \ | |
| 176 vst4_u8(out, b_g_r_v255); \ | |
| 177 } while (0) | |
| 178 | |
| 179 #define CONVERT1(FMT, XSTEP, N, src_y, src_uv, rgb, cur_x) { \ | |
| 180 int i; \ | 148 int i; \ |
| 181 for (i = 0; i < N; i++) { \ | 149 for (i = 0; i < N; i++) { \ |
| 182 int off = ((cur_x) + i) * XSTEP; \ | 150 const int off = ((cur_x) + i) * XSTEP; \ |
| 183 int y = src_y[(cur_x) + i]; \ | 151 const int y = src_y[(cur_x) + i]; \ |
| 184 int u = (src_uv)[i]; \ | 152 const int u = (src_uv)[i]; \ |
| 185 int v = (src_uv)[i + 16]; \ | 153 const int v = (src_uv)[i + 16]; \ |
| 186 VP8YuvTo ## FMT(y, u, v, rgb + off); \ | 154 FUNC(y, u, v, rgb + off); \ |
| 187 } \ | 155 } \ |
| 188 } | 156 } |
| 189 | 157 |
| 190 #define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \ | 158 #define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \ |
| 191 top_dst, bottom_dst, cur_x, len) { \ | 159 top_dst, bottom_dst, cur_x, len) { \ |
| 192 if (top_y) { \ | 160 CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \ |
| 193 CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \ | 161 if (bottom_y != NULL) { \ |
| 194 } \ | |
| 195 if (bottom_y) { \ | |
| 196 CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \ | 162 CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \ |
| 197 } \ | 163 } \ |
| 198 } | 164 } |
| 199 | 165 |
| 200 #define CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, uv, \ | 166 #define CONVERT2RGB_1(FUNC, XSTEP, top_y, bottom_y, uv, \ |
| 201 top_dst, bottom_dst, cur_x, len) { \ | 167 top_dst, bottom_dst, cur_x, len) { \ |
| 202 if (top_y) { \ | 168 CONVERT1(FUNC, XSTEP, len, top_y, uv, top_dst, cur_x); \ |
| 203 CONVERT1(FMT, XSTEP, len, top_y, uv, top_dst, cur_x); \ | 169 if (bottom_y != NULL) { \ |
| 204 } \ | 170 CONVERT1(FUNC, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \ |
| 205 if (bottom_y) { \ | |
| 206 CONVERT1(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \ | |
| 207 } \ | 171 } \ |
| 208 } | 172 } |
| 209 | 173 |
| 210 #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ | 174 #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ |
| 211 static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ | 175 static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ |
| 212 const uint8_t *top_u, const uint8_t *top_v, \ | 176 const uint8_t *top_u, const uint8_t *top_v, \ |
| 213 const uint8_t *cur_u, const uint8_t *cur_v, \ | 177 const uint8_t *cur_u, const uint8_t *cur_v, \ |
| 214 uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ | 178 uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ |
| 215 int block; \ | 179 int block; \ |
| 216 /* 16 byte aligned array to cache reconstructed u and v */ \ | 180 /* 16 byte aligned array to cache reconstructed u and v */ \ |
| 217 uint8_t uv_buf[2 * 32 + 15]; \ | 181 uint8_t uv_buf[2 * 32 + 15]; \ |
| 218 uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ | 182 uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ |
| 219 const int uv_len = (len + 1) >> 1; \ | 183 const int uv_len = (len + 1) >> 1; \ |
| 220 /* 9 pixels must be read-able for each block */ \ | 184 /* 9 pixels must be read-able for each block */ \ |
| 221 const int num_blocks = (uv_len - 1) >> 3; \ | 185 const int num_blocks = (uv_len - 1) >> 3; \ |
| 222 const int leftover = uv_len - num_blocks * 8; \ | 186 const int leftover = uv_len - num_blocks * 8; \ |
| 223 const int last_pos = 1 + 16 * num_blocks; \ | 187 const int last_pos = 1 + 16 * num_blocks; \ |
| 224 \ | 188 \ |
| 225 const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \ | 189 const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \ |
| 226 const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \ | 190 const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \ |
| 227 \ | 191 \ |
| 228 const int16x4_t cf16 = vld1_s16(coef); \ | 192 const int16x4_t cf16 = vld1_s16(kCoeffs); \ |
| 229 const int32x2_t cf32 = vmov_n_s32(CY); \ | 193 const int32x2_t cf32 = vmov_n_s32(kUToB); \ |
| 230 const uint8x8_t u16 = vmov_n_u8(16); \ | 194 const uint8x8_t u16 = vmov_n_u8(16); \ |
| 231 const uint8x8_t u128 = vmov_n_u8(128); \ | 195 const uint8x8_t u128 = vmov_n_u8(128); \ |
| 232 \ | 196 \ |
| 233 /* Treat the first pixel in regular way */ \ | 197 /* Treat the first pixel in regular way */ \ |
| 234 if (top_y) { \ | 198 assert(top_y != NULL); \ |
| 199 { \ |
| 235 const int u0 = (top_u[0] + u_diag) >> 1; \ | 200 const int u0 = (top_u[0] + u_diag) >> 1; \ |
| 236 const int v0 = (top_v[0] + v_diag) >> 1; \ | 201 const int v0 = (top_v[0] + v_diag) >> 1; \ |
| 237 VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \ | 202 VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \ |
| 238 } \ | 203 } \ |
| 239 if (bottom_y) { \ | 204 if (bottom_y != NULL) { \ |
| 240 const int u0 = (cur_u[0] + u_diag) >> 1; \ | 205 const int u0 = (cur_u[0] + u_diag) >> 1; \ |
| 241 const int v0 = (cur_v[0] + v_diag) >> 1; \ | 206 const int v0 = (cur_v[0] + v_diag) >> 1; \ |
| 242 VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \ | 207 VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \ |
| 243 } \ | 208 } \ |
| 244 \ | 209 \ |
| 245 for (block = 0; block < num_blocks; ++block) { \ | 210 for (block = 0; block < num_blocks; ++block) { \ |
| 246 UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \ | 211 UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \ |
| 247 UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \ | 212 UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \ |
| 248 CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \ | 213 CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \ |
| 249 top_dst, bottom_dst, 16 * block + 1, 16); \ | 214 top_dst, bottom_dst, 16 * block + 1, 16); \ |
| 250 top_u += 8; \ | 215 top_u += 8; \ |
| 251 cur_u += 8; \ | 216 cur_u += 8; \ |
| 252 top_v += 8; \ | 217 top_v += 8; \ |
| 253 cur_v += 8; \ | 218 cur_v += 8; \ |
| 254 } \ | 219 } \ |
| 255 \ | 220 \ |
| 256 UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \ | 221 UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \ |
| 257 UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \ | 222 UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \ |
| 258 CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, r_uv, \ | 223 CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv, \ |
| 259 top_dst, bottom_dst, last_pos, len - last_pos); \ | 224 top_dst, bottom_dst, last_pos, len - last_pos); \ |
| 260 } | 225 } |
| 261 | 226 |
| 262 // NEON variants of the fancy upsampler. | 227 // NEON variants of the fancy upsampler. |
| 263 NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3) | 228 NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3) |
| 264 NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3) | 229 NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3) |
| 265 NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4) | 230 NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4) |
| 266 NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4) | 231 NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4) |
| 267 | 232 |
| 268 #endif // FANCY_UPSAMPLING | 233 #endif // FANCY_UPSAMPLING |
| 269 | 234 |
| 270 #endif // WEBP_USE_NEON | 235 #endif // WEBP_USE_NEON |
| 271 | 236 |
| 272 //------------------------------------------------------------------------------ | 237 //------------------------------------------------------------------------------ |
| 273 | 238 |
| 239 #ifdef FANCY_UPSAMPLING |
| 240 |
| 274 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; | 241 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; |
| 275 | 242 |
| 276 void WebPInitUpsamplersNEON(void) { | 243 void WebPInitUpsamplersNEON(void) { |
| 277 #if defined(WEBP_USE_NEON) | 244 #if defined(WEBP_USE_NEON) |
| 278 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON; | 245 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON; |
| 279 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON; | 246 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON; |
| 280 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON; | 247 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON; |
| 281 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON; | 248 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON; |
| 282 #endif // WEBP_USE_NEON | 249 #endif // WEBP_USE_NEON |
| 283 } | 250 } |
| 284 | 251 |
| 285 void WebPInitPremultiplyNEON(void) { | 252 void WebPInitPremultiplyNEON(void) { |
| 286 #if defined(WEBP_USE_NEON) | 253 #if defined(WEBP_USE_NEON) |
| 287 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON; | 254 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON; |
| 288 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON; | 255 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON; |
| 289 #endif // WEBP_USE_NEON | 256 #endif // WEBP_USE_NEON |
| 290 } | 257 } |
| 291 | 258 |
| 292 #if defined(__cplusplus) || defined(c_plusplus) | 259 #else |
| 293 } // extern "C" | 260 |
| 294 #endif | 261 // this empty function is to avoid an empty .o |
| 262 void WebPInitPremultiplyNEON(void) {} |
| 263 |
| 264 #endif // FANCY_UPSAMPLING |
| 265 |
| OLD | NEW |