| Index: third_party/libpng/arm/arm_palette.c
|
| diff --git a/third_party/libpng/arm/arm_palette.c b/third_party/libpng/arm/arm_palette.c
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..df7bcef8ce7dc8219c134426d433d4189cbaf7aa
|
| --- /dev/null
|
| +++ b/third_party/libpng/arm/arm_palette.c
|
| @@ -0,0 +1,77 @@
|
| +/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
|
| + *
|
| + * Copyright (c) 2017 ARM Limited
|
| + * Copyright (c) 2017 Glenn Randers-Pehrson
|
| + * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
|
| + *
|
| + * This code is released under the libpng license.
|
| + * For conditions of distribution and use, see the disclaimer
|
| + * and license in png.h
|
| + */
|
| +
|
| +#include "../pngpriv.h"
|
| +
|
| +#if PNG_ARM_NEON_IMPLEMENTATION == 1
|
| +
|
| +#include <arm_neon.h>
|
| +
|
| +/* Interleave the RGB palette with the separate alpha table: writes 4 bytes
|
| + * (R, G, B, A) into png_ptr->row_tmp_palette for each index below
|
| + * 1 << row_info->bit_depth; indices >= png_ptr->num_trans get alpha 0xff.
|
| + */
|
| +void
|
| +png_riffle_palette(png_structrp png_ptr, png_row_infop row_info)
|
| +{
|
| +   png_const_colorp colors = png_ptr->palette;
|
| +   png_bytep rgba = png_ptr->row_tmp_palette;
|
| +   png_const_bytep alpha = png_ptr->trans_alpha;
|
| +   int alpha_count = png_ptr->num_trans;
|
| +   int idx;
|
| +
|
| +   for (idx = 0; idx < (1 << row_info->bit_depth); idx++) {
|
| +      png_bytep entry = rgba + (idx << 2); /* 4 output bytes per index */
|
| +      entry[0] = colors[idx].red;
|
| +      entry[1] = colors[idx].green;
|
| +      entry[2] = colors[idx].blue;
|
| +      entry[3] = (idx < alpha_count) ? alpha[idx] : 0xff;
|
| +   }
|
| +}
|
| +
|
| +/* Expands the tail of a palettized row into RGBA, 4 pixels per NEON store.
|
| + * *ddp enters at the last byte of the output row (*ssp presumably at the last
|
| + * input byte); both are moved back past the pixels handled here. Returns the
|
| + * number of pixels handled, always a multiple of 4 and <= row_info->width.
|
| + */
|
| +int
|
| +png_do_expand_palette_neon(png_structrp png_ptr, png_row_infop row_info,
|
| +   png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp)
|
| +{
|
| +   png_uint_32 row_width = row_info->width;
|
| +   const png_uint_32 *riffled_palette = (const png_uint_32*)png_ptr->row_tmp_palette;
|
| +   png_uint_32 i; /* pixels handled so far; unsigned to match row_width */
|
| +
|
| +   if (row_width >= 4) {
|
| +      /* *ddp is the last output byte and each store writes 16 bytes forward,
|
| +         so step back 15 to the first byte of the final 4-pixel group. */
|
| +      *ddp = *ddp - 15;
|
| +   }
|
| +   /* Whole groups of 4 only: a partial group would load indices and store
|
| +      16 bytes outside this row's pixels. row_width - i cannot wrap. */
|
| +   for (i = 0; row_width - i >= 4; i += 4) {
|
| +      uint32x4_t cur;
|
| +      png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
|
| +      cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
|
| +      cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
|
| +      cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
|
| +      cur = vld1q_lane_u32(riffled_palette + *(sp), cur, 3);
|
| +      vst1q_u32((void *)dp, cur);
|
| +   }
|
| +
|
| +   /* NOTE(review): when pixels remain, *ddp ends 15 bytes below the last
|
| +      unwritten output byte (the -15 above persists); confirm the caller. */
|
| +   *ssp = *ssp - i;
|
| +   *ddp = *ddp - (i << 2);
|
| +   return (int)i;
|
| +}
|
| +
|
| +#endif /* PNG_ARM_NEON_IMPLEMENTATION */
|
|
|