| Index: simd/jsimd_arm64.c
|
| diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm64.c
|
| similarity index 64%
|
| copy from simd/jsimd_arm.c
|
| copy to simd/jsimd_arm64.c
|
| index bd717a409798c92e447c91e15235298cb5c81293..65724cb9ff94a816ffd2706e140333f28656039f 100644
|
| --- a/simd/jsimd_arm.c
|
| +++ b/simd/jsimd_arm64.c
|
| @@ -1,18 +1,16 @@
|
| /*
|
| - * jsimd_arm.c
|
| + * jsimd_arm64.c
|
| *
|
| * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| - * Copyright 2009-2011 D. R. Commander
|
| + * Copyright 2009-2011, 2013-2014 D. R. Commander
|
| *
|
| * Based on the x86 SIMD extension for IJG JPEG library,
|
| * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| *
|
| * This file contains the interface between the "normal" portions
|
| - * of the library and the SIMD implementations when running on
|
| - * ARM architecture.
|
| - *
|
| - * Based on the stubs from 'jsimd_none.c'
|
| + * of the library and the SIMD implementations when running on a
|
| + * 64-bit ARM architecture.
|
| */
|
|
|
| #define JPEG_INTERNALS
|
| @@ -29,104 +27,35 @@
|
|
|
| static unsigned int simd_support = ~0;
|
|
|
| -#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
| -
|
| -#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
| -
|
| -LOCAL(int)
|
| -check_feature (char *buffer, char *feature)
|
| -{
|
| - char *p;
|
| - if (*feature == 0)
|
| - return 0;
|
| - if (strncmp(buffer, "Features", 8) != 0)
|
| - return 0;
|
| - buffer += 8;
|
| - while (isspace(*buffer))
|
| - buffer++;
|
| -
|
| - /* Check if 'feature' is present in the buffer as a separate word */
|
| - while ((p = strstr(buffer, feature))) {
|
| - if (p > buffer && !isspace(*(p - 1))) {
|
| - buffer++;
|
| - continue;
|
| - }
|
| - p += strlen(feature);
|
| - if (*p != 0 && !isspace(*p)) {
|
| - buffer++;
|
| - continue;
|
| - }
|
| - return 1;
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -LOCAL(int)
|
| -parse_proc_cpuinfo (int bufsize)
|
| -{
|
| - char *buffer = (char *)malloc(bufsize);
|
| - FILE *fd;
|
| - simd_support = 0;
|
| -
|
| - if (!buffer)
|
| - return 0;
|
| -
|
| - fd = fopen("/proc/cpuinfo", "r");
|
| - if (fd) {
|
| - while (fgets(buffer, bufsize, fd)) {
|
| - if (!strchr(buffer, '\n') && !feof(fd)) {
|
| - /* "impossible" happened - insufficient size of the buffer! */
|
| - fclose(fd);
|
| - free(buffer);
|
| - return 0;
|
| - }
|
| - if (check_feature(buffer, "neon"))
|
| - simd_support |= JSIMD_ARM_NEON;
|
| - }
|
| - fclose(fd);
|
| - }
|
| - free(buffer);
|
| - return 1;
|
| -}
|
| -
|
| -#endif
|
| -
|
| /*
|
| * Check what SIMD accelerations are supported.
|
| *
|
| * FIXME: This code is racy under a multi-threaded environment.
|
| */
|
| +
|
| +/*
|
| + * ARMv8 architectures support the NEON extensions by default.  Unlike with
|
| + * ARMv7, NEON is no longer optional, so no run-time detection is needed.
|
| + */
|
| +
|
| +
|
| LOCAL(void)
|
| init_simd (void)
|
| {
|
| char *env = NULL;
|
| -#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
| - int bufsize = 1024; /* an initial guess for the line buffer size limit */
|
| -#endif
|
|
|
| if (simd_support != ~0U)
|
| return;
|
|
|
| simd_support = 0;
|
|
|
| -#if defined(__ARM_NEON__)
|
| simd_support |= JSIMD_ARM_NEON;
|
| -#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
| - /* We still have a chance to use NEON regardless of globally used
|
| - * -mcpu/-mfpu options passed to gcc by performing runtime detection via
|
| - * /proc/cpuinfo parsing on linux/android */
|
| - while (!parse_proc_cpuinfo(bufsize)) {
|
| - bufsize *= 2;
|
| - if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
|
| - break;
|
| - }
|
| -#endif
|
|
|
| /* Force different settings through environment variables */
|
| - env = getenv("JSIMD_FORCE_ARM_NEON");
|
| + env = getenv("JSIMD_FORCENEON");
|
| if ((env != NULL) && (strcmp(env, "1") == 0))
|
| simd_support &= JSIMD_ARM_NEON;
|
| - env = getenv("JSIMD_FORCE_NO_SIMD");
|
| + env = getenv("JSIMD_FORCENONE");
|
| if ((env != NULL) && (strcmp(env, "1") == 0))
|
| simd_support = 0;
|
| }
|
| @@ -136,17 +65,6 @@ jsimd_can_rgb_ycc (void)
|
| {
|
| init_simd();
|
|
|
| - /* The code is optimised for these values only */
|
| - if (BITS_IN_JSAMPLE != 8)
|
| - return 0;
|
| - if (sizeof(JDIMENSION) != 4)
|
| - return 0;
|
| - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
| - return 0;
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - return 1;
|
| -
|
| return 0;
|
| }
|
|
|
| @@ -170,6 +88,24 @@ jsimd_can_ycc_rgb (void)
|
| return 0;
|
| if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
| return 0;
|
| +
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + return 1;
|
| +
|
| + return 0;
|
| +}
|
| +
|
| +GLOBAL(int)
|
| +jsimd_can_ycc_rgb565 (void)
|
| +{
|
| + init_simd();
|
| +
|
| + /* The code is optimised for these values only */
|
| + if (BITS_IN_JSAMPLE != 8)
|
| + return 0;
|
| + if (sizeof(JDIMENSION) != 4)
|
| + return 0;
|
| +
|
| if (simd_support & JSIMD_ARM_NEON)
|
| return 1;
|
|
|
| @@ -181,40 +117,6 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| JDIMENSION output_row, int num_rows)
|
| {
|
| - void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
| -
|
| - switch(cinfo->in_color_space)
|
| - {
|
| - case JCS_EXT_RGB:
|
| - neonfct=jsimd_extrgb_ycc_convert_neon;
|
| - break;
|
| - case JCS_EXT_RGBX:
|
| - case JCS_EXT_RGBA:
|
| - neonfct=jsimd_extrgbx_ycc_convert_neon;
|
| - break;
|
| - case JCS_EXT_BGR:
|
| - neonfct=jsimd_extbgr_ycc_convert_neon;
|
| - break;
|
| - case JCS_EXT_BGRX:
|
| - case JCS_EXT_BGRA:
|
| - neonfct=jsimd_extbgrx_ycc_convert_neon;
|
| - break;
|
| - case JCS_EXT_XBGR:
|
| - case JCS_EXT_ABGR:
|
| - neonfct=jsimd_extxbgr_ycc_convert_neon;
|
| - break;
|
| - case JCS_EXT_XRGB:
|
| - case JCS_EXT_ARGB:
|
| - neonfct=jsimd_extxrgb_ycc_convert_neon;
|
| - break;
|
| - default:
|
| - neonfct=jsimd_extrgb_ycc_convert_neon;
|
| - break;
|
| - }
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - neonfct(cinfo->image_width, input_buf,
|
| - output_buf, output_row, num_rows);
|
| }
|
|
|
| GLOBAL(void)
|
| @@ -231,8 +133,7 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| {
|
| void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
|
|
|
| - switch(cinfo->out_color_space)
|
| - {
|
| + switch(cinfo->out_color_space) {
|
| case JCS_EXT_RGB:
|
| neonfct=jsimd_ycc_extrgb_convert_neon;
|
| break;
|
| @@ -255,14 +156,23 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| case JCS_EXT_ARGB:
|
| neonfct=jsimd_ycc_extxrgb_convert_neon;
|
| break;
|
| - default:
|
| + default:
|
| neonfct=jsimd_ycc_extrgb_convert_neon;
|
| break;
|
| }
|
|
|
| if (simd_support & JSIMD_ARM_NEON)
|
| - neonfct(cinfo->output_width, input_buf,
|
| - input_row, output_buf, num_rows);
|
| + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
| +}
|
| +
|
| +GLOBAL(void)
|
| +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| +{
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
|
| + output_buf, num_rows);
|
| }
|
|
|
| GLOBAL(int)
|
| @@ -311,7 +221,7 @@ jsimd_can_h2v1_upsample (void)
|
|
|
| GLOBAL(void)
|
| jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
| - jpeg_component_info * compptr,
|
| + jpeg_component_info * compptr,
|
| JSAMPARRAY input_data,
|
| JSAMPARRAY * output_data_ptr)
|
| {
|
| @@ -319,7 +229,7 @@ jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
|
|
| GLOBAL(void)
|
| jsimd_h2v1_upsample (j_decompress_ptr cinfo,
|
| - jpeg_component_info * compptr,
|
| + jpeg_component_info * compptr,
|
| JSAMPARRAY input_data,
|
| JSAMPARRAY * output_data_ptr)
|
| {
|
| @@ -338,21 +248,12 @@ jsimd_can_h2v1_fancy_upsample (void)
|
| {
|
| init_simd();
|
|
|
| - /* The code is optimised for these values only */
|
| - if (BITS_IN_JSAMPLE != 8)
|
| - return 0;
|
| - if (sizeof(JDIMENSION) != 4)
|
| - return 0;
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - return 1;
|
| -
|
| return 0;
|
| }
|
|
|
| GLOBAL(void)
|
| jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
|
| - jpeg_component_info * compptr,
|
| + jpeg_component_info * compptr,
|
| JSAMPARRAY input_data,
|
| JSAMPARRAY * output_data_ptr)
|
| {
|
| @@ -360,13 +261,10 @@ jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
|
|
|
| GLOBAL(void)
|
| jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
|
| - jpeg_component_info * compptr,
|
| + jpeg_component_info * compptr,
|
| JSAMPARRAY input_data,
|
| JSAMPARRAY * output_data_ptr)
|
| {
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
|
| - compptr->downsampled_width, input_data, output_data_ptr);
|
| }
|
|
|
| GLOBAL(int)
|
| @@ -406,19 +304,6 @@ jsimd_can_convsamp (void)
|
| {
|
| init_simd();
|
|
|
| - /* The code is optimised for these values only */
|
| - if (DCTSIZE != 8)
|
| - return 0;
|
| - if (BITS_IN_JSAMPLE != 8)
|
| - return 0;
|
| - if (sizeof(JDIMENSION) != 4)
|
| - return 0;
|
| - if (sizeof(DCTELEM) != 2)
|
| - return 0;
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - return 1;
|
| -
|
| return 0;
|
| }
|
|
|
| @@ -434,8 +319,6 @@ GLOBAL(void)
|
| jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
|
| DCTELEM * workspace)
|
| {
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - jsimd_convsamp_neon(sample_data, start_col, workspace);
|
| }
|
|
|
| GLOBAL(void)
|
| @@ -457,15 +340,6 @@ jsimd_can_fdct_ifast (void)
|
| {
|
| init_simd();
|
|
|
| - /* The code is optimised for these values only */
|
| - if (DCTSIZE != 8)
|
| - return 0;
|
| - if (sizeof(DCTELEM) != 2)
|
| - return 0;
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - return 1;
|
| -
|
| return 0;
|
| }
|
|
|
| @@ -485,8 +359,6 @@ jsimd_fdct_islow (DCTELEM * data)
|
| GLOBAL(void)
|
| jsimd_fdct_ifast (DCTELEM * data)
|
| {
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - jsimd_fdct_ifast_neon(data);
|
| }
|
|
|
| GLOBAL(void)
|
| @@ -499,17 +371,6 @@ jsimd_can_quantize (void)
|
| {
|
| init_simd();
|
|
|
| - /* The code is optimised for these values only */
|
| - if (DCTSIZE != 8)
|
| - return 0;
|
| - if (sizeof(JCOEF) != 2)
|
| - return 0;
|
| - if (sizeof(DCTELEM) != 2)
|
| - return 0;
|
| -
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - return 1;
|
| -
|
| return 0;
|
| }
|
|
|
| @@ -525,8 +386,6 @@ GLOBAL(void)
|
| jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
|
| DCTELEM * workspace)
|
| {
|
| - if (simd_support & JSIMD_ARM_NEON)
|
| - jsimd_quantize_neon(coef_block, divisors, workspace);
|
| }
|
|
|
| GLOBAL(void)
|
| @@ -552,7 +411,7 @@ jsimd_can_idct_2x2 (void)
|
| if (sizeof(ISLOW_MULT_TYPE) != 2)
|
| return 0;
|
|
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| return 1;
|
|
|
| return 0;
|
| @@ -575,7 +434,7 @@ jsimd_can_idct_4x4 (void)
|
| if (sizeof(ISLOW_MULT_TYPE) != 2)
|
| return 0;
|
|
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| return 1;
|
|
|
| return 0;
|
| @@ -586,8 +445,9 @@ jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| JDIMENSION output_col)
|
| {
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| - jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
|
| + output_col);
|
| }
|
|
|
| GLOBAL(void)
|
| @@ -595,8 +455,9 @@ jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| JDIMENSION output_col)
|
| {
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| - jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
|
| + output_col);
|
| }
|
|
|
| GLOBAL(int)
|
| @@ -641,7 +502,7 @@ jsimd_can_idct_ifast (void)
|
| if (IFAST_SCALE_BITS != 2)
|
| return 0;
|
|
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| return 1;
|
|
|
| return 0;
|
| @@ -657,26 +518,27 @@ jsimd_can_idct_float (void)
|
|
|
| GLOBAL(void)
|
| jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| - JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| - JDIMENSION output_col)
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| + JDIMENSION output_col)
|
| {
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| - jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
|
| + output_col);
|
| }
|
|
|
| GLOBAL(void)
|
| jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| - JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| - JDIMENSION output_col)
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| + JDIMENSION output_col)
|
| {
|
| - if ((simd_support & JSIMD_ARM_NEON))
|
| - jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
|
| + if (simd_support & JSIMD_ARM_NEON)
|
| + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
|
| + output_col);
|
| }
|
|
|
| GLOBAL(void)
|
| jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| - JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| - JDIMENSION output_col)
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| + JDIMENSION output_col)
|
| {
|
| }
|
| -
|
|
|