| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  * jsimd_arm64.c | 2  * jsimd_arm64.c | 
| 3  * | 3  * | 
| 4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 
| 5  * Copyright 2009-2011, 2013-2014 D. R. Commander | 5  * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander | 
|  | 6  * Copyright 2015-2016 Matthieu Darbois | 
| 6  * | 7  * | 
| 7  * Based on the x86 SIMD extension for IJG JPEG library, | 8  * Based on the x86 SIMD extension for IJG JPEG library, | 
| 8  * Copyright (C) 1999-2006, MIYASAKA Masaru. | 9  * Copyright (C) 1999-2006, MIYASAKA Masaru. | 
| 9  * For conditions of distribution and use, see copyright notice in jsimdext.inc | 10  * For conditions of distribution and use, see copyright notice in jsimdext.inc | 
| 10  * | 11  * | 
| 11  * This file contains the interface between the "normal" portions | 12  * This file contains the interface between the "normal" portions | 
| 12  * of the library and the SIMD implementations when running on a | 13  * of the library and the SIMD implementations when running on a | 
| 13  * 64-bit ARM architecture. | 14  * 64-bit ARM architecture. | 
| 14  */ | 15  */ | 
| 15 | 16 | 
| 16 #define JPEG_INTERNALS | 17 #define JPEG_INTERNALS | 
| 17 #include "../jinclude.h" | 18 #include "../jinclude.h" | 
| 18 #include "../jpeglib.h" | 19 #include "../jpeglib.h" | 
| 19 #include "../jsimd.h" | 20 #include "../jsimd.h" | 
| 20 #include "../jdct.h" | 21 #include "../jdct.h" | 
| 21 #include "../jsimddct.h" | 22 #include "../jsimddct.h" | 
| 22 #include "jsimd.h" | 23 #include "jsimd.h" | 
| 23 | 24 | 
| 24 #include <stdio.h> | 25 #include <stdio.h> | 
| 25 #include <string.h> | 26 #include <string.h> | 
| 26 #include <ctype.h> | 27 #include <ctype.h> | 
| 27 | 28 | 
|  | 29 #define JSIMD_FASTLD3 1 | 
|  | 30 #define JSIMD_FASTST3 2 | 
|  | 31 #define JSIMD_FASTTBL 4 | 
|  | 32 | 
| 28 static unsigned int simd_support = ~0; | 33 static unsigned int simd_support = ~0; | 
|  | 34 static unsigned int simd_huffman = 1; | 
|  | 35 static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 | | 
|  | 36                                     JSIMD_FASTTBL; | 
|  | 37 | 
|  | 38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | 
|  | 39 | 
|  | 40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) | 
|  | 41 | 
|  | 42 LOCAL(int) | 
|  | 43 check_cpuinfo (char *buffer, const char *field, char *value) | 
|  | 44 { | 
|  | 45   char *p; | 
|  | 46   if (*value == 0) | 
|  | 47     return 0; | 
|  | 48   if (strncmp(buffer, field, strlen(field)) != 0) | 
|  | 49     return 0; | 
|  | 50   buffer += strlen(field); | 
|  | 51   while (isspace(*buffer)) | 
|  | 52     buffer++; | 
|  | 53 | 
|  | 54   /* Check if 'value' is present in the buffer as a separate word */ | 
|  | 55   while ((p = strstr(buffer, value))) { | 
|  | 56     if (p > buffer && !isspace(*(p - 1))) { | 
|  | 57       buffer++; | 
|  | 58       continue; | 
|  | 59     } | 
|  | 60     p += strlen(value); | 
|  | 61     if (*p != 0 && !isspace(*p)) { | 
|  | 62       buffer++; | 
|  | 63       continue; | 
|  | 64     } | 
|  | 65     return 1; | 
|  | 66   } | 
|  | 67   return 0; | 
|  | 68 } | 
|  | 69 | 
|  | 70 LOCAL(int) | 
|  | 71 parse_proc_cpuinfo (int bufsize) | 
|  | 72 { | 
|  | 73   char *buffer = (char *)malloc(bufsize); | 
|  | 74   FILE *fd; | 
|  | 75 | 
|  | 76   if (!buffer) | 
|  | 77     return 0; | 
|  | 78 | 
|  | 79   fd = fopen("/proc/cpuinfo", "r"); | 
|  | 80   if (fd) { | 
|  | 81     while (fgets(buffer, bufsize, fd)) { | 
|  | 82       if (!strchr(buffer, '\n') && !feof(fd)) { | 
|  | 83         /* "impossible" happened - insufficient size of the buffer! */ | 
|  | 84         fclose(fd); | 
|  | 85         free(buffer); | 
|  | 86         return 0; | 
|  | 87       } | 
|  | 88       if (check_cpuinfo(buffer, "CPU part", "0xd03") || | 
|  | 89           check_cpuinfo(buffer, "CPU part", "0xd07")) | 
|  | 90         /* The Cortex-A53 has a slow tbl implementation.  We can gain a few | 
|  | 91            percent speedup by disabling the use of that instruction.  The | 
|  | 92            speedup on Cortex-A57 is more subtle but still measurable. */ | 
|  | 93         simd_features &= ~JSIMD_FASTTBL; | 
|  | 94       else if (check_cpuinfo(buffer, "CPU part", "0x0a1")) | 
|  | 95         /* The SIMD version of Huffman encoding is slower than the C version on | 
|  | 96            Cavium ThunderX.  Also, ld3 and st3 are abysmally slow on that | 
|  | 97            CPU. */ | 
|  | 98         simd_huffman = simd_features = 0; | 
|  | 99     } | 
|  | 100     fclose(fd); | 
|  | 101   } | 
|  | 102   free(buffer); | 
|  | 103   return 1; | 
|  | 104 } | 
|  | 105 | 
|  | 106 #endif | 
| 29 | 107 | 
| 30 /* | 108 /* | 
| 31  * Check what SIMD accelerations are supported. | 109  * Check what SIMD accelerations are supported. | 
| 32  * | 110  * | 
| 33  * FIXME: This code is racy under a multi-threaded environment. | 111  * FIXME: This code is racy under a multi-threaded environment. | 
| 34  */ | 112  */ | 
| 35 | 113 | 
| 36 /* | 114 /* | 
| 37  * ARMv8 architectures support NEON extensions by default. | 115  * ARMv8 architectures support NEON extensions by default. | 
| 38  * It is no longer optional as it was with ARMv7. | 116  * It is no longer optional as it was with ARMv7. | 
| 39  */ | 117  */ | 
| 40 | 118 | 
| 41 | 119 | 
| 42 LOCAL(void) | 120 LOCAL(void) | 
| 43 init_simd (void) | 121 init_simd (void) | 
| 44 { | 122 { | 
| 45   char *env = NULL; | 123   char *env = NULL; | 
|  | 124 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | 
|  | 125   int bufsize = 1024; /* an initial guess for the line buffer size limit */ | 
|  | 126 #endif | 
| 46 | 127 | 
| 47   if (simd_support != ~0U) | 128   if (simd_support != ~0U) | 
| 48     return; | 129     return; | 
| 49 | 130 | 
| 50   simd_support = 0; | 131   simd_support = 0; | 
| 51 | 132 | 
| 52   simd_support |= JSIMD_ARM_NEON; | 133   simd_support |= JSIMD_ARM_NEON; | 
|  | 134 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | 
|  | 135   while (!parse_proc_cpuinfo(bufsize)) { | 
|  | 136     bufsize *= 2; | 
|  | 137     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) | 
|  | 138       break; | 
|  | 139   } | 
|  | 140 #endif | 
| 53 | 141 | 
| 54   /* Force different settings through environment variables */ | 142   /* Force different settings through environment variables */ | 
| 55   env = getenv("JSIMD_FORCENEON"); | 143   env = getenv("JSIMD_FORCENEON"); | 
| 56   if ((env != NULL) && (strcmp(env, "1") == 0)) | 144   if ((env != NULL) && (strcmp(env, "1") == 0)) | 
| 57     simd_support &= JSIMD_ARM_NEON; | 145     simd_support &= JSIMD_ARM_NEON; | 
| 58   env = getenv("JSIMD_FORCENONE"); | 146   env = getenv("JSIMD_FORCENONE"); | 
| 59   if ((env != NULL) && (strcmp(env, "1") == 0)) | 147   if ((env != NULL) && (strcmp(env, "1") == 0)) | 
| 60     simd_support = 0; | 148     simd_support = 0; | 
|  | 149   env = getenv("JSIMD_NOHUFFENC"); | 
|  | 150   if ((env != NULL) && (strcmp(env, "1") == 0)) | 
|  | 151     simd_huffman = 0; | 
|  | 152   env = getenv("JSIMD_FASTLD3"); | 
|  | 153   if ((env != NULL) && (strcmp(env, "1") == 0)) | 
|  | 154     simd_features |= JSIMD_FASTLD3; | 
|  | 155   if ((env != NULL) && (strcmp(env, "0") == 0)) | 
|  | 156     simd_features &= ~JSIMD_FASTLD3; | 
|  | 157   env = getenv("JSIMD_FASTST3"); | 
|  | 158   if ((env != NULL) && (strcmp(env, "1") == 0)) | 
|  | 159     simd_features |= JSIMD_FASTST3; | 
|  | 160   if ((env != NULL) && (strcmp(env, "0") == 0)) | 
|  | 161     simd_features &= ~JSIMD_FASTST3; | 
| 61 } | 162 } | 
| 62 | 163 | 
| 63 GLOBAL(int) | 164 GLOBAL(int) | 
| 64 jsimd_can_rgb_ycc (void) | 165 jsimd_can_rgb_ycc (void) | 
| 65 { | 166 { | 
| 66   init_simd(); | 167   init_simd(); | 
| 67 | 168 | 
|  | 169   /* The code is optimised for these values only */ | 
|  | 170   if (BITS_IN_JSAMPLE != 8) | 
|  | 171     return 0; | 
|  | 172   if (sizeof(JDIMENSION) != 4) | 
|  | 173     return 0; | 
|  | 174   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 
|  | 175     return 0; | 
|  | 176 | 
|  | 177   if (simd_support & JSIMD_ARM_NEON) | 
|  | 178     return 1; | 
|  | 179 | 
| 68   return 0; | 180   return 0; | 
| 69 } | 181 } | 
| 70 | 182 | 
| 71 GLOBAL(int) | 183 GLOBAL(int) | 
| 72 jsimd_can_rgb_gray (void) | 184 jsimd_can_rgb_gray (void) | 
| 73 { | 185 { | 
| 74   init_simd(); | 186   init_simd(); | 
| 75 | 187 | 
| 76   return 0; | 188   return 0; | 
| 77 } | 189 } | 
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) | 
| 110     return 1; | 222     return 1; | 
| 111 | 223 | 
| 112   return 0; | 224   return 0; | 
| 113 } | 225 } | 
| 114 | 226 | 
| 115 GLOBAL(void) | 227 GLOBAL(void) | 
| 116 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 228 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 
| 117                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 229                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 
| 118                        JDIMENSION output_row, int num_rows) | 230                        JDIMENSION output_row, int num_rows) | 
| 119 { | 231 { | 
|  | 232   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 
|  | 233 | 
|  | 234   switch(cinfo->in_color_space) { | 
|  | 235     case JCS_EXT_RGB: | 
|  | 236       if (simd_features & JSIMD_FASTLD3) | 
|  | 237         neonfct=jsimd_extrgb_ycc_convert_neon; | 
|  | 238       else | 
|  | 239         neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; | 
|  | 240       break; | 
|  | 241     case JCS_EXT_RGBX: | 
|  | 242     case JCS_EXT_RGBA: | 
|  | 243       neonfct=jsimd_extrgbx_ycc_convert_neon; | 
|  | 244       break; | 
|  | 245     case JCS_EXT_BGR: | 
|  | 246       if (simd_features & JSIMD_FASTLD3) | 
|  | 247         neonfct=jsimd_extbgr_ycc_convert_neon; | 
|  | 248       else | 
|  | 249         neonfct=jsimd_extbgr_ycc_convert_neon_slowld3; | 
|  | 250       break; | 
|  | 251     case JCS_EXT_BGRX: | 
|  | 252     case JCS_EXT_BGRA: | 
|  | 253       neonfct=jsimd_extbgrx_ycc_convert_neon; | 
|  | 254       break; | 
|  | 255     case JCS_EXT_XBGR: | 
|  | 256     case JCS_EXT_ABGR: | 
|  | 257       neonfct=jsimd_extxbgr_ycc_convert_neon; | 
|  | 258       break; | 
|  | 259     case JCS_EXT_XRGB: | 
|  | 260     case JCS_EXT_ARGB: | 
|  | 261       neonfct=jsimd_extxrgb_ycc_convert_neon; | 
|  | 262       break; | 
|  | 263     default: | 
|  | 264       if (simd_features & JSIMD_FASTLD3) | 
|  | 265         neonfct=jsimd_extrgb_ycc_convert_neon; | 
|  | 266       else | 
|  | 267         neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; | 
|  | 268       break; | 
|  | 269   } | 
|  | 270 | 
|  | 271   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 
| 120 } | 272 } | 
| 121 | 273 | 
| 122 GLOBAL(void) | 274 GLOBAL(void) | 
| 123 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 275 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 
| 124                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 276                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 
| 125                         JDIMENSION output_row, int num_rows) | 277                         JDIMENSION output_row, int num_rows) | 
| 126 { | 278 { | 
| 127 } | 279 } | 
| 128 | 280 | 
| 129 GLOBAL(void) | 281 GLOBAL(void) | 
| 130 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 282 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 
| 131                        JSAMPIMAGE input_buf, JDIMENSION input_row, | 283                        JSAMPIMAGE input_buf, JDIMENSION input_row, | 
| 132                        JSAMPARRAY output_buf, int num_rows) | 284                        JSAMPARRAY output_buf, int num_rows) | 
| 133 { | 285 { | 
| 134   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 286   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 
| 135 | 287 | 
| 136   switch(cinfo->out_color_space) { | 288   switch(cinfo->out_color_space) { | 
| 137     case JCS_EXT_RGB: | 289     case JCS_EXT_RGB: | 
| 138       neonfct=jsimd_ycc_extrgb_convert_neon; | 290       if (simd_features & JSIMD_FASTST3) | 
|  | 291         neonfct=jsimd_ycc_extrgb_convert_neon; | 
|  | 292       else | 
|  | 293         neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; | 
| 139       break; | 294       break; | 
| 140     case JCS_EXT_RGBX: | 295     case JCS_EXT_RGBX: | 
| 141     case JCS_EXT_RGBA: | 296     case JCS_EXT_RGBA: | 
| 142       neonfct=jsimd_ycc_extrgbx_convert_neon; | 297       neonfct=jsimd_ycc_extrgbx_convert_neon; | 
| 143       break; | 298       break; | 
| 144     case JCS_EXT_BGR: | 299     case JCS_EXT_BGR: | 
| 145       neonfct=jsimd_ycc_extbgr_convert_neon; | 300       if (simd_features & JSIMD_FASTST3) | 
|  | 301         neonfct=jsimd_ycc_extbgr_convert_neon; | 
|  | 302       else | 
|  | 303         neonfct=jsimd_ycc_extbgr_convert_neon_slowst3; | 
| 146       break; | 304       break; | 
| 147     case JCS_EXT_BGRX: | 305     case JCS_EXT_BGRX: | 
| 148     case JCS_EXT_BGRA: | 306     case JCS_EXT_BGRA: | 
| 149       neonfct=jsimd_ycc_extbgrx_convert_neon; | 307       neonfct=jsimd_ycc_extbgrx_convert_neon; | 
| 150       break; | 308       break; | 
| 151     case JCS_EXT_XBGR: | 309     case JCS_EXT_XBGR: | 
| 152     case JCS_EXT_ABGR: | 310     case JCS_EXT_ABGR: | 
| 153       neonfct=jsimd_ycc_extxbgr_convert_neon; | 311       neonfct=jsimd_ycc_extxbgr_convert_neon; | 
| 154       break; | 312       break; | 
| 155     case JCS_EXT_XRGB: | 313     case JCS_EXT_XRGB: | 
| 156     case JCS_EXT_ARGB: | 314     case JCS_EXT_ARGB: | 
| 157       neonfct=jsimd_ycc_extxrgb_convert_neon; | 315       neonfct=jsimd_ycc_extxrgb_convert_neon; | 
| 158       break; | 316       break; | 
| 159     default: | 317     default: | 
| 160       neonfct=jsimd_ycc_extrgb_convert_neon; | 318       if (simd_features & JSIMD_FASTST3) | 
|  | 319         neonfct=jsimd_ycc_extrgb_convert_neon; | 
|  | 320       else | 
|  | 321         neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; | 
| 161       break; | 322       break; | 
| 162   } | 323   } | 
| 163 | 324 | 
| 164   if (simd_support & JSIMD_ARM_NEON) | 325   neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 
| 165     neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |  | 
| 166 } | 326 } | 
| 167 | 327 | 
| 168 GLOBAL(void) | 328 GLOBAL(void) | 
| 169 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | 329 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | 
| 170                           JSAMPIMAGE input_buf, JDIMENSION input_row, | 330                           JSAMPIMAGE input_buf, JDIMENSION input_row, | 
| 171                           JSAMPARRAY output_buf, int num_rows) | 331                           JSAMPARRAY output_buf, int num_rows) | 
| 172 { | 332 { | 
| 173   if (simd_support & JSIMD_ARM_NEON) | 333   jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, | 
| 174     jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, | 334                                 output_buf, num_rows); | 
| 175                                   output_buf, num_rows); |  | 
| 176 } | 335 } | 
| 177 | 336 | 
| 178 GLOBAL(int) | 337 GLOBAL(int) | 
| 179 jsimd_can_h2v2_downsample (void) | 338 jsimd_can_h2v2_downsample (void) | 
| 180 { | 339 { | 
| 181   init_simd(); | 340   init_simd(); | 
| 182 | 341 | 
|  | 342   /* The code is optimised for these values only */ | 
|  | 343   if (BITS_IN_JSAMPLE != 8) | 
|  | 344     return 0; | 
|  | 345   if (DCTSIZE != 8) | 
|  | 346     return 0; | 
|  | 347   if (sizeof(JDIMENSION) != 4) | 
|  | 348     return 0; | 
|  | 349 | 
|  | 350   if (simd_support & JSIMD_ARM_NEON) | 
|  | 351     return 1; | 
|  | 352 | 
| 183   return 0; | 353   return 0; | 
| 184 } | 354 } | 
| 185 | 355 | 
| 186 GLOBAL(int) | 356 GLOBAL(int) | 
| 187 jsimd_can_h2v1_downsample (void) | 357 jsimd_can_h2v1_downsample (void) | 
| 188 { | 358 { | 
| 189   init_simd(); | 359   init_simd(); | 
| 190 | 360 | 
|  | 361   /* The code is optimised for these values only */ | 
|  | 362   if (BITS_IN_JSAMPLE != 8) | 
|  | 363     return 0; | 
|  | 364   if (DCTSIZE != 8) | 
|  | 365     return 0; | 
|  | 366   if (sizeof(JDIMENSION) != 4) | 
|  | 367     return 0; | 
|  | 368 | 
|  | 369   if (simd_support & JSIMD_ARM_NEON) | 
|  | 370     return 1; | 
|  | 371 | 
| 191   return 0; | 372   return 0; | 
| 192 } | 373 } | 
| 193 | 374 | 
| 194 GLOBAL(void) | 375 GLOBAL(void) | 
| 195 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 376 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, | 
| 196                        JSAMPARRAY input_data, JSAMPARRAY output_data) | 377                        JSAMPARRAY input_data, JSAMPARRAY output_data) | 
| 197 { | 378 { | 
|  | 379   jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, | 
|  | 380                              compptr->v_samp_factor, compptr->width_in_blocks, | 
|  | 381                              input_data, output_data); | 
| 198 } | 382 } | 
| 199 | 383 | 
| 200 GLOBAL(void) | 384 GLOBAL(void) | 
| 201 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 385 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, | 
| 202                        JSAMPARRAY input_data, JSAMPARRAY output_data) | 386                        JSAMPARRAY input_data, JSAMPARRAY output_data) | 
| 203 { | 387 { | 
|  | 388   jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, | 
|  | 389                              compptr->v_samp_factor, compptr->width_in_blocks, | 
|  | 390                              input_data, output_data); | 
| 204 } | 391 } | 
| 205 | 392 | 
| 206 GLOBAL(int) | 393 GLOBAL(int) | 
| 207 jsimd_can_h2v2_upsample (void) | 394 jsimd_can_h2v2_upsample (void) | 
| 208 { | 395 { | 
| 209   init_simd(); | 396   init_simd(); | 
| 210 | 397 | 
| 211   return 0; | 398   return 0; | 
| 212 } | 399 } | 
| 213 | 400 | 
| 214 GLOBAL(int) | 401 GLOBAL(int) | 
| 215 jsimd_can_h2v1_upsample (void) | 402 jsimd_can_h2v1_upsample (void) | 
| 216 { | 403 { | 
| 217   init_simd(); | 404   init_simd(); | 
| 218 | 405 | 
| 219   return 0; | 406   return 0; | 
| 220 } | 407 } | 
| 221 | 408 | 
| 222 GLOBAL(void) | 409 GLOBAL(void) | 
| 223 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 410 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 
| 224                      jpeg_component_info * compptr, | 411                      jpeg_component_info *compptr, | 
| 225                      JSAMPARRAY input_data, | 412                      JSAMPARRAY input_data, | 
| 226                      JSAMPARRAY * output_data_ptr) | 413                      JSAMPARRAY *output_data_ptr) | 
| 227 { | 414 { | 
| 228 } | 415 } | 
| 229 | 416 | 
| 230 GLOBAL(void) | 417 GLOBAL(void) | 
| 231 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 418 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 
| 232                      jpeg_component_info * compptr, | 419                      jpeg_component_info *compptr, | 
| 233                      JSAMPARRAY input_data, | 420                      JSAMPARRAY input_data, | 
| 234                      JSAMPARRAY * output_data_ptr) | 421                      JSAMPARRAY *output_data_ptr) | 
| 235 { | 422 { | 
| 236 } | 423 } | 
| 237 | 424 | 
| 238 GLOBAL(int) | 425 GLOBAL(int) | 
| 239 jsimd_can_h2v2_fancy_upsample (void) | 426 jsimd_can_h2v2_fancy_upsample (void) | 
| 240 { | 427 { | 
| 241   init_simd(); | 428   init_simd(); | 
| 242 | 429 | 
| 243   return 0; | 430   return 0; | 
| 244 } | 431 } | 
| 245 | 432 | 
| 246 GLOBAL(int) | 433 GLOBAL(int) | 
| 247 jsimd_can_h2v1_fancy_upsample (void) | 434 jsimd_can_h2v1_fancy_upsample (void) | 
| 248 { | 435 { | 
| 249   init_simd(); | 436   init_simd(); | 
| 250 | 437 | 
| 251   return 0; | 438   return 0; | 
| 252 } | 439 } | 
| 253 | 440 | 
| 254 GLOBAL(void) | 441 GLOBAL(void) | 
| 255 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 442 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 
| 256                            jpeg_component_info * compptr, | 443                            jpeg_component_info *compptr, | 
| 257                            JSAMPARRAY input_data, | 444                            JSAMPARRAY input_data, | 
| 258                            JSAMPARRAY * output_data_ptr) | 445                            JSAMPARRAY *output_data_ptr) | 
| 259 { | 446 { | 
| 260 } | 447 } | 
| 261 | 448 | 
| 262 GLOBAL(void) | 449 GLOBAL(void) | 
| 263 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 450 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 
| 264                            jpeg_component_info * compptr, | 451                            jpeg_component_info *compptr, | 
| 265                            JSAMPARRAY input_data, | 452                            JSAMPARRAY input_data, | 
| 266                            JSAMPARRAY * output_data_ptr) | 453                            JSAMPARRAY *output_data_ptr) | 
| 267 { | 454 { | 
| 268 } | 455 } | 
| 269 | 456 | 
| 270 GLOBAL(int) | 457 GLOBAL(int) | 
| 271 jsimd_can_h2v2_merged_upsample (void) | 458 jsimd_can_h2v2_merged_upsample (void) | 
| 272 { | 459 { | 
| 273   init_simd(); | 460   init_simd(); | 
| 274 | 461 | 
| 275   return 0; | 462   return 0; | 
| 276 } | 463 } | 
| (...skipping 20 matching lines...) | (...skipping 20 matching lines...) | 
| 297                             JDIMENSION in_row_group_ctr, | 484                             JDIMENSION in_row_group_ctr, | 
| 298                             JSAMPARRAY output_buf) | 485                             JSAMPARRAY output_buf) | 
| 299 { | 486 { | 
| 300 } | 487 } | 
| 301 | 488 | 
| 302 GLOBAL(int) | 489 GLOBAL(int) | 
| 303 jsimd_can_convsamp (void) | 490 jsimd_can_convsamp (void) | 
| 304 { | 491 { | 
| 305   init_simd(); | 492   init_simd(); | 
| 306 | 493 | 
|  | 494   /* The code is optimised for these values only */ | 
|  | 495   if (DCTSIZE != 8) | 
|  | 496     return 0; | 
|  | 497   if (BITS_IN_JSAMPLE != 8) | 
|  | 498     return 0; | 
|  | 499   if (sizeof(JDIMENSION) != 4) | 
|  | 500     return 0; | 
|  | 501   if (sizeof(DCTELEM) != 2) | 
|  | 502     return 0; | 
|  | 503 | 
|  | 504   if (simd_support & JSIMD_ARM_NEON) | 
|  | 505     return 1; | 
|  | 506 | 
| 307   return 0; | 507   return 0; | 
| 308 } | 508 } | 
| 309 | 509 | 
| 310 GLOBAL(int) | 510 GLOBAL(int) | 
| 311 jsimd_can_convsamp_float (void) | 511 jsimd_can_convsamp_float (void) | 
| 312 { | 512 { | 
| 313   init_simd(); | 513   init_simd(); | 
| 314 | 514 | 
| 315   return 0; | 515   return 0; | 
| 316 } | 516 } | 
| 317 | 517 | 
| 318 GLOBAL(void) | 518 GLOBAL(void) | 
| 319 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 519 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 
| 320                 DCTELEM * workspace) | 520                 DCTELEM *workspace) | 
| 321 { | 521 { | 
|  | 522   jsimd_convsamp_neon(sample_data, start_col, workspace); | 
| 322 } | 523 } | 
| 323 | 524 | 
| 324 GLOBAL(void) | 525 GLOBAL(void) | 
| 325 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 526 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 
| 326                       FAST_FLOAT * workspace) | 527                       FAST_FLOAT *workspace) | 
| 327 { | 528 { | 
| 328 } | 529 } | 
| 329 | 530 | 
| 330 GLOBAL(int) | 531 GLOBAL(int) | 
| 331 jsimd_can_fdct_islow (void) | 532 jsimd_can_fdct_islow (void) | 
| 332 { | 533 { | 
| 333   init_simd(); | 534   init_simd(); | 
| 334 | 535 | 
|  | 536   /* The code is optimised for these values only */ | 
|  | 537   if (DCTSIZE != 8) | 
|  | 538     return 0; | 
|  | 539   if (sizeof(DCTELEM) != 2) | 
|  | 540     return 0; | 
|  | 541 | 
|  | 542   if (simd_support & JSIMD_ARM_NEON) | 
|  | 543     return 1; | 
|  | 544 | 
| 335   return 0; | 545   return 0; | 
| 336 } | 546 } | 
| 337 | 547 | 
| 338 GLOBAL(int) | 548 GLOBAL(int) | 
| 339 jsimd_can_fdct_ifast (void) | 549 jsimd_can_fdct_ifast (void) | 
| 340 { | 550 { | 
| 341   init_simd(); | 551   init_simd(); | 
| 342 | 552 | 
|  | 553   /* The code is optimised for these values only */ | 
|  | 554   if (DCTSIZE != 8) | 
|  | 555     return 0; | 
|  | 556   if (sizeof(DCTELEM) != 2) | 
|  | 557     return 0; | 
|  | 558 | 
|  | 559   if (simd_support & JSIMD_ARM_NEON) | 
|  | 560     return 1; | 
|  | 561 | 
| 343   return 0; | 562   return 0; | 
| 344 } | 563 } | 
| 345 | 564 | 
| 346 GLOBAL(int) | 565 GLOBAL(int) | 
| 347 jsimd_can_fdct_float (void) | 566 jsimd_can_fdct_float (void) | 
| 348 { | 567 { | 
| 349   init_simd(); | 568   init_simd(); | 
| 350 | 569 | 
| 351   return 0; | 570   return 0; | 
| 352 } | 571 } | 
| 353 | 572 | 
| 354 GLOBAL(void) | 573 GLOBAL(void) | 
| 355 jsimd_fdct_islow (DCTELEM * data) | 574 jsimd_fdct_islow (DCTELEM *data) | 
|  | 575 { | 
|  | 576   jsimd_fdct_islow_neon(data); | 
|  | 577 } | 
|  | 578 | 
|  | 579 GLOBAL(void) | 
|  | 580 jsimd_fdct_ifast (DCTELEM *data) | 
|  | 581 { | 
|  | 582   jsimd_fdct_ifast_neon(data); | 
|  | 583 } | 
|  | 584 | 
|  | 585 GLOBAL(void) | 
|  | 586 jsimd_fdct_float (FAST_FLOAT *data) | 
| 356 { | 587 { | 
| 357 } | 588 } | 
| 358 | 589 | 
| 359 GLOBAL(void) |  | 
| 360 jsimd_fdct_ifast (DCTELEM * data) |  | 
| 361 { |  | 
| 362 } |  | 
| 363 |  | 
| 364 GLOBAL(void) |  | 
| 365 jsimd_fdct_float (FAST_FLOAT * data) |  | 
| 366 { |  | 
| 367 } |  | 
| 368 |  | 
| 369 GLOBAL(int) | 590 GLOBAL(int) | 
| 370 jsimd_can_quantize (void) | 591 jsimd_can_quantize (void) | 
| 371 { | 592 { | 
| 372   init_simd(); | 593   init_simd(); | 
| 373 | 594 | 
|  | 595   /* The code is optimised for these values only */ | 
|  | 596   if (DCTSIZE != 8) | 
|  | 597     return 0; | 
|  | 598   if (sizeof(JCOEF) != 2) | 
|  | 599     return 0; | 
|  | 600   if (sizeof(DCTELEM) != 2) | 
|  | 601     return 0; | 
|  | 602 | 
|  | 603   if (simd_support & JSIMD_ARM_NEON) | 
|  | 604     return 1; | 
|  | 605 | 
| 374   return 0; | 606   return 0; | 
| 375 } | 607 } | 
| 376 | 608 | 
| 377 GLOBAL(int) | 609 GLOBAL(int) | 
| 378 jsimd_can_quantize_float (void) | 610 jsimd_can_quantize_float (void) | 
| 379 { | 611 { | 
| 380   init_simd(); | 612   init_simd(); | 
| 381 | 613 | 
| 382   return 0; | 614   return 0; | 
| 383 } | 615 } | 
| 384 | 616 | 
| 385 GLOBAL(void) | 617 GLOBAL(void) | 
| 386 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 618 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, | 
| 387                 DCTELEM * workspace) | 619                 DCTELEM *workspace) | 
|  | 620 { | 
|  | 621   jsimd_quantize_neon(coef_block, divisors, workspace); | 
|  | 622 } | 
|  | 623 | 
|  | 624 GLOBAL(void) | 
|  | 625 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, | 
|  | 626                       FAST_FLOAT *workspace) | 
| 388 { | 627 { | 
| 389 } | 628 } | 
| 390 | 629 | 
| 391 GLOBAL(void) |  | 
| 392 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |  | 
| 393                       FAST_FLOAT * workspace) |  | 
| 394 { |  | 
| 395 } |  | 
| 396 |  | 
| 397 GLOBAL(int) | 630 GLOBAL(int) | 
| 398 jsimd_can_idct_2x2 (void) | 631 jsimd_can_idct_2x2 (void) | 
| 399 { | 632 { | 
| 400   init_simd(); | 633   init_simd(); | 
| 401 | 634 | 
| 402   /* The code is optimised for these values only */ | 635   /* The code is optimised for these values only */ | 
| 403   if (DCTSIZE != 8) | 636   if (DCTSIZE != 8) | 
| 404     return 0; | 637     return 0; | 
| 405   if (sizeof(JCOEF) != 2) | 638   if (sizeof(JCOEF) != 2) | 
| 406     return 0; | 639     return 0; | 
| (...skipping 27 matching lines...) | (...skipping 27 matching lines...) | 
| 434   if (sizeof(ISLOW_MULT_TYPE) != 2) | 667   if (sizeof(ISLOW_MULT_TYPE) != 2) | 
| 435     return 0; | 668     return 0; | 
| 436 | 669 | 
| 437   if (simd_support & JSIMD_ARM_NEON) | 670   if (simd_support & JSIMD_ARM_NEON) | 
| 438     return 1; | 671     return 1; | 
| 439 | 672 | 
| 440   return 0; | 673   return 0; | 
| 441 } | 674 } | 
| 442 | 675 | 
| 443 GLOBAL(void) | 676 GLOBAL(void) | 
| 444 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 677 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 445                 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 678                 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 446                 JDIMENSION output_col) | 679                 JDIMENSION output_col) | 
| 447 { | 680 { | 
| 448   if (simd_support & JSIMD_ARM_NEON) | 681   jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, | 
| 449     jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, | 682                       output_col); | 
| 450                         output_col); |  | 
| 451 } | 683 } | 
| 452 | 684 | 
| 453 GLOBAL(void) | 685 GLOBAL(void) | 
| 454 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 686 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 455                 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 687                 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 456                 JDIMENSION output_col) | 688                 JDIMENSION output_col) | 
| 457 { | 689 { | 
| 458   if (simd_support & JSIMD_ARM_NEON) | 690   jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, | 
| 459     jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, | 691                       output_col); | 
| 460                         output_col); |  | 
| 461 } | 692 } | 
| 462 | 693 | 
| 463 GLOBAL(int) | 694 GLOBAL(int) | 
| 464 jsimd_can_idct_islow (void) | 695 jsimd_can_idct_islow (void) | 
| 465 { | 696 { | 
| 466   init_simd(); | 697   init_simd(); | 
| 467 | 698 | 
| 468   /* The code is optimised for these values only */ | 699   /* The code is optimised for these values only */ | 
| 469   if (DCTSIZE != 8) | 700   if (DCTSIZE != 8) | 
| 470     return 0; | 701     return 0; | 
| (...skipping 39 matching lines...) | (...skipping 39 matching lines...) | 
| 510 | 741 | 
| 511 GLOBAL(int) | 742 GLOBAL(int) | 
| 512 jsimd_can_idct_float (void) | 743 jsimd_can_idct_float (void) | 
| 513 { | 744 { | 
| 514   init_simd(); | 745   init_simd(); | 
| 515 | 746 | 
| 516   return 0; | 747   return 0; | 
| 517 } | 748 } | 
| 518 | 749 | 
| 519 GLOBAL(void) | 750 GLOBAL(void) | 
| 520 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 751 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 521                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 752                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 522                   JDIMENSION output_col) | 753                   JDIMENSION output_col) | 
| 523 { | 754 { | 
| 524   if (simd_support & JSIMD_ARM_NEON) | 755   jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, | 
| 525     jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, | 756                         output_col); | 
| 526                           output_col); |  | 
| 527 } | 757 } | 
| 528 | 758 | 
| 529 GLOBAL(void) | 759 GLOBAL(void) | 
| 530 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 760 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 531                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 761                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 532                   JDIMENSION output_col) | 762                   JDIMENSION output_col) | 
| 533 { | 763 { | 
| 534   if (simd_support & JSIMD_ARM_NEON) | 764   jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, | 
| 535     jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, | 765                         output_col); | 
| 536                           output_col); |  | 
| 537 } | 766 } | 
| 538 | 767 | 
| 539 GLOBAL(void) | 768 GLOBAL(void) | 
| 540 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 769 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
| 541                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 770                   JCOEFPTR coef_block, JSAMPARRAY output_buf, | 
| 542                   JDIMENSION output_col) | 771                   JDIMENSION output_col) | 
| 543 { | 772 { | 
| 544 } | 773 } | 
|  | 774 | 
|  | 775 GLOBAL(int) | 
|  | 776 jsimd_can_huff_encode_one_block (void) | 
|  | 777 { | 
|  | 778   init_simd(); | 
|  | 779 | 
|  | 780   if (DCTSIZE != 8) | 
|  | 781     return 0; | 
|  | 782   if (sizeof(JCOEF) != 2) | 
|  | 783     return 0; | 
|  | 784 | 
|  | 785   if (simd_support & JSIMD_ARM_NEON && simd_huffman) | 
|  | 786     return 1; | 
|  | 787 | 
|  | 788   return 0; | 
|  | 789 } | 
|  | 790 | 
|  | 791 GLOBAL(JOCTET*) | 
|  | 792 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, | 
|  | 793                              int last_dc_val, c_derived_tbl *dctbl, | 
|  | 794                              c_derived_tbl *actbl) | 
|  | 795 { | 
|  | 796   if (simd_features & JSIMD_FASTTBL) | 
|  | 797     return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, | 
|  | 798                                             dctbl, actbl); | 
|  | 799   else | 
|  | 800     return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, | 
|  | 801                                                     last_dc_val, dctbl, actbl); | 
|  | 802 } | 
| OLD | NEW | 
|---|