| OLD | NEW |
| 1 /* | 1 /* |
| 2 * jsimd_arm.c | 2 * jsimd_arm64.c |
| 3 * | 3 * |
| 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 * Copyright 2009-2011 D. R. Commander | 5 * Copyright 2009-2011, 2013-2014 D. R. Commander |
| 6 * | 6 * |
| 7 * Based on the x86 SIMD extension for IJG JPEG library, | 7 * Based on the x86 SIMD extension for IJG JPEG library, |
| 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 * | 10 * |
| 11 * This file contains the interface between the "normal" portions | 11 * This file contains the interface between the "normal" portions |
| 12 * of the library and the SIMD implementations when running on | 12 * of the library and the SIMD implementations when running on a |
| 13 * ARM architecture. | 13 * 64-bit ARM architecture. |
| 14 * | |
| 15 * Based on the stubs from 'jsimd_none.c' | |
| 16 */ | 14 */ |
| 17 | 15 |
| 18 #define JPEG_INTERNALS | 16 #define JPEG_INTERNALS |
| 19 #include "../jinclude.h" | 17 #include "../jinclude.h" |
| 20 #include "../jpeglib.h" | 18 #include "../jpeglib.h" |
| 21 #include "../jsimd.h" | 19 #include "../jsimd.h" |
| 22 #include "../jdct.h" | 20 #include "../jdct.h" |
| 23 #include "../jsimddct.h" | 21 #include "../jsimddct.h" |
| 24 #include "jsimd.h" | 22 #include "jsimd.h" |
| 25 | 23 |
| 26 #include <stdio.h> | 24 #include <stdio.h> |
| 27 #include <string.h> | 25 #include <string.h> |
| 28 #include <ctype.h> | 26 #include <ctype.h> |
| 29 | 27 |
| 30 static unsigned int simd_support = ~0; | 28 static unsigned int simd_support = ~0; |
| 31 | 29 |
| 32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | |
| 33 | |
| 34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) | |
| 35 | |
| 36 LOCAL(int) | |
| 37 check_feature (char *buffer, char *feature) | |
| 38 { | |
| 39 char *p; | |
| 40 if (*feature == 0) | |
| 41 return 0; | |
| 42 if (strncmp(buffer, "Features", 8) != 0) | |
| 43 return 0; | |
| 44 buffer += 8; | |
| 45 while (isspace(*buffer)) | |
| 46 buffer++; | |
| 47 | |
| 48 /* Check if 'feature' is present in the buffer as a separate word */ | |
| 49 while ((p = strstr(buffer, feature))) { | |
| 50 if (p > buffer && !isspace(*(p - 1))) { | |
| 51 buffer++; | |
| 52 continue; | |
| 53 } | |
| 54 p += strlen(feature); | |
| 55 if (*p != 0 && !isspace(*p)) { | |
| 56 buffer++; | |
| 57 continue; | |
| 58 } | |
| 59 return 1; | |
| 60 } | |
| 61 return 0; | |
| 62 } | |
| 63 | |
| 64 LOCAL(int) | |
| 65 parse_proc_cpuinfo (int bufsize) | |
| 66 { | |
| 67 char *buffer = (char *)malloc(bufsize); | |
| 68 FILE *fd; | |
| 69 simd_support = 0; | |
| 70 | |
| 71 if (!buffer) | |
| 72 return 0; | |
| 73 | |
| 74 fd = fopen("/proc/cpuinfo", "r"); | |
| 75 if (fd) { | |
| 76 while (fgets(buffer, bufsize, fd)) { | |
| 77 if (!strchr(buffer, '\n') && !feof(fd)) { | |
| 78 /* "impossible" happened - insufficient size of the buffer! */ | |
| 79 fclose(fd); | |
| 80 free(buffer); | |
| 81 return 0; | |
| 82 } | |
| 83 if (check_feature(buffer, "neon")) | |
| 84 simd_support |= JSIMD_ARM_NEON; | |
| 85 } | |
| 86 fclose(fd); | |
| 87 } | |
| 88 free(buffer); | |
| 89 return 1; | |
| 90 } | |
| 91 | |
| 92 #endif | |
| 93 | |
| 94 /* | 30 /* |
| 95 * Check what SIMD accelerations are supported. | 31 * Check what SIMD accelerations are supported. |
| 96 * | 32 * |
| 97 * FIXME: This code is racy under a multi-threaded environment. | 33 * FIXME: This code is racy under a multi-threaded environment. |
| 98 */ | 34 */ |
| 35 |
| 36 /* |
| 37 * ARMv8 architectures support NEON extensions by default. |
| 38 * It is no longer optional as it was with ARMv7. |
| 39 */ |
| 40 |
| 41 |
| 99 LOCAL(void) | 42 LOCAL(void) |
| 100 init_simd (void) | 43 init_simd (void) |
| 101 { | 44 { |
| 102 char *env = NULL; | 45 char *env = NULL; |
| 103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | |
| 104 int bufsize = 1024; /* an initial guess for the line buffer size limit */ | |
| 105 #endif | |
| 106 | 46 |
| 107 if (simd_support != ~0U) | 47 if (simd_support != ~0U) |
| 108 return; | 48 return; |
| 109 | 49 |
| 110 simd_support = 0; | 50 simd_support = 0; |
| 111 | 51 |
| 112 #if defined(__ARM_NEON__) | |
| 113 simd_support |= JSIMD_ARM_NEON; | 52 simd_support |= JSIMD_ARM_NEON; |
| 114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) | |
| 115 /* We still have a chance to use NEON regardless of globally used | |
| 116 * -mcpu/-mfpu options passed to gcc by performing runtime detection via | |
| 117 * /proc/cpuinfo parsing on linux/android */ | |
| 118 while (!parse_proc_cpuinfo(bufsize)) { | |
| 119 bufsize *= 2; | |
| 120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) | |
| 121 break; | |
| 122 } | |
| 123 #endif | |
| 124 | 53 |
| 125 /* Force different settings through environment variables */ | 54 /* Force different settings through environment variables */ |
| 126 env = getenv("JSIMD_FORCE_ARM_NEON"); | 55 env = getenv("JSIMD_FORCENEON"); |
| 127 if ((env != NULL) && (strcmp(env, "1") == 0)) | 56 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 128 simd_support &= JSIMD_ARM_NEON; | 57 simd_support &= JSIMD_ARM_NEON; |
| 129 env = getenv("JSIMD_FORCE_NO_SIMD"); | 58 env = getenv("JSIMD_FORCENONE"); |
| 130 if ((env != NULL) && (strcmp(env, "1") == 0)) | 59 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 131 simd_support = 0; | 60 simd_support = 0; |
| 132 } | 61 } |
| 133 | 62 |
| 134 GLOBAL(int) | 63 GLOBAL(int) |
| 135 jsimd_can_rgb_ycc (void) | 64 jsimd_can_rgb_ycc (void) |
| 136 { | 65 { |
| 137 init_simd(); | 66 init_simd(); |
| 138 | 67 |
| 139 /* The code is optimised for these values only */ | |
| 140 if (BITS_IN_JSAMPLE != 8) | |
| 141 return 0; | |
| 142 if (sizeof(JDIMENSION) != 4) | |
| 143 return 0; | |
| 144 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | |
| 145 return 0; | |
| 146 | |
| 147 if (simd_support & JSIMD_ARM_NEON) | |
| 148 return 1; | |
| 149 | |
| 150 return 0; | 68 return 0; |
| 151 } | 69 } |
| 152 | 70 |
| 153 GLOBAL(int) | 71 GLOBAL(int) |
| 154 jsimd_can_rgb_gray (void) | 72 jsimd_can_rgb_gray (void) |
| 155 { | 73 { |
| 156 init_simd(); | 74 init_simd(); |
| 157 | 75 |
| 158 return 0; | 76 return 0; |
| 159 } | 77 } |
| 160 | 78 |
| 161 GLOBAL(int) | 79 GLOBAL(int) |
| 162 jsimd_can_ycc_rgb (void) | 80 jsimd_can_ycc_rgb (void) |
| 163 { | 81 { |
| 164 init_simd(); | 82 init_simd(); |
| 165 | 83 |
| 166 /* The code is optimised for these values only */ | 84 /* The code is optimised for these values only */ |
| 167 if (BITS_IN_JSAMPLE != 8) | 85 if (BITS_IN_JSAMPLE != 8) |
| 168 return 0; | 86 return 0; |
| 169 if (sizeof(JDIMENSION) != 4) | 87 if (sizeof(JDIMENSION) != 4) |
| 170 return 0; | 88 return 0; |
| 171 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 89 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 172 return 0; | 90 return 0; |
| 91 |
| 173 if (simd_support & JSIMD_ARM_NEON) | 92 if (simd_support & JSIMD_ARM_NEON) |
| 174 return 1; | 93 return 1; |
| 175 | 94 |
| 176 return 0; | 95 return 0; |
| 177 } | 96 } |
| 178 | 97 |
| 179 GLOBAL(void) | 98 GLOBAL(void) |
| 180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 99 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 100 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 182 JDIMENSION output_row, int num_rows) | 101 JDIMENSION output_row, int num_rows) |
| 183 { | 102 { |
| 184 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | |
| 185 | |
| 186 switch(cinfo->in_color_space) | |
| 187 { | |
| 188 case JCS_EXT_RGB: | |
| 189 neonfct=jsimd_extrgb_ycc_convert_neon; | |
| 190 break; | |
| 191 case JCS_EXT_RGBX: | |
| 192 case JCS_EXT_RGBA: | |
| 193 neonfct=jsimd_extrgbx_ycc_convert_neon; | |
| 194 break; | |
| 195 case JCS_EXT_BGR: | |
| 196 neonfct=jsimd_extbgr_ycc_convert_neon; | |
| 197 break; | |
| 198 case JCS_EXT_BGRX: | |
| 199 case JCS_EXT_BGRA: | |
| 200 neonfct=jsimd_extbgrx_ycc_convert_neon; | |
| 201 break; | |
| 202 case JCS_EXT_XBGR: | |
| 203 case JCS_EXT_ABGR: | |
| 204 neonfct=jsimd_extxbgr_ycc_convert_neon; | |
| 205 break; | |
| 206 case JCS_EXT_XRGB: | |
| 207 case JCS_EXT_ARGB: | |
| 208 neonfct=jsimd_extxrgb_ycc_convert_neon; | |
| 209 break; | |
| 210 default: | |
| 211 neonfct=jsimd_extrgb_ycc_convert_neon; | |
| 212 break; | |
| 213 } | |
| 214 | |
| 215 if (simd_support & JSIMD_ARM_NEON) | |
| 216 neonfct(cinfo->image_width, input_buf, | |
| 217 output_buf, output_row, num_rows); | |
| 218 } | 103 } |
| 219 | 104 |
| 220 GLOBAL(void) | 105 GLOBAL(void) |
| 221 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 106 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 222 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 107 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 223 JDIMENSION output_row, int num_rows) | 108 JDIMENSION output_row, int num_rows) |
| 224 { | 109 { |
| 225 } | 110 } |
| 226 | 111 |
| 227 GLOBAL(void) | 112 GLOBAL(void) |
| 228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 113 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 229 JSAMPIMAGE input_buf, JDIMENSION input_row, | 114 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 230 JSAMPARRAY output_buf, int num_rows) | 115 JSAMPARRAY output_buf, int num_rows) |
| 231 { | 116 { |
| 232 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 117 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 233 | 118 |
| 234 switch(cinfo->out_color_space) | 119 switch(cinfo->out_color_space) { |
| 235 { | |
| 236 case JCS_EXT_RGB: | 120 case JCS_EXT_RGB: |
| 237 neonfct=jsimd_ycc_extrgb_convert_neon; | 121 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 238 break; | 122 break; |
| 239 case JCS_EXT_RGBX: | 123 case JCS_EXT_RGBX: |
| 240 case JCS_EXT_RGBA: | 124 case JCS_EXT_RGBA: |
| 241 neonfct=jsimd_ycc_extrgbx_convert_neon; | 125 neonfct=jsimd_ycc_extrgbx_convert_neon; |
| 242 break; | 126 break; |
| 243 case JCS_EXT_BGR: | 127 case JCS_EXT_BGR: |
| 244 neonfct=jsimd_ycc_extbgr_convert_neon; | 128 neonfct=jsimd_ycc_extbgr_convert_neon; |
| 245 break; | 129 break; |
| 246 case JCS_EXT_BGRX: | 130 case JCS_EXT_BGRX: |
| 247 case JCS_EXT_BGRA: | 131 case JCS_EXT_BGRA: |
| 248 neonfct=jsimd_ycc_extbgrx_convert_neon; | 132 neonfct=jsimd_ycc_extbgrx_convert_neon; |
| 249 break; | 133 break; |
| 250 case JCS_EXT_XBGR: | 134 case JCS_EXT_XBGR: |
| 251 case JCS_EXT_ABGR: | 135 case JCS_EXT_ABGR: |
| 252 neonfct=jsimd_ycc_extxbgr_convert_neon; | 136 neonfct=jsimd_ycc_extxbgr_convert_neon; |
| 253 break; | 137 break; |
| 254 case JCS_EXT_XRGB: | 138 case JCS_EXT_XRGB: |
| 255 case JCS_EXT_ARGB: | 139 case JCS_EXT_ARGB: |
| 256 neonfct=jsimd_ycc_extxrgb_convert_neon; | 140 neonfct=jsimd_ycc_extxrgb_convert_neon; |
| 257 break; | 141 break; |
| 258 default: | 142 default: |
| 259 neonfct=jsimd_ycc_extrgb_convert_neon; | 143 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 260 break; | 144 break; |
| 261 } | 145 } |
| 262 | 146 |
| 263 if (simd_support & JSIMD_ARM_NEON) | 147 if (simd_support & JSIMD_ARM_NEON) |
| 264 neonfct(cinfo->output_width, input_buf, | 148 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 265 input_row, output_buf, num_rows); | |
| 266 } | 149 } |
| 267 | 150 |
| 268 GLOBAL(int) | 151 GLOBAL(int) |
| 269 jsimd_can_h2v2_downsample (void) | 152 jsimd_can_h2v2_downsample (void) |
| 270 { | 153 { |
| 271 init_simd(); | 154 init_simd(); |
| 272 | 155 |
| 273 return 0; | 156 return 0; |
| 274 } | 157 } |
| 275 | 158 |
| (...skipping 28 matching lines...) Expand all Loading... |
| 304 GLOBAL(int) | 187 GLOBAL(int) |
| 305 jsimd_can_h2v1_upsample (void) | 188 jsimd_can_h2v1_upsample (void) |
| 306 { | 189 { |
| 307 init_simd(); | 190 init_simd(); |
| 308 | 191 |
| 309 return 0; | 192 return 0; |
| 310 } | 193 } |
| 311 | 194 |
| 312 GLOBAL(void) | 195 GLOBAL(void) |
| 313 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 196 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 314 jpeg_component_info * compptr, | 197 jpeg_component_info * compptr, |
| 315 JSAMPARRAY input_data, | 198 JSAMPARRAY input_data, |
| 316 JSAMPARRAY * output_data_ptr) | 199 JSAMPARRAY * output_data_ptr) |
| 317 { | 200 { |
| 318 } | 201 } |
| 319 | 202 |
| 320 GLOBAL(void) | 203 GLOBAL(void) |
| 321 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 204 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 322 jpeg_component_info * compptr, | 205 jpeg_component_info * compptr, |
| 323 JSAMPARRAY input_data, | 206 JSAMPARRAY input_data, |
| 324 JSAMPARRAY * output_data_ptr) | 207 JSAMPARRAY * output_data_ptr) |
| 325 { | 208 { |
| 326 } | 209 } |
| 327 | 210 |
| 328 GLOBAL(int) | 211 GLOBAL(int) |
| 329 jsimd_can_h2v2_fancy_upsample (void) | 212 jsimd_can_h2v2_fancy_upsample (void) |
| 330 { | 213 { |
| 331 init_simd(); | 214 init_simd(); |
| 332 | 215 |
| 333 return 0; | 216 return 0; |
| 334 } | 217 } |
| 335 | 218 |
| 336 GLOBAL(int) | 219 GLOBAL(int) |
| 337 jsimd_can_h2v1_fancy_upsample (void) | 220 jsimd_can_h2v1_fancy_upsample (void) |
| 338 { | 221 { |
| 339 init_simd(); | 222 init_simd(); |
| 340 | 223 |
| 341 /* The code is optimised for these values only */ | |
| 342 if (BITS_IN_JSAMPLE != 8) | |
| 343 return 0; | |
| 344 if (sizeof(JDIMENSION) != 4) | |
| 345 return 0; | |
| 346 | |
| 347 if (simd_support & JSIMD_ARM_NEON) | |
| 348 return 1; | |
| 349 | |
| 350 return 0; | 224 return 0; |
| 351 } | 225 } |
| 352 | 226 |
| 353 GLOBAL(void) | 227 GLOBAL(void) |
| 354 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 228 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 355 jpeg_component_info * compptr, | 229 jpeg_component_info * compptr, |
| 356 JSAMPARRAY input_data, | 230 JSAMPARRAY input_data, |
| 357 JSAMPARRAY * output_data_ptr) | 231 JSAMPARRAY * output_data_ptr) |
| 358 { | 232 { |
| 359 } | 233 } |
| 360 | 234 |
| 361 GLOBAL(void) | 235 GLOBAL(void) |
| 362 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 236 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 363 jpeg_component_info * compptr, | 237 jpeg_component_info * compptr, |
| 364 JSAMPARRAY input_data, | 238 JSAMPARRAY input_data, |
| 365 JSAMPARRAY * output_data_ptr) | 239 JSAMPARRAY * output_data_ptr) |
| 366 { | 240 { |
| 367 if (simd_support & JSIMD_ARM_NEON) | |
| 368 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, | |
| 369 compptr->downsampled_width, input_data, output_data_ptr); | |
| 370 } | 241 } |
| 371 | 242 |
| 372 GLOBAL(int) | 243 GLOBAL(int) |
| 373 jsimd_can_h2v2_merged_upsample (void) | 244 jsimd_can_h2v2_merged_upsample (void) |
| 374 { | 245 { |
| 375 init_simd(); | 246 init_simd(); |
| 376 | 247 |
| 377 return 0; | 248 return 0; |
| 378 } | 249 } |
| 379 | 250 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 399 JDIMENSION in_row_group_ctr, | 270 JDIMENSION in_row_group_ctr, |
| 400 JSAMPARRAY output_buf) | 271 JSAMPARRAY output_buf) |
| 401 { | 272 { |
| 402 } | 273 } |
| 403 | 274 |
| 404 GLOBAL(int) | 275 GLOBAL(int) |
| 405 jsimd_can_convsamp (void) | 276 jsimd_can_convsamp (void) |
| 406 { | 277 { |
| 407 init_simd(); | 278 init_simd(); |
| 408 | 279 |
| 409 /* The code is optimised for these values only */ | |
| 410 if (DCTSIZE != 8) | |
| 411 return 0; | |
| 412 if (BITS_IN_JSAMPLE != 8) | |
| 413 return 0; | |
| 414 if (sizeof(JDIMENSION) != 4) | |
| 415 return 0; | |
| 416 if (sizeof(DCTELEM) != 2) | |
| 417 return 0; | |
| 418 | |
| 419 if (simd_support & JSIMD_ARM_NEON) | |
| 420 return 1; | |
| 421 | |
| 422 return 0; | 280 return 0; |
| 423 } | 281 } |
| 424 | 282 |
| 425 GLOBAL(int) | 283 GLOBAL(int) |
| 426 jsimd_can_convsamp_float (void) | 284 jsimd_can_convsamp_float (void) |
| 427 { | 285 { |
| 428 init_simd(); | 286 init_simd(); |
| 429 | 287 |
| 430 return 0; | 288 return 0; |
| 431 } | 289 } |
| 432 | 290 |
| 433 GLOBAL(void) | 291 GLOBAL(void) |
| 434 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 292 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 435 DCTELEM * workspace) | 293 DCTELEM * workspace) |
| 436 { | 294 { |
| 437 if (simd_support & JSIMD_ARM_NEON) | |
| 438 jsimd_convsamp_neon(sample_data, start_col, workspace); | |
| 439 } | 295 } |
| 440 | 296 |
| 441 GLOBAL(void) | 297 GLOBAL(void) |
| 442 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 298 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 443 FAST_FLOAT * workspace) | 299 FAST_FLOAT * workspace) |
| 444 { | 300 { |
| 445 } | 301 } |
| 446 | 302 |
| 447 GLOBAL(int) | 303 GLOBAL(int) |
| 448 jsimd_can_fdct_islow (void) | 304 jsimd_can_fdct_islow (void) |
| 449 { | 305 { |
| 450 init_simd(); | 306 init_simd(); |
| 451 | 307 |
| 452 return 0; | 308 return 0; |
| 453 } | 309 } |
| 454 | 310 |
| 455 GLOBAL(int) | 311 GLOBAL(int) |
| 456 jsimd_can_fdct_ifast (void) | 312 jsimd_can_fdct_ifast (void) |
| 457 { | 313 { |
| 458 init_simd(); | 314 init_simd(); |
| 459 | 315 |
| 460 /* The code is optimised for these values only */ | |
| 461 if (DCTSIZE != 8) | |
| 462 return 0; | |
| 463 if (sizeof(DCTELEM) != 2) | |
| 464 return 0; | |
| 465 | |
| 466 if (simd_support & JSIMD_ARM_NEON) | |
| 467 return 1; | |
| 468 | |
| 469 return 0; | 316 return 0; |
| 470 } | 317 } |
| 471 | 318 |
| 472 GLOBAL(int) | 319 GLOBAL(int) |
| 473 jsimd_can_fdct_float (void) | 320 jsimd_can_fdct_float (void) |
| 474 { | 321 { |
| 475 init_simd(); | 322 init_simd(); |
| 476 | 323 |
| 477 return 0; | 324 return 0; |
| 478 } | 325 } |
| 479 | 326 |
| 480 GLOBAL(void) | 327 GLOBAL(void) |
| 481 jsimd_fdct_islow (DCTELEM * data) | 328 jsimd_fdct_islow (DCTELEM * data) |
| 482 { | 329 { |
| 483 } | 330 } |
| 484 | 331 |
| 485 GLOBAL(void) | 332 GLOBAL(void) |
| 486 jsimd_fdct_ifast (DCTELEM * data) | 333 jsimd_fdct_ifast (DCTELEM * data) |
| 487 { | 334 { |
| 488 if (simd_support & JSIMD_ARM_NEON) | |
| 489 jsimd_fdct_ifast_neon(data); | |
| 490 } | 335 } |
| 491 | 336 |
| 492 GLOBAL(void) | 337 GLOBAL(void) |
| 493 jsimd_fdct_float (FAST_FLOAT * data) | 338 jsimd_fdct_float (FAST_FLOAT * data) |
| 494 { | 339 { |
| 495 } | 340 } |
| 496 | 341 |
| 497 GLOBAL(int) | 342 GLOBAL(int) |
| 498 jsimd_can_quantize (void) | 343 jsimd_can_quantize (void) |
| 499 { | 344 { |
| 500 init_simd(); | 345 init_simd(); |
| 501 | 346 |
| 502 /* The code is optimised for these values only */ | |
| 503 if (DCTSIZE != 8) | |
| 504 return 0; | |
| 505 if (sizeof(JCOEF) != 2) | |
| 506 return 0; | |
| 507 if (sizeof(DCTELEM) != 2) | |
| 508 return 0; | |
| 509 | |
| 510 if (simd_support & JSIMD_ARM_NEON) | |
| 511 return 1; | |
| 512 | |
| 513 return 0; | 347 return 0; |
| 514 } | 348 } |
| 515 | 349 |
| 516 GLOBAL(int) | 350 GLOBAL(int) |
| 517 jsimd_can_quantize_float (void) | 351 jsimd_can_quantize_float (void) |
| 518 { | 352 { |
| 519 init_simd(); | 353 init_simd(); |
| 520 | 354 |
| 521 return 0; | 355 return 0; |
| 522 } | 356 } |
| 523 | 357 |
| 524 GLOBAL(void) | 358 GLOBAL(void) |
| 525 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 359 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
| 526 DCTELEM * workspace) | 360 DCTELEM * workspace) |
| 527 { | 361 { |
| 528 if (simd_support & JSIMD_ARM_NEON) | |
| 529 jsimd_quantize_neon(coef_block, divisors, workspace); | |
| 530 } | 362 } |
| 531 | 363 |
| 532 GLOBAL(void) | 364 GLOBAL(void) |
| 533 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 365 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
| 534 FAST_FLOAT * workspace) | 366 FAST_FLOAT * workspace) |
| 535 { | 367 { |
| 536 } | 368 } |
| 537 | 369 |
| 538 GLOBAL(int) | 370 GLOBAL(int) |
| 539 jsimd_can_idct_2x2 (void) | 371 jsimd_can_idct_2x2 (void) |
| 540 { | 372 { |
| 541 init_simd(); | 373 init_simd(); |
| 542 | 374 |
| 543 /* The code is optimised for these values only */ | 375 /* The code is optimised for these values only */ |
| 544 if (DCTSIZE != 8) | 376 if (DCTSIZE != 8) |
| 545 return 0; | 377 return 0; |
| 546 if (sizeof(JCOEF) != 2) | 378 if (sizeof(JCOEF) != 2) |
| 547 return 0; | 379 return 0; |
| 548 if (BITS_IN_JSAMPLE != 8) | 380 if (BITS_IN_JSAMPLE != 8) |
| 549 return 0; | 381 return 0; |
| 550 if (sizeof(JDIMENSION) != 4) | 382 if (sizeof(JDIMENSION) != 4) |
| 551 return 0; | 383 return 0; |
| 552 if (sizeof(ISLOW_MULT_TYPE) != 2) | 384 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 553 return 0; | 385 return 0; |
| 554 | 386 |
| 555 if ((simd_support & JSIMD_ARM_NEON)) | 387 if (simd_support & JSIMD_ARM_NEON) |
| 556 return 1; | 388 return 1; |
| 557 | 389 |
| 558 return 0; | 390 return 0; |
| 559 } | 391 } |
| 560 | 392 |
| 561 GLOBAL(int) | 393 GLOBAL(int) |
| 562 jsimd_can_idct_4x4 (void) | 394 jsimd_can_idct_4x4 (void) |
| 563 { | 395 { |
| 564 init_simd(); | 396 init_simd(); |
| 565 | 397 |
| 566 /* The code is optimised for these values only */ | 398 /* The code is optimised for these values only */ |
| 567 if (DCTSIZE != 8) | 399 if (DCTSIZE != 8) |
| 568 return 0; | 400 return 0; |
| 569 if (sizeof(JCOEF) != 2) | 401 if (sizeof(JCOEF) != 2) |
| 570 return 0; | 402 return 0; |
| 571 if (BITS_IN_JSAMPLE != 8) | 403 if (BITS_IN_JSAMPLE != 8) |
| 572 return 0; | 404 return 0; |
| 573 if (sizeof(JDIMENSION) != 4) | 405 if (sizeof(JDIMENSION) != 4) |
| 574 return 0; | 406 return 0; |
| 575 if (sizeof(ISLOW_MULT_TYPE) != 2) | 407 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 576 return 0; | 408 return 0; |
| 577 | 409 |
| 578 if ((simd_support & JSIMD_ARM_NEON)) | 410 if (simd_support & JSIMD_ARM_NEON) |
| 579 return 1; | 411 return 1; |
| 580 | 412 |
| 581 return 0; | 413 return 0; |
| 582 } | 414 } |
| 583 | 415 |
| 584 GLOBAL(void) | 416 GLOBAL(void) |
| 585 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 417 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 586 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 418 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 587 JDIMENSION output_col) | 419 JDIMENSION output_col) |
| 588 { | 420 { |
| 589 if ((simd_support & JSIMD_ARM_NEON)) | 421 if (simd_support & JSIMD_ARM_NEON) |
| 590 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); | 422 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, |
| 423 output_col); |
| 591 } | 424 } |
| 592 | 425 |
| 593 GLOBAL(void) | 426 GLOBAL(void) |
| 594 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 427 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 595 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 428 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 596 JDIMENSION output_col) | 429 JDIMENSION output_col) |
| 597 { | 430 { |
| 598 if ((simd_support & JSIMD_ARM_NEON)) | 431 if (simd_support & JSIMD_ARM_NEON) |
| 599 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); | 432 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, |
| 433 output_col); |
| 600 } | 434 } |
| 601 | 435 |
| 602 GLOBAL(int) | 436 GLOBAL(int) |
| 603 jsimd_can_idct_islow (void) | 437 jsimd_can_idct_islow (void) |
| 604 { | 438 { |
| 605 init_simd(); | 439 init_simd(); |
| 606 | 440 |
| 607 /* The code is optimised for these values only */ | 441 /* The code is optimised for these values only */ |
| 608 if (DCTSIZE != 8) | 442 if (DCTSIZE != 8) |
| 609 return 0; | 443 return 0; |
| (...skipping 24 matching lines...) Expand all Loading... |
| 634 return 0; | 468 return 0; |
| 635 if (BITS_IN_JSAMPLE != 8) | 469 if (BITS_IN_JSAMPLE != 8) |
| 636 return 0; | 470 return 0; |
| 637 if (sizeof(JDIMENSION) != 4) | 471 if (sizeof(JDIMENSION) != 4) |
| 638 return 0; | 472 return 0; |
| 639 if (sizeof(IFAST_MULT_TYPE) != 2) | 473 if (sizeof(IFAST_MULT_TYPE) != 2) |
| 640 return 0; | 474 return 0; |
| 641 if (IFAST_SCALE_BITS != 2) | 475 if (IFAST_SCALE_BITS != 2) |
| 642 return 0; | 476 return 0; |
| 643 | 477 |
| 644 if ((simd_support & JSIMD_ARM_NEON)) | 478 if (simd_support & JSIMD_ARM_NEON) |
| 645 return 1; | 479 return 1; |
| 646 | 480 |
| 647 return 0; | 481 return 0; |
| 648 } | 482 } |
| 649 | 483 |
| 650 GLOBAL(int) | 484 GLOBAL(int) |
| 651 jsimd_can_idct_float (void) | 485 jsimd_can_idct_float (void) |
| 652 { | 486 { |
| 653 init_simd(); | 487 init_simd(); |
| 654 | 488 |
| 655 return 0; | 489 return 0; |
| 656 } | 490 } |
| 657 | 491 |
| 658 GLOBAL(void) | 492 GLOBAL(void) |
| 659 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 493 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 660 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 494 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 661 JDIMENSION output_col) | 495 JDIMENSION output_col) |
| 662 { | 496 { |
| 663 if ((simd_support & JSIMD_ARM_NEON)) | 497 if (simd_support & JSIMD_ARM_NEON) |
| 664 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); | 498 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, |
| 499 output_col); |
| 665 } | 500 } |
| 666 | 501 |
| 667 GLOBAL(void) | 502 GLOBAL(void) |
| 668 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 503 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 669 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 504 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 670 JDIMENSION output_col) | 505 JDIMENSION output_col) |
| 671 { | 506 { |
| 672 if ((simd_support & JSIMD_ARM_NEON)) | 507 if (simd_support & JSIMD_ARM_NEON) |
| 673 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); | 508 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, |
| 509 output_col); |
| 674 } | 510 } |
| 675 | 511 |
| 676 GLOBAL(void) | 512 GLOBAL(void) |
| 677 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 513 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 678 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 514 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 679 JDIMENSION output_col) | 515 JDIMENSION output_col) |
| 680 { | 516 { |
| 681 } | 517 } |
| 682 | |
| OLD | NEW |