OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * jsimd_arm.c |
| 3 * |
| 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 * Copyright 2009-2011 D. R. Commander |
| 6 * |
| 7 * Based on the x86 SIMD extension for IJG JPEG library, |
| 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 * |
| 11 * This file contains the interface between the "normal" portions |
| 12 * of the library and the SIMD implementations when running on |
| 13 * ARM architecture. |
| 14 * |
| 15 * Based on the stubs from 'jsimd_none.c' |
| 16 */ |
| 17 |
| 18 #define JPEG_INTERNALS |
| 19 #include "../jinclude.h" |
| 20 #include "../jpeglib.h" |
| 21 #include "../jsimd.h" |
| 22 #include "../jdct.h" |
| 23 #include "../jsimddct.h" |
| 24 #include "jsimd.h" |
| 25 |
| 26 #include <stdio.h> |
| 27 #include <string.h> |
| 28 #include <ctype.h> |
| 29 |
| 30 static unsigned int simd_support = ~0; |
| 31 |
| 32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
| 33 |
| 34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) |
| 35 |
/*
 * Decide whether 'feature' is listed on a "Features" line of /proc/cpuinfo.
 *
 * buffer:  one NUL-terminated line of text.
 * feature: the capability name to look for (e.g. "neon").
 *
 * Returns 1 only when 'buffer' starts with "Features" and 'feature' occurs
 * after that as a complete whitespace-delimited word; returns 0 otherwise,
 * including when 'feature' is the empty string.
 */
LOCAL(int)
check_feature (const char *buffer, const char *feature)
{
  const char *field;
  const char *hit;
  size_t feature_len = strlen(feature);

  if (feature_len == 0)
    return 0;
  if (strncmp(buffer, "Features", 8) != 0)
    return 0;

  /* <ctype.h> functions require a value representable as unsigned char */
  field = buffer + 8;
  while (isspace((unsigned char) *field))
    field++;

  /*
   * Visit every occurrence of 'feature'.  The left boundary is always judged
   * against the fixed start of the field; letting the search origin double as
   * that anchor would accept a mere suffix such as "xneon" for "neon".
   */
  for (hit = strstr(field, feature); hit != NULL;
       hit = strstr(hit + 1, feature)) {
    int starts_word = (hit == field) || isspace((unsigned char) hit[-1]);
    int ends_word = (hit[feature_len] == '\0') ||
                    isspace((unsigned char) hit[feature_len]);

    if (starts_word && ends_word)
      return 1;
  }
  return 0;
}
| 63 |
| 64 LOCAL(int) |
| 65 parse_proc_cpuinfo (int bufsize) |
| 66 { |
| 67 char *buffer = (char *)malloc(bufsize); |
| 68 FILE *fd; |
| 69 simd_support = 0; |
| 70 |
| 71 if (!buffer) |
| 72 return 0; |
| 73 |
| 74 fd = fopen("/proc/cpuinfo", "r"); |
| 75 if (fd) { |
| 76 while (fgets(buffer, bufsize, fd)) { |
| 77 if (!strchr(buffer, '\n') && !feof(fd)) { |
| 78 /* "impossible" happened - insufficient size of the buffer! */ |
| 79 fclose(fd); |
| 80 free(buffer); |
| 81 return 0; |
| 82 } |
| 83 if (check_feature(buffer, "neon")) |
| 84 simd_support |= JSIMD_ARM_NEON; |
| 85 } |
| 86 fclose(fd); |
| 87 } |
| 88 free(buffer); |
| 89 return 1; |
| 90 } |
| 91 |
| 92 #endif |
| 93 |
| 94 /* |
| 95 * Check what SIMD accelerations are supported. |
| 96 * |
| 97 * FIXME: This code is racy under a multi-threaded environment. |
| 98 */ |
| 99 LOCAL(void) |
| 100 init_simd (void) |
| 101 { |
| 102 char *env = NULL; |
| 103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(
__ANDROID__) |
| 104 int bufsize = 1024; /* an initial guess for the line buffer size limit */ |
| 105 #endif |
| 106 |
| 107 if (simd_support != ~0) |
| 108 return; |
| 109 |
| 110 simd_support = 0; |
| 111 |
| 112 #if defined(__ARM_NEON__) |
| 113 simd_support |= JSIMD_ARM_NEON; |
| 114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
| 115 /* We still have a chance to use NEON regardless of globally used |
| 116 * -mcpu/-mfpu options passed to gcc by performing runtime detection via |
| 117 * /proc/cpuinfo parsing on linux/android */ |
| 118 while (!parse_proc_cpuinfo(bufsize)) { |
| 119 bufsize *= 2; |
| 120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) |
| 121 break; |
| 122 } |
| 123 #endif |
| 124 |
| 125 /* Force different settings through environment variables */ |
| 126 env = getenv("JSIMD_FORCE_ARM_NEON"); |
| 127 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 128 simd_support &= JSIMD_ARM_NEON; |
| 129 env = getenv("JSIMD_FORCE_NO_SIMD"); |
| 130 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 131 simd_support = 0; |
| 132 } |
| 133 |
| 134 GLOBAL(int) |
| 135 jsimd_can_rgb_ycc (void) |
| 136 { |
| 137 init_simd(); |
| 138 |
| 139 return 0; |
| 140 } |
| 141 |
| 142 GLOBAL(int) |
| 143 jsimd_can_rgb_gray (void) |
| 144 { |
| 145 init_simd(); |
| 146 |
| 147 return 0; |
| 148 } |
| 149 |
| 150 GLOBAL(int) |
| 151 jsimd_can_ycc_rgb (void) |
| 152 { |
| 153 init_simd(); |
| 154 |
| 155 /* The code is optimised for these values only */ |
| 156 if (BITS_IN_JSAMPLE != 8) |
| 157 return 0; |
| 158 if (sizeof(JDIMENSION) != 4) |
| 159 return 0; |
| 160 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 161 return 0; |
| 162 if (simd_support & JSIMD_ARM_NEON) |
| 163 return 1; |
| 164 |
| 165 return 0; |
| 166 } |
| 167 |
| 168 GLOBAL(void) |
| 169 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 170 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 171 JDIMENSION output_row, int num_rows) |
| 172 { |
| 173 } |
| 174 |
| 175 GLOBAL(void) |
| 176 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 177 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 178 JDIMENSION output_row, int num_rows) |
| 179 { |
| 180 } |
| 181 |
| 182 GLOBAL(void) |
| 183 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 184 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 185 JSAMPARRAY output_buf, int num_rows) |
| 186 { |
| 187 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 188 |
| 189 switch(cinfo->out_color_space) |
| 190 { |
| 191 case JCS_EXT_RGB: |
| 192 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 193 break; |
| 194 case JCS_EXT_RGBX: |
| 195 neonfct=jsimd_ycc_extrgbx_convert_neon; |
| 196 break; |
| 197 case JCS_EXT_BGR: |
| 198 neonfct=jsimd_ycc_extbgr_convert_neon; |
| 199 break; |
| 200 case JCS_EXT_BGRX: |
| 201 neonfct=jsimd_ycc_extbgrx_convert_neon; |
| 202 break; |
| 203 case JCS_EXT_XBGR: |
| 204 neonfct=jsimd_ycc_extxbgr_convert_neon; |
| 205 break; |
| 206 case JCS_EXT_XRGB: |
| 207 neonfct=jsimd_ycc_extxrgb_convert_neon; |
| 208 break; |
| 209 default: |
| 210 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 211 break; |
| 212 } |
| 213 |
| 214 if (simd_support & JSIMD_ARM_NEON) |
| 215 neonfct(cinfo->output_width, input_buf, |
| 216 input_row, output_buf, num_rows); |
| 217 } |
| 218 |
| 219 GLOBAL(int) |
| 220 jsimd_can_h2v2_downsample (void) |
| 221 { |
| 222 init_simd(); |
| 223 |
| 224 return 0; |
| 225 } |
| 226 |
| 227 GLOBAL(int) |
| 228 jsimd_can_h2v1_downsample (void) |
| 229 { |
| 230 init_simd(); |
| 231 |
| 232 return 0; |
| 233 } |
| 234 |
| 235 GLOBAL(void) |
| 236 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 237 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 238 { |
| 239 } |
| 240 |
| 241 GLOBAL(void) |
| 242 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 243 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 244 { |
| 245 } |
| 246 |
| 247 GLOBAL(int) |
| 248 jsimd_can_h2v2_upsample (void) |
| 249 { |
| 250 init_simd(); |
| 251 |
| 252 return 0; |
| 253 } |
| 254 |
| 255 GLOBAL(int) |
| 256 jsimd_can_h2v1_upsample (void) |
| 257 { |
| 258 init_simd(); |
| 259 |
| 260 return 0; |
| 261 } |
| 262 |
| 263 GLOBAL(void) |
| 264 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 265 jpeg_component_info * compptr, |
| 266 JSAMPARRAY input_data, |
| 267 JSAMPARRAY * output_data_ptr) |
| 268 { |
| 269 } |
| 270 |
| 271 GLOBAL(void) |
| 272 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 273 jpeg_component_info * compptr, |
| 274 JSAMPARRAY input_data, |
| 275 JSAMPARRAY * output_data_ptr) |
| 276 { |
| 277 } |
| 278 |
| 279 GLOBAL(int) |
| 280 jsimd_can_h2v2_fancy_upsample (void) |
| 281 { |
| 282 init_simd(); |
| 283 |
| 284 return 0; |
| 285 } |
| 286 |
| 287 GLOBAL(int) |
| 288 jsimd_can_h2v1_fancy_upsample (void) |
| 289 { |
| 290 init_simd(); |
| 291 |
| 292 return 0; |
| 293 } |
| 294 |
| 295 GLOBAL(void) |
| 296 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 297 jpeg_component_info * compptr, |
| 298 JSAMPARRAY input_data, |
| 299 JSAMPARRAY * output_data_ptr) |
| 300 { |
| 301 } |
| 302 |
| 303 GLOBAL(void) |
| 304 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 305 jpeg_component_info * compptr, |
| 306 JSAMPARRAY input_data, |
| 307 JSAMPARRAY * output_data_ptr) |
| 308 { |
| 309 } |
| 310 |
| 311 GLOBAL(int) |
| 312 jsimd_can_h2v2_merged_upsample (void) |
| 313 { |
| 314 init_simd(); |
| 315 |
| 316 return 0; |
| 317 } |
| 318 |
| 319 GLOBAL(int) |
| 320 jsimd_can_h2v1_merged_upsample (void) |
| 321 { |
| 322 init_simd(); |
| 323 |
| 324 return 0; |
| 325 } |
| 326 |
| 327 GLOBAL(void) |
| 328 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
| 329 JSAMPIMAGE input_buf, |
| 330 JDIMENSION in_row_group_ctr, |
| 331 JSAMPARRAY output_buf) |
| 332 { |
| 333 } |
| 334 |
| 335 GLOBAL(void) |
| 336 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
| 337 JSAMPIMAGE input_buf, |
| 338 JDIMENSION in_row_group_ctr, |
| 339 JSAMPARRAY output_buf) |
| 340 { |
| 341 } |
| 342 |
| 343 GLOBAL(int) |
| 344 jsimd_can_convsamp (void) |
| 345 { |
| 346 init_simd(); |
| 347 |
| 348 return 0; |
| 349 } |
| 350 |
| 351 GLOBAL(int) |
| 352 jsimd_can_convsamp_float (void) |
| 353 { |
| 354 init_simd(); |
| 355 |
| 356 return 0; |
| 357 } |
| 358 |
| 359 GLOBAL(void) |
| 360 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 361 DCTELEM * workspace) |
| 362 { |
| 363 } |
| 364 |
| 365 GLOBAL(void) |
| 366 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 367 FAST_FLOAT * workspace) |
| 368 { |
| 369 } |
| 370 |
| 371 GLOBAL(int) |
| 372 jsimd_can_fdct_islow (void) |
| 373 { |
| 374 init_simd(); |
| 375 |
| 376 return 0; |
| 377 } |
| 378 |
| 379 GLOBAL(int) |
| 380 jsimd_can_fdct_ifast (void) |
| 381 { |
| 382 init_simd(); |
| 383 |
| 384 return 0; |
| 385 } |
| 386 |
| 387 GLOBAL(int) |
| 388 jsimd_can_fdct_float (void) |
| 389 { |
| 390 init_simd(); |
| 391 |
| 392 return 0; |
| 393 } |
| 394 |
| 395 GLOBAL(void) |
| 396 jsimd_fdct_islow (DCTELEM * data) |
| 397 { |
| 398 } |
| 399 |
| 400 GLOBAL(void) |
| 401 jsimd_fdct_ifast (DCTELEM * data) |
| 402 { |
| 403 } |
| 404 |
| 405 GLOBAL(void) |
| 406 jsimd_fdct_float (FAST_FLOAT * data) |
| 407 { |
| 408 } |
| 409 |
| 410 GLOBAL(int) |
| 411 jsimd_can_quantize (void) |
| 412 { |
| 413 init_simd(); |
| 414 |
| 415 return 0; |
| 416 } |
| 417 |
| 418 GLOBAL(int) |
| 419 jsimd_can_quantize_float (void) |
| 420 { |
| 421 init_simd(); |
| 422 |
| 423 return 0; |
| 424 } |
| 425 |
| 426 GLOBAL(void) |
| 427 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
| 428 DCTELEM * workspace) |
| 429 { |
| 430 } |
| 431 |
| 432 GLOBAL(void) |
| 433 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
| 434 FAST_FLOAT * workspace) |
| 435 { |
| 436 } |
| 437 |
| 438 GLOBAL(int) |
| 439 jsimd_can_idct_2x2 (void) |
| 440 { |
| 441 init_simd(); |
| 442 |
| 443 /* The code is optimised for these values only */ |
| 444 if (DCTSIZE != 8) |
| 445 return 0; |
| 446 if (sizeof(JCOEF) != 2) |
| 447 return 0; |
| 448 if (BITS_IN_JSAMPLE != 8) |
| 449 return 0; |
| 450 if (sizeof(JDIMENSION) != 4) |
| 451 return 0; |
| 452 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 453 return 0; |
| 454 |
| 455 if ((simd_support & JSIMD_ARM_NEON)) |
| 456 return 1; |
| 457 |
| 458 return 0; |
| 459 } |
| 460 |
| 461 GLOBAL(int) |
| 462 jsimd_can_idct_4x4 (void) |
| 463 { |
| 464 init_simd(); |
| 465 |
| 466 /* The code is optimised for these values only */ |
| 467 if (DCTSIZE != 8) |
| 468 return 0; |
| 469 if (sizeof(JCOEF) != 2) |
| 470 return 0; |
| 471 if (BITS_IN_JSAMPLE != 8) |
| 472 return 0; |
| 473 if (sizeof(JDIMENSION) != 4) |
| 474 return 0; |
| 475 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 476 return 0; |
| 477 |
| 478 if ((simd_support & JSIMD_ARM_NEON)) |
| 479 return 1; |
| 480 |
| 481 return 0; |
| 482 } |
| 483 |
| 484 GLOBAL(void) |
| 485 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 486 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 487 JDIMENSION output_col) |
| 488 { |
| 489 if ((simd_support & JSIMD_ARM_NEON)) |
| 490 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); |
| 491 } |
| 492 |
| 493 GLOBAL(void) |
| 494 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 495 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 496 JDIMENSION output_col) |
| 497 { |
| 498 if ((simd_support & JSIMD_ARM_NEON)) |
| 499 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); |
| 500 } |
| 501 |
| 502 GLOBAL(int) |
| 503 jsimd_can_idct_islow (void) |
| 504 { |
| 505 init_simd(); |
| 506 |
| 507 return 0; |
| 508 } |
| 509 |
| 510 GLOBAL(int) |
| 511 jsimd_can_idct_ifast (void) |
| 512 { |
| 513 init_simd(); |
| 514 |
| 515 /* The code is optimised for these values only */ |
| 516 if (DCTSIZE != 8) |
| 517 return 0; |
| 518 if (sizeof(JCOEF) != 2) |
| 519 return 0; |
| 520 if (BITS_IN_JSAMPLE != 8) |
| 521 return 0; |
| 522 if (sizeof(JDIMENSION) != 4) |
| 523 return 0; |
| 524 if (sizeof(IFAST_MULT_TYPE) != 2) |
| 525 return 0; |
| 526 if (IFAST_SCALE_BITS != 2) |
| 527 return 0; |
| 528 |
| 529 if ((simd_support & JSIMD_ARM_NEON)) |
| 530 return 1; |
| 531 |
| 532 return 0; |
| 533 } |
| 534 |
| 535 GLOBAL(int) |
| 536 jsimd_can_idct_float (void) |
| 537 { |
| 538 init_simd(); |
| 539 |
| 540 return 0; |
| 541 } |
| 542 |
| 543 GLOBAL(void) |
| 544 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 545 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 546 JDIMENSION output_col) |
| 547 { |
| 548 } |
| 549 |
| 550 GLOBAL(void) |
| 551 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 552 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 553 JDIMENSION output_col) |
| 554 { |
| 555 if ((simd_support & JSIMD_ARM_NEON)) |
| 556 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col
); |
| 557 } |
| 558 |
| 559 GLOBAL(void) |
| 560 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 561 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 562 JDIMENSION output_col) |
| 563 { |
| 564 } |
| 565 |
OLD | NEW |