OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * jsimd_i386.c |
| 3 * |
| 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 * Copyright 2009 D. R. Commander |
| 6 * |
| 7 * Based on the x86 SIMD extension for IJG JPEG library, |
| 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 * |
| 10 * This file contains the interface between the "normal" portions |
| 11 * of the library and the SIMD implementations when running on a |
| 12 * 32-bit x86 architecture. |
| 13 */ |
| 14 |
| 15 #define JPEG_INTERNALS |
| 16 #include "../jinclude.h" |
| 17 #include "../jpeglib.h" |
| 18 #include "../jsimd.h" |
| 19 #include "../jdct.h" |
| 20 #include "../jsimddct.h" |
| 21 #include "jsimd.h" |
| 22 |
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the low `order` bits of ptr
 * are all zero, i.e. when ptr is a multiple of (1 << order).  Both
 * arguments are parenthesized so that arbitrary expressions can be
 * passed without operator-precedence surprises.
 */
#define IS_ALIGNED(ptr, order) \
  (((unsigned)(ptr) & ((1u << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
| 30 |
/* Bitmask of usable SIMD extensions; all bits set means "not yet probed". */
static unsigned int simd_support = ~0U;
| 32 |
| 33 /* |
| 34 * Check what SIMD accelerations are supported. |
| 35 * |
| 36 * FIXME: This code is racy under a multi-threaded environment. |
| 37 */ |
| 38 LOCAL(void) |
| 39 init_simd (void) |
| 40 { |
| 41 char *env = NULL; |
| 42 |
| 43 if (simd_support != ~0) |
| 44 return; |
| 45 |
| 46 simd_support = jpeg_simd_cpu_support(); |
| 47 |
| 48 /* Force different settings through environment variables */ |
| 49 env = getenv("JSIMD_FORCEMMX"); |
| 50 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 51 simd_support &= JSIMD_MMX; |
| 52 env = getenv("JSIMD_FORCE3DNOW"); |
| 53 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 54 simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
| 55 env = getenv("JSIMD_FORCESSE"); |
| 56 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 57 simd_support &= JSIMD_SSE|JSIMD_MMX; |
| 58 env = getenv("JSIMD_FORCESSE2"); |
| 59 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 60 simd_support &= JSIMD_SSE2; |
| 61 } |
| 62 |
| 63 GLOBAL(int) |
| 64 jsimd_can_rgb_ycc (void) |
| 65 { |
| 66 init_simd(); |
| 67 |
| 68 /* The code is optimised for these values only */ |
| 69 if (BITS_IN_JSAMPLE != 8) |
| 70 return 0; |
| 71 if (sizeof(JDIMENSION) != 4) |
| 72 return 0; |
| 73 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 74 return 0; |
| 75 |
| 76 if ((simd_support & JSIMD_SSE2) && |
| 77 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 78 return 1; |
| 79 if (simd_support & JSIMD_MMX) |
| 80 return 1; |
| 81 |
| 82 return 0; |
| 83 } |
| 84 |
| 85 GLOBAL(int) |
| 86 jsimd_can_ycc_rgb (void) |
| 87 { |
| 88 init_simd(); |
| 89 |
| 90 /* The code is optimised for these values only */ |
| 91 if (BITS_IN_JSAMPLE != 8) |
| 92 return 0; |
| 93 if (sizeof(JDIMENSION) != 4) |
| 94 return 0; |
| 95 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 96 return 0; |
| 97 |
| 98 if ((simd_support & JSIMD_SSE2) && |
| 99 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
| 100 return 1; |
| 101 if (simd_support & JSIMD_MMX) |
| 102 return 1; |
| 103 |
| 104 return 0; |
| 105 } |
| 106 |
| 107 GLOBAL(void) |
| 108 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 109 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 110 JDIMENSION output_row, int num_rows) |
| 111 { |
| 112 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 113 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 114 |
| 115 switch(cinfo->in_color_space) |
| 116 { |
| 117 case JCS_EXT_RGB: |
| 118 sse2fct=jsimd_extrgb_ycc_convert_sse2; |
| 119 mmxfct=jsimd_extrgb_ycc_convert_mmx; |
| 120 break; |
| 121 case JCS_EXT_RGBX: |
| 122 sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
| 123 mmxfct=jsimd_extrgbx_ycc_convert_mmx; |
| 124 break; |
| 125 case JCS_EXT_BGR: |
| 126 sse2fct=jsimd_extbgr_ycc_convert_sse2; |
| 127 mmxfct=jsimd_extbgr_ycc_convert_mmx; |
| 128 break; |
| 129 case JCS_EXT_BGRX: |
| 130 sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
| 131 mmxfct=jsimd_extbgrx_ycc_convert_mmx; |
| 132 break; |
| 133 case JCS_EXT_XBGR: |
| 134 sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
| 135 mmxfct=jsimd_extxbgr_ycc_convert_mmx; |
| 136 break; |
| 137 case JCS_EXT_XRGB: |
| 138 sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
| 139 mmxfct=jsimd_extxrgb_ycc_convert_mmx; |
| 140 break; |
| 141 default: |
| 142 sse2fct=jsimd_rgb_ycc_convert_sse2; |
| 143 mmxfct=jsimd_rgb_ycc_convert_mmx; |
| 144 break; |
| 145 } |
| 146 |
| 147 if ((simd_support & JSIMD_SSE2) && |
| 148 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 149 sse2fct(cinfo->image_width, input_buf, |
| 150 output_buf, output_row, num_rows); |
| 151 else if (simd_support & JSIMD_MMX) |
| 152 mmxfct(cinfo->image_width, input_buf, |
| 153 output_buf, output_row, num_rows); |
| 154 } |
| 155 |
| 156 GLOBAL(void) |
| 157 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 158 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 159 JSAMPARRAY output_buf, int num_rows) |
| 160 { |
| 161 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 162 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 163 |
| 164 switch(cinfo->out_color_space) |
| 165 { |
| 166 case JCS_EXT_RGB: |
| 167 sse2fct=jsimd_ycc_extrgb_convert_sse2; |
| 168 mmxfct=jsimd_ycc_extrgb_convert_mmx; |
| 169 break; |
| 170 case JCS_EXT_RGBX: |
| 171 sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
| 172 mmxfct=jsimd_ycc_extrgbx_convert_mmx; |
| 173 break; |
| 174 case JCS_EXT_BGR: |
| 175 sse2fct=jsimd_ycc_extbgr_convert_sse2; |
| 176 mmxfct=jsimd_ycc_extbgr_convert_mmx; |
| 177 break; |
| 178 case JCS_EXT_BGRX: |
| 179 sse2fct=jsimd_ycc_extbgrx_convert_sse2; |
| 180 mmxfct=jsimd_ycc_extbgrx_convert_mmx; |
| 181 break; |
| 182 case JCS_EXT_XBGR: |
| 183 sse2fct=jsimd_ycc_extxbgr_convert_sse2; |
| 184 mmxfct=jsimd_ycc_extxbgr_convert_mmx; |
| 185 break; |
| 186 case JCS_EXT_XRGB: |
| 187 sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
| 188 mmxfct=jsimd_ycc_extxrgb_convert_mmx; |
| 189 break; |
| 190 default: |
| 191 sse2fct=jsimd_ycc_rgb_convert_sse2; |
| 192 mmxfct=jsimd_ycc_rgb_convert_mmx; |
| 193 break; |
| 194 } |
| 195 |
| 196 if ((simd_support & JSIMD_SSE2) && |
| 197 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
| 198 sse2fct(cinfo->output_width, input_buf, |
| 199 input_row, output_buf, num_rows); |
| 200 else if (simd_support & JSIMD_MMX) |
| 201 mmxfct(cinfo->output_width, input_buf, |
| 202 input_row, output_buf, num_rows); |
| 203 } |
| 204 |
| 205 GLOBAL(int) |
| 206 jsimd_can_h2v2_downsample (void) |
| 207 { |
| 208 init_simd(); |
| 209 |
| 210 /* The code is optimised for these values only */ |
| 211 if (BITS_IN_JSAMPLE != 8) |
| 212 return 0; |
| 213 if (sizeof(JDIMENSION) != 4) |
| 214 return 0; |
| 215 |
| 216 if (simd_support & JSIMD_SSE2) |
| 217 return 1; |
| 218 if (simd_support & JSIMD_MMX) |
| 219 return 1; |
| 220 |
| 221 return 0; |
| 222 } |
| 223 |
| 224 GLOBAL(int) |
| 225 jsimd_can_h2v1_downsample (void) |
| 226 { |
| 227 init_simd(); |
| 228 |
| 229 /* The code is optimised for these values only */ |
| 230 if (BITS_IN_JSAMPLE != 8) |
| 231 return 0; |
| 232 if (sizeof(JDIMENSION) != 4) |
| 233 return 0; |
| 234 |
| 235 if (simd_support & JSIMD_SSE2) |
| 236 return 1; |
| 237 if (simd_support & JSIMD_MMX) |
| 238 return 1; |
| 239 |
| 240 return 0; |
| 241 } |
| 242 |
| 243 GLOBAL(void) |
| 244 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 245 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 246 { |
| 247 if (simd_support & JSIMD_SSE2) |
| 248 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 249 compptr->v_samp_factor, compptr->width_in_blocks, |
| 250 input_data, output_data); |
| 251 else if (simd_support & JSIMD_MMX) |
| 252 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
| 253 compptr->v_samp_factor, compptr->width_in_blocks, |
| 254 input_data, output_data); |
| 255 } |
| 256 |
| 257 GLOBAL(void) |
| 258 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 259 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 260 { |
| 261 if (simd_support & JSIMD_SSE2) |
| 262 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 263 compptr->v_samp_factor, compptr->width_in_blocks, |
| 264 input_data, output_data); |
| 265 else if (simd_support & JSIMD_MMX) |
| 266 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
| 267 compptr->v_samp_factor, compptr->width_in_blocks, |
| 268 input_data, output_data); |
| 269 } |
| 270 |
| 271 GLOBAL(int) |
| 272 jsimd_can_h2v2_upsample (void) |
| 273 { |
| 274 init_simd(); |
| 275 |
| 276 /* The code is optimised for these values only */ |
| 277 if (BITS_IN_JSAMPLE != 8) |
| 278 return 0; |
| 279 if (sizeof(JDIMENSION) != 4) |
| 280 return 0; |
| 281 |
| 282 if (simd_support & JSIMD_SSE2) |
| 283 return 1; |
| 284 if (simd_support & JSIMD_MMX) |
| 285 return 1; |
| 286 |
| 287 return 0; |
| 288 } |
| 289 |
| 290 GLOBAL(int) |
| 291 jsimd_can_h2v1_upsample (void) |
| 292 { |
| 293 init_simd(); |
| 294 |
| 295 /* The code is optimised for these values only */ |
| 296 if (BITS_IN_JSAMPLE != 8) |
| 297 return 0; |
| 298 if (sizeof(JDIMENSION) != 4) |
| 299 return 0; |
| 300 |
| 301 if (simd_support & JSIMD_SSE2) |
| 302 return 1; |
| 303 if (simd_support & JSIMD_MMX) |
| 304 return 1; |
| 305 |
| 306 return 0; |
| 307 } |
| 308 |
| 309 GLOBAL(void) |
| 310 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 311 jpeg_component_info * compptr, |
| 312 JSAMPARRAY input_data, |
| 313 JSAMPARRAY * output_data_ptr) |
| 314 { |
| 315 if (simd_support & JSIMD_SSE2) |
| 316 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, |
| 317 cinfo->output_width, input_data, output_data_ptr); |
| 318 else if (simd_support & JSIMD_MMX) |
| 319 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, |
| 320 cinfo->output_width, input_data, output_data_ptr); |
| 321 } |
| 322 |
| 323 GLOBAL(void) |
| 324 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 325 jpeg_component_info * compptr, |
| 326 JSAMPARRAY input_data, |
| 327 JSAMPARRAY * output_data_ptr) |
| 328 { |
| 329 if (simd_support & JSIMD_SSE2) |
| 330 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, |
| 331 cinfo->output_width, input_data, output_data_ptr); |
| 332 else if (simd_support & JSIMD_MMX) |
| 333 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, |
| 334 cinfo->output_width, input_data, output_data_ptr); |
| 335 } |
| 336 |
| 337 GLOBAL(int) |
| 338 jsimd_can_h2v2_fancy_upsample (void) |
| 339 { |
| 340 init_simd(); |
| 341 |
| 342 /* The code is optimised for these values only */ |
| 343 if (BITS_IN_JSAMPLE != 8) |
| 344 return 0; |
| 345 if (sizeof(JDIMENSION) != 4) |
| 346 return 0; |
| 347 |
| 348 if ((simd_support & JSIMD_SSE2) && |
| 349 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 350 return 1; |
| 351 if (simd_support & JSIMD_MMX) |
| 352 return 1; |
| 353 |
| 354 return 0; |
| 355 } |
| 356 |
| 357 GLOBAL(int) |
| 358 jsimd_can_h2v1_fancy_upsample (void) |
| 359 { |
| 360 init_simd(); |
| 361 |
| 362 /* The code is optimised for these values only */ |
| 363 if (BITS_IN_JSAMPLE != 8) |
| 364 return 0; |
| 365 if (sizeof(JDIMENSION) != 4) |
| 366 return 0; |
| 367 |
| 368 if ((simd_support & JSIMD_SSE2) && |
| 369 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 370 return 1; |
| 371 if (simd_support & JSIMD_MMX) |
| 372 return 1; |
| 373 |
| 374 return 0; |
| 375 } |
| 376 |
| 377 GLOBAL(void) |
| 378 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 379 jpeg_component_info * compptr, |
| 380 JSAMPARRAY input_data, |
| 381 JSAMPARRAY * output_data_ptr) |
| 382 { |
| 383 if ((simd_support & JSIMD_SSE2) && |
| 384 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 385 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 386 compptr->downsampled_width, input_data, output_data_ptr); |
| 387 else if (simd_support & JSIMD_MMX) |
| 388 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
| 389 compptr->downsampled_width, input_data, output_data_ptr); |
| 390 } |
| 391 |
| 392 GLOBAL(void) |
| 393 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 394 jpeg_component_info * compptr, |
| 395 JSAMPARRAY input_data, |
| 396 JSAMPARRAY * output_data_ptr) |
| 397 { |
| 398 if ((simd_support & JSIMD_SSE2) && |
| 399 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 400 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 401 compptr->downsampled_width, input_data, output_data_ptr); |
| 402 else if (simd_support & JSIMD_MMX) |
| 403 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
| 404 compptr->downsampled_width, input_data, output_data_ptr); |
| 405 } |
| 406 |
| 407 GLOBAL(int) |
| 408 jsimd_can_h2v2_merged_upsample (void) |
| 409 { |
| 410 init_simd(); |
| 411 |
| 412 /* The code is optimised for these values only */ |
| 413 if (BITS_IN_JSAMPLE != 8) |
| 414 return 0; |
| 415 if (sizeof(JDIMENSION) != 4) |
| 416 return 0; |
| 417 |
| 418 if ((simd_support & JSIMD_SSE2) && |
| 419 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 420 return 1; |
| 421 if (simd_support & JSIMD_MMX) |
| 422 return 1; |
| 423 |
| 424 return 0; |
| 425 } |
| 426 |
| 427 GLOBAL(int) |
| 428 jsimd_can_h2v1_merged_upsample (void) |
| 429 { |
| 430 init_simd(); |
| 431 |
| 432 /* The code is optimised for these values only */ |
| 433 if (BITS_IN_JSAMPLE != 8) |
| 434 return 0; |
| 435 if (sizeof(JDIMENSION) != 4) |
| 436 return 0; |
| 437 |
| 438 if ((simd_support & JSIMD_SSE2) && |
| 439 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 440 return 1; |
| 441 if (simd_support & JSIMD_MMX) |
| 442 return 1; |
| 443 |
| 444 return 0; |
| 445 } |
| 446 |
| 447 GLOBAL(void) |
| 448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
| 449 JSAMPIMAGE input_buf, |
| 450 JDIMENSION in_row_group_ctr, |
| 451 JSAMPARRAY output_buf) |
| 452 { |
| 453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 454 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 455 |
| 456 switch(cinfo->out_color_space) |
| 457 { |
| 458 case JCS_EXT_RGB: |
| 459 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
| 460 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; |
| 461 break; |
| 462 case JCS_EXT_RGBX: |
| 463 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
| 464 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; |
| 465 break; |
| 466 case JCS_EXT_BGR: |
| 467 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
| 468 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; |
| 469 break; |
| 470 case JCS_EXT_BGRX: |
| 471 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; |
| 472 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; |
| 473 break; |
| 474 case JCS_EXT_XBGR: |
| 475 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; |
| 476 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; |
| 477 break; |
| 478 case JCS_EXT_XRGB: |
| 479 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; |
| 480 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; |
| 481 break; |
| 482 default: |
| 483 sse2fct=jsimd_h2v2_merged_upsample_sse2; |
| 484 mmxfct=jsimd_h2v2_merged_upsample_mmx; |
| 485 break; |
| 486 } |
| 487 |
| 488 if ((simd_support & JSIMD_SSE2) && |
| 489 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 490 sse2fct(cinfo->output_width, input_buf, |
| 491 in_row_group_ctr, output_buf); |
| 492 else if (simd_support & JSIMD_MMX) |
| 493 mmxfct(cinfo->output_width, input_buf, |
| 494 in_row_group_ctr, output_buf); |
| 495 } |
| 496 |
| 497 GLOBAL(void) |
| 498 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
| 499 JSAMPIMAGE input_buf, |
| 500 JDIMENSION in_row_group_ctr, |
| 501 JSAMPARRAY output_buf) |
| 502 { |
| 503 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 504 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 505 |
| 506 switch(cinfo->out_color_space) |
| 507 { |
| 508 case JCS_EXT_RGB: |
| 509 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
| 510 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; |
| 511 break; |
| 512 case JCS_EXT_RGBX: |
| 513 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
| 514 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; |
| 515 break; |
| 516 case JCS_EXT_BGR: |
| 517 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
| 518 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; |
| 519 break; |
| 520 case JCS_EXT_BGRX: |
| 521 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; |
| 522 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; |
| 523 break; |
| 524 case JCS_EXT_XBGR: |
| 525 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; |
| 526 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; |
| 527 break; |
| 528 case JCS_EXT_XRGB: |
| 529 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
| 530 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; |
| 531 break; |
| 532 default: |
| 533 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
| 534 mmxfct=jsimd_h2v1_merged_upsample_mmx; |
| 535 break; |
| 536 } |
| 537 |
| 538 if ((simd_support & JSIMD_SSE2) && |
| 539 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 540 sse2fct(cinfo->output_width, input_buf, |
| 541 in_row_group_ctr, output_buf); |
| 542 else if (simd_support & JSIMD_MMX) |
| 543 mmxfct(cinfo->output_width, input_buf, |
| 544 in_row_group_ctr, output_buf); |
| 545 } |
| 546 |
| 547 GLOBAL(int) |
| 548 jsimd_can_convsamp (void) |
| 549 { |
| 550 init_simd(); |
| 551 |
| 552 /* The code is optimised for these values only */ |
| 553 if (DCTSIZE != 8) |
| 554 return 0; |
| 555 if (BITS_IN_JSAMPLE != 8) |
| 556 return 0; |
| 557 if (sizeof(JDIMENSION) != 4) |
| 558 return 0; |
| 559 if (sizeof(DCTELEM) != 2) |
| 560 return 0; |
| 561 |
| 562 if (simd_support & JSIMD_SSE2) |
| 563 return 1; |
| 564 if (simd_support & JSIMD_MMX) |
| 565 return 1; |
| 566 |
| 567 return 0; |
| 568 } |
| 569 |
| 570 GLOBAL(int) |
| 571 jsimd_can_convsamp_float (void) |
| 572 { |
| 573 init_simd(); |
| 574 |
| 575 /* The code is optimised for these values only */ |
| 576 if (DCTSIZE != 8) |
| 577 return 0; |
| 578 if (BITS_IN_JSAMPLE != 8) |
| 579 return 0; |
| 580 if (sizeof(JDIMENSION) != 4) |
| 581 return 0; |
| 582 if (sizeof(FAST_FLOAT) != 4) |
| 583 return 0; |
| 584 |
| 585 if (simd_support & JSIMD_SSE2) |
| 586 return 1; |
| 587 if (simd_support & JSIMD_SSE) |
| 588 return 1; |
| 589 if (simd_support & JSIMD_3DNOW) |
| 590 return 1; |
| 591 |
| 592 return 0; |
| 593 } |
| 594 |
| 595 GLOBAL(void) |
| 596 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 597 DCTELEM * workspace) |
| 598 { |
| 599 if (simd_support & JSIMD_SSE2) |
| 600 jsimd_convsamp_sse2(sample_data, start_col, workspace); |
| 601 else if (simd_support & JSIMD_MMX) |
| 602 jsimd_convsamp_mmx(sample_data, start_col, workspace); |
| 603 } |
| 604 |
| 605 GLOBAL(void) |
| 606 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 607 FAST_FLOAT * workspace) |
| 608 { |
| 609 if (simd_support & JSIMD_SSE2) |
| 610 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
| 611 else if (simd_support & JSIMD_SSE) |
| 612 jsimd_convsamp_float_sse(sample_data, start_col, workspace); |
| 613 else if (simd_support & JSIMD_3DNOW) |
| 614 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); |
| 615 } |
| 616 |
| 617 GLOBAL(int) |
| 618 jsimd_can_fdct_islow (void) |
| 619 { |
| 620 init_simd(); |
| 621 |
| 622 /* The code is optimised for these values only */ |
| 623 if (DCTSIZE != 8) |
| 624 return 0; |
| 625 if (sizeof(DCTELEM) != 2) |
| 626 return 0; |
| 627 |
| 628 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 629 return 1; |
| 630 if (simd_support & JSIMD_MMX) |
| 631 return 1; |
| 632 |
| 633 return 0; |
| 634 } |
| 635 |
| 636 GLOBAL(int) |
| 637 jsimd_can_fdct_ifast (void) |
| 638 { |
| 639 init_simd(); |
| 640 |
| 641 /* The code is optimised for these values only */ |
| 642 if (DCTSIZE != 8) |
| 643 return 0; |
| 644 if (sizeof(DCTELEM) != 2) |
| 645 return 0; |
| 646 |
| 647 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) |
| 648 return 1; |
| 649 if (simd_support & JSIMD_MMX) |
| 650 return 1; |
| 651 |
| 652 return 0; |
| 653 } |
| 654 |
| 655 GLOBAL(int) |
| 656 jsimd_can_fdct_float (void) |
| 657 { |
| 658 init_simd(); |
| 659 |
| 660 /* The code is optimised for these values only */ |
| 661 if (DCTSIZE != 8) |
| 662 return 0; |
| 663 if (sizeof(FAST_FLOAT) != 4) |
| 664 return 0; |
| 665 |
| 666 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
| 667 return 1; |
| 668 if (simd_support & JSIMD_3DNOW) |
| 669 return 1; |
| 670 |
| 671 return 0; |
| 672 } |
| 673 |
| 674 GLOBAL(void) |
| 675 jsimd_fdct_islow (DCTELEM * data) |
| 676 { |
| 677 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 678 jsimd_fdct_islow_sse2(data); |
| 679 else if (simd_support & JSIMD_MMX) |
| 680 jsimd_fdct_islow_mmx(data); |
| 681 } |
| 682 |
| 683 GLOBAL(void) |
| 684 jsimd_fdct_ifast (DCTELEM * data) |
| 685 { |
| 686 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 687 jsimd_fdct_ifast_sse2(data); |
| 688 else if (simd_support & JSIMD_MMX) |
| 689 jsimd_fdct_ifast_mmx(data); |
| 690 } |
| 691 |
| 692 GLOBAL(void) |
| 693 jsimd_fdct_float (FAST_FLOAT * data) |
| 694 { |
| 695 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
| 696 jsimd_fdct_float_sse(data); |
| 697 else if (simd_support & JSIMD_3DNOW) |
| 698 jsimd_fdct_float_3dnow(data); |
| 699 } |
| 700 |
| 701 GLOBAL(int) |
| 702 jsimd_can_quantize (void) |
| 703 { |
| 704 init_simd(); |
| 705 |
| 706 /* The code is optimised for these values only */ |
| 707 if (DCTSIZE != 8) |
| 708 return 0; |
| 709 if (sizeof(JCOEF) != 2) |
| 710 return 0; |
| 711 if (sizeof(DCTELEM) != 2) |
| 712 return 0; |
| 713 |
| 714 if (simd_support & JSIMD_SSE2) |
| 715 return 1; |
| 716 if (simd_support & JSIMD_MMX) |
| 717 return 1; |
| 718 |
| 719 return 0; |
| 720 } |
| 721 |
| 722 GLOBAL(int) |
| 723 jsimd_can_quantize_float (void) |
| 724 { |
| 725 init_simd(); |
| 726 |
| 727 /* The code is optimised for these values only */ |
| 728 if (DCTSIZE != 8) |
| 729 return 0; |
| 730 if (sizeof(JCOEF) != 2) |
| 731 return 0; |
| 732 if (sizeof(FAST_FLOAT) != 4) |
| 733 return 0; |
| 734 |
| 735 if (simd_support & JSIMD_SSE2) |
| 736 return 1; |
| 737 if (simd_support & JSIMD_SSE) |
| 738 return 1; |
| 739 if (simd_support & JSIMD_3DNOW) |
| 740 return 1; |
| 741 |
| 742 return 0; |
| 743 } |
| 744 |
| 745 GLOBAL(void) |
| 746 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
| 747 DCTELEM * workspace) |
| 748 { |
| 749 if (simd_support & JSIMD_SSE2) |
| 750 jsimd_quantize_sse2(coef_block, divisors, workspace); |
| 751 else if (simd_support & JSIMD_MMX) |
| 752 jsimd_quantize_mmx(coef_block, divisors, workspace); |
| 753 } |
| 754 |
| 755 GLOBAL(void) |
| 756 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
| 757 FAST_FLOAT * workspace) |
| 758 { |
| 759 if (simd_support & JSIMD_SSE2) |
| 760 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
| 761 else if (simd_support & JSIMD_SSE) |
| 762 jsimd_quantize_float_sse(coef_block, divisors, workspace); |
| 763 else if (simd_support & JSIMD_3DNOW) |
| 764 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
| 765 } |
| 766 |
| 767 GLOBAL(int) |
| 768 jsimd_can_idct_2x2 (void) |
| 769 { |
| 770 init_simd(); |
| 771 |
| 772 /* The code is optimised for these values only */ |
| 773 if (DCTSIZE != 8) |
| 774 return 0; |
| 775 if (sizeof(JCOEF) != 2) |
| 776 return 0; |
| 777 if (BITS_IN_JSAMPLE != 8) |
| 778 return 0; |
| 779 if (sizeof(JDIMENSION) != 4) |
| 780 return 0; |
| 781 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 782 return 0; |
| 783 |
| 784 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 785 return 1; |
| 786 if (simd_support & JSIMD_MMX) |
| 787 return 1; |
| 788 |
| 789 return 0; |
| 790 } |
| 791 |
| 792 GLOBAL(int) |
| 793 jsimd_can_idct_4x4 (void) |
| 794 { |
| 795 init_simd(); |
| 796 |
| 797 /* The code is optimised for these values only */ |
| 798 if (DCTSIZE != 8) |
| 799 return 0; |
| 800 if (sizeof(JCOEF) != 2) |
| 801 return 0; |
| 802 if (BITS_IN_JSAMPLE != 8) |
| 803 return 0; |
| 804 if (sizeof(JDIMENSION) != 4) |
| 805 return 0; |
| 806 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 807 return 0; |
| 808 |
| 809 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 810 return 1; |
| 811 if (simd_support & JSIMD_MMX) |
| 812 return 1; |
| 813 |
| 814 return 0; |
| 815 } |
| 816 |
| 817 GLOBAL(void) |
| 818 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 819 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 820 JDIMENSION output_col) |
| 821 { |
| 822 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 823 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
| 824 else if (simd_support & JSIMD_MMX) |
| 825 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
| 826 } |
| 827 |
| 828 GLOBAL(void) |
| 829 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 830 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 831 JDIMENSION output_col) |
| 832 { |
| 833 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 834 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
| 835 else if (simd_support & JSIMD_MMX) |
| 836 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
| 837 } |
| 838 |
| 839 GLOBAL(int) |
| 840 jsimd_can_idct_islow (void) |
| 841 { |
| 842 init_simd(); |
| 843 |
| 844 /* The code is optimised for these values only */ |
| 845 if (DCTSIZE != 8) |
| 846 return 0; |
| 847 if (sizeof(JCOEF) != 2) |
| 848 return 0; |
| 849 if (BITS_IN_JSAMPLE != 8) |
| 850 return 0; |
| 851 if (sizeof(JDIMENSION) != 4) |
| 852 return 0; |
| 853 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 854 return 0; |
| 855 |
| 856 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
| 857 return 1; |
| 858 if (simd_support & JSIMD_MMX) |
| 859 return 1; |
| 860 |
| 861 return 0; |
| 862 } |
| 863 |
| 864 GLOBAL(int) |
| 865 jsimd_can_idct_ifast (void) |
| 866 { |
| 867 init_simd(); |
| 868 |
| 869 /* The code is optimised for these values only */ |
| 870 if (DCTSIZE != 8) |
| 871 return 0; |
| 872 if (sizeof(JCOEF) != 2) |
| 873 return 0; |
| 874 if (BITS_IN_JSAMPLE != 8) |
| 875 return 0; |
| 876 if (sizeof(JDIMENSION) != 4) |
| 877 return 0; |
| 878 if (sizeof(IFAST_MULT_TYPE) != 2) |
| 879 return 0; |
| 880 if (IFAST_SCALE_BITS != 2) |
| 881 return 0; |
| 882 |
| 883 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
| 884 return 1; |
| 885 if (simd_support & JSIMD_MMX) |
| 886 return 1; |
| 887 |
| 888 return 0; |
| 889 } |
| 890 |
| 891 GLOBAL(int) |
| 892 jsimd_can_idct_float (void) |
| 893 { |
| 894 init_simd(); |
| 895 |
| 896 if (DCTSIZE != 8) |
| 897 return 0; |
| 898 if (sizeof(JCOEF) != 2) |
| 899 return 0; |
| 900 if (BITS_IN_JSAMPLE != 8) |
| 901 return 0; |
| 902 if (sizeof(JDIMENSION) != 4) |
| 903 return 0; |
| 904 if (sizeof(FAST_FLOAT) != 4) |
| 905 return 0; |
| 906 if (sizeof(FLOAT_MULT_TYPE) != 4) |
| 907 return 0; |
| 908 |
| 909 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
| 910 return 1; |
| 911 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
| 912 return 1; |
| 913 if (simd_support & JSIMD_3DNOW) |
| 914 return 1; |
| 915 |
| 916 return 0; |
| 917 } |
| 918 |
| 919 GLOBAL(void) |
| 920 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 921 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 922 JDIMENSION output_col) |
| 923 { |
| 924 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
| 925 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col
); |
| 926 else if (simd_support & JSIMD_MMX) |
| 927 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col)
; |
| 928 } |
| 929 |
| 930 GLOBAL(void) |
| 931 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 932 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 933 JDIMENSION output_col) |
| 934 { |
| 935 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
| 936 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col
); |
| 937 else if (simd_support & JSIMD_MMX) |
| 938 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col)
; |
| 939 } |
| 940 |
| 941 GLOBAL(void) |
| 942 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 943 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 944 JDIMENSION output_col) |
| 945 { |
| 946 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
| 947 jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
| 948 output_buf, output_col); |
| 949 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
| 950 jsimd_idct_float_sse(compptr->dct_table, coef_block, |
| 951 output_buf, output_col); |
| 952 else if (simd_support & JSIMD_3DNOW) |
| 953 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
| 954 output_buf, output_col); |
| 955 } |
| 956 |
OLD | NEW |