| OLD | NEW |
| 1 /* | 1 /* |
| 2 * jsimd_x86_64.c | 2 * jsimd_x86_64.c |
| 3 * | 3 * |
| 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 * Copyright 2009-2011, 2014 D. R. Commander | 5 * Copyright 2009-2011, 2014, 2016 D. R. Commander |
| 6 * | 6 * Copyright 2015 Matthieu Darbois |
| 7 * |
| 7 * Based on the x86 SIMD extension for IJG JPEG library, | 8 * Based on the x86 SIMD extension for IJG JPEG library, |
| 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 * | 11 * |
| 11 * This file contains the interface between the "normal" portions | 12 * This file contains the interface between the "normal" portions |
| 12 * of the library and the SIMD implementations when running on a | 13 * of the library and the SIMD implementations when running on a |
| 13 * x86_64 architecture. | 14 * 64-bit x86 architecture. |
| 14 */ | 15 */ |
| 15 | 16 |
| 16 #define JPEG_INTERNALS | 17 #define JPEG_INTERNALS |
| 17 #include "../jinclude.h" | 18 #include "../jinclude.h" |
| 18 #include "../jpeglib.h" | 19 #include "../jpeglib.h" |
| 19 #include "../jsimd.h" | 20 #include "../jsimd.h" |
| 20 #include "../jdct.h" | 21 #include "../jdct.h" |
| 21 #include "../jsimddct.h" | 22 #include "../jsimddct.h" |
| 22 #include "jsimd.h" | 23 #include "jsimd.h" |
| 23 | 24 |
| 24 /* | 25 /* |
| 25 * In the PIC cases, we have no guarantee that constants will keep | 26 * In the PIC cases, we have no guarantee that constants will keep |
| 26 * their alignment. This macro allows us to verify it at runtime. | 27 * their alignment. This macro allows us to verify it at runtime. |
| 27 */ | 28 */ |
| 28 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) | 29 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) |
| 29 | 30 |
| 30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ | 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
| 31 | 32 |
| 32 #ifndef JPEG_DECODE_ONLY | 33 static unsigned int simd_support = ~0; |
| 34 static unsigned int simd_huffman = 1; |
| 35 |
| 36 /* |
| 37 * Check what SIMD accelerations are supported. |
| 38 * |
| 39 * FIXME: This code is racy under a multi-threaded environment. |
| 40 */ |
| 41 LOCAL(void) |
| 42 init_simd (void) |
| 43 { |
| 44 char *env = NULL; |
| 45 |
| 46 if (simd_support != ~0U) |
| 47 return; |
| 48 |
| 49 simd_support = JSIMD_SSE2 | JSIMD_SSE; |
| 50 |
| 51 /* Force different settings through environment variables */ |
| 52 env = getenv("JSIMD_FORCENONE"); |
| 53 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 54 simd_support = 0; |
| 55 env = getenv("JSIMD_NOHUFFENC"); |
| 56 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 57 simd_huffman = 0; |
| 58 } |
| 59 |
| 33 GLOBAL(int) | 60 GLOBAL(int) |
| 34 jsimd_can_rgb_ycc (void) | 61 jsimd_can_rgb_ycc (void) |
| 35 { | 62 { |
| 63 init_simd(); |
| 64 |
| 36 /* The code is optimised for these values only */ | 65 /* The code is optimised for these values only */ |
| 37 if (BITS_IN_JSAMPLE != 8) | 66 if (BITS_IN_JSAMPLE != 8) |
| 38 return 0; | 67 return 0; |
| 39 if (sizeof(JDIMENSION) != 4) | 68 if (sizeof(JDIMENSION) != 4) |
| 40 return 0; | 69 return 0; |
| 41 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 42 return 0; | 71 return 0; |
| 43 | 72 |
| 44 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 73 if ((simd_support & JSIMD_SSE2) && |
| 45 return 0; | 74 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 75 return 1; |
| 46 | 76 |
| 47 return 1; | 77 return 0; |
| 48 } | 78 } |
| 49 #endif | |
| 50 | 79 |
| 51 GLOBAL(int) | 80 GLOBAL(int) |
| 52 jsimd_can_rgb_gray (void) | 81 jsimd_can_rgb_gray (void) |
| 53 { | 82 { |
| 83 init_simd(); |
| 84 |
| 54 /* The code is optimised for these values only */ | 85 /* The code is optimised for these values only */ |
| 55 if (BITS_IN_JSAMPLE != 8) | 86 if (BITS_IN_JSAMPLE != 8) |
| 56 return 0; | 87 return 0; |
| 57 if (sizeof(JDIMENSION) != 4) | 88 if (sizeof(JDIMENSION) != 4) |
| 58 return 0; | 89 return 0; |
| 59 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 90 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 60 return 0; | 91 return 0; |
| 61 | 92 |
| 62 if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) | 93 if ((simd_support & JSIMD_SSE2) && |
| 63 return 0; | 94 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
| 95 return 1; |
| 64 | 96 |
| 65 return 1; | 97 return 0; |
| 66 } | 98 } |
| 67 | 99 |
| 68 GLOBAL(int) | 100 GLOBAL(int) |
| 69 jsimd_can_ycc_rgb (void) | 101 jsimd_can_ycc_rgb (void) |
| 70 { | 102 { |
| 103 init_simd(); |
| 104 |
| 71 /* The code is optimised for these values only */ | 105 /* The code is optimised for these values only */ |
| 72 if (BITS_IN_JSAMPLE != 8) | 106 if (BITS_IN_JSAMPLE != 8) |
| 73 return 0; | 107 return 0; |
| 74 if (sizeof(JDIMENSION) != 4) | 108 if (sizeof(JDIMENSION) != 4) |
| 75 return 0; | 109 return 0; |
| 76 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 110 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 77 return 0; | 111 return 0; |
| 78 | 112 |
| 79 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 113 if ((simd_support & JSIMD_SSE2) && |
| 80 return 0; | 114 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
| 115 return 1; |
| 81 | 116 |
| 82 return 1; | 117 return 0; |
| 83 } | 118 } |
| 84 | 119 |
| 85 GLOBAL(int) | 120 GLOBAL(int) |
| 86 jsimd_can_ycc_rgb565 (void) | 121 jsimd_can_ycc_rgb565 (void) |
| 87 { | 122 { |
| 88 return 0; | 123 return 0; |
| 89 } | 124 } |
| 90 | 125 |
| 91 #ifndef JPEG_DECODE_ONLY | |
| 92 GLOBAL(void) | 126 GLOBAL(void) |
| 93 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 94 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 128 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 95 JDIMENSION output_row, int num_rows) | 129 JDIMENSION output_row, int num_rows) |
| 96 { | 130 { |
| 97 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 131 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 98 | 132 |
| 99 switch(cinfo->in_color_space) | 133 switch(cinfo->in_color_space) { |
| 100 { | |
| 101 case JCS_EXT_RGB: | 134 case JCS_EXT_RGB: |
| 102 sse2fct=jsimd_extrgb_ycc_convert_sse2; | 135 sse2fct=jsimd_extrgb_ycc_convert_sse2; |
| 103 break; | 136 break; |
| 104 case JCS_EXT_RGBX: | 137 case JCS_EXT_RGBX: |
| 105 case JCS_EXT_RGBA: | 138 case JCS_EXT_RGBA: |
| 106 sse2fct=jsimd_extrgbx_ycc_convert_sse2; | 139 sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
| 107 break; | 140 break; |
| 108 case JCS_EXT_BGR: | 141 case JCS_EXT_BGR: |
| 109 sse2fct=jsimd_extbgr_ycc_convert_sse2; | 142 sse2fct=jsimd_extbgr_ycc_convert_sse2; |
| 110 break; | 143 break; |
| 111 case JCS_EXT_BGRX: | 144 case JCS_EXT_BGRX: |
| 112 case JCS_EXT_BGRA: | 145 case JCS_EXT_BGRA: |
| 113 sse2fct=jsimd_extbgrx_ycc_convert_sse2; | 146 sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
| 114 break; | 147 break; |
| 115 case JCS_EXT_XBGR: | 148 case JCS_EXT_XBGR: |
| 116 case JCS_EXT_ABGR: | 149 case JCS_EXT_ABGR: |
| 117 sse2fct=jsimd_extxbgr_ycc_convert_sse2; | 150 sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
| 118 break; | 151 break; |
| 119 case JCS_EXT_XRGB: | 152 case JCS_EXT_XRGB: |
| 120 case JCS_EXT_ARGB: | 153 case JCS_EXT_ARGB: |
| 121 sse2fct=jsimd_extxrgb_ycc_convert_sse2; | 154 sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
| 122 break; | 155 break; |
| 123 default: | 156 default: |
| 124 sse2fct=jsimd_rgb_ycc_convert_sse2; | 157 sse2fct=jsimd_rgb_ycc_convert_sse2; |
| 125 break; | 158 break; |
| 126 } | 159 } |
| 127 | 160 |
| 128 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 161 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 129 } | 162 } |
| 130 #endif | |
| 131 | 163 |
| 132 GLOBAL(void) | 164 GLOBAL(void) |
| 133 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 165 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 134 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 166 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 135 JDIMENSION output_row, int num_rows) | 167 JDIMENSION output_row, int num_rows) |
| 136 { | 168 { |
| 137 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 169 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 138 | 170 |
| 139 switch(cinfo->in_color_space) | 171 switch(cinfo->in_color_space) { |
| 140 { | |
| 141 case JCS_EXT_RGB: | 172 case JCS_EXT_RGB: |
| 142 sse2fct=jsimd_extrgb_gray_convert_sse2; | 173 sse2fct=jsimd_extrgb_gray_convert_sse2; |
| 143 break; | 174 break; |
| 144 case JCS_EXT_RGBX: | 175 case JCS_EXT_RGBX: |
| 145 case JCS_EXT_RGBA: | 176 case JCS_EXT_RGBA: |
| 146 sse2fct=jsimd_extrgbx_gray_convert_sse2; | 177 sse2fct=jsimd_extrgbx_gray_convert_sse2; |
| 147 break; | 178 break; |
| 148 case JCS_EXT_BGR: | 179 case JCS_EXT_BGR: |
| 149 sse2fct=jsimd_extbgr_gray_convert_sse2; | 180 sse2fct=jsimd_extbgr_gray_convert_sse2; |
| 150 break; | 181 break; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 168 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 199 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 169 } | 200 } |
| 170 | 201 |
| 171 GLOBAL(void) | 202 GLOBAL(void) |
| 172 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 173 JSAMPIMAGE input_buf, JDIMENSION input_row, | 204 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 174 JSAMPARRAY output_buf, int num_rows) | 205 JSAMPARRAY output_buf, int num_rows) |
| 175 { | 206 { |
| 176 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 207 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 177 | 208 |
| 178 switch(cinfo->out_color_space) | 209 switch(cinfo->out_color_space) { |
| 179 { | |
| 180 case JCS_EXT_RGB: | 210 case JCS_EXT_RGB: |
| 181 sse2fct=jsimd_ycc_extrgb_convert_sse2; | 211 sse2fct=jsimd_ycc_extrgb_convert_sse2; |
| 182 break; | 212 break; |
| 183 case JCS_EXT_RGBX: | 213 case JCS_EXT_RGBX: |
| 184 case JCS_EXT_RGBA: | 214 case JCS_EXT_RGBA: |
| 185 sse2fct=jsimd_ycc_extrgbx_convert_sse2; | 215 sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
| 186 break; | 216 break; |
| 187 case JCS_EXT_BGR: | 217 case JCS_EXT_BGR: |
| 188 sse2fct=jsimd_ycc_extbgr_convert_sse2; | 218 sse2fct=jsimd_ycc_extbgr_convert_sse2; |
| 189 break; | 219 break; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 207 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 237 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 208 } | 238 } |
| 209 | 239 |
| 210 GLOBAL(void) | 240 GLOBAL(void) |
| 211 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | 241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
| 212 JSAMPIMAGE input_buf, JDIMENSION input_row, | 242 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 213 JSAMPARRAY output_buf, int num_rows) | 243 JSAMPARRAY output_buf, int num_rows) |
| 214 { | 244 { |
| 215 } | 245 } |
| 216 | 246 |
| 217 #ifndef JPEG_DECODE_ONLY | |
| 218 GLOBAL(int) | 247 GLOBAL(int) |
| 219 jsimd_can_h2v2_downsample (void) | 248 jsimd_can_h2v2_downsample (void) |
| 220 { | 249 { |
| 221 /* The code is optimised for these values only */ | 250 init_simd(); |
| 222 if (BITS_IN_JSAMPLE != 8) | 251 |
| 223 return 0; | 252 /* The code is optimised for these values only */ |
| 224 if (sizeof(JDIMENSION) != 4) | 253 if (BITS_IN_JSAMPLE != 8) |
| 225 return 0; | 254 return 0; |
| 226 | 255 if (sizeof(JDIMENSION) != 4) |
| 227 return 1; | 256 return 0; |
| 257 |
| 258 if (simd_support & JSIMD_SSE2) |
| 259 return 1; |
| 260 |
| 261 return 0; |
| 228 } | 262 } |
| 229 | 263 |
| 230 GLOBAL(int) | 264 GLOBAL(int) |
| 231 jsimd_can_h2v1_downsample (void) | 265 jsimd_can_h2v1_downsample (void) |
| 232 { | 266 { |
| 233 /* The code is optimised for these values only */ | 267 init_simd(); |
| 234 if (BITS_IN_JSAMPLE != 8) | 268 |
| 235 return 0; | 269 /* The code is optimised for these values only */ |
| 236 if (sizeof(JDIMENSION) != 4) | 270 if (BITS_IN_JSAMPLE != 8) |
| 237 return 0; | 271 return 0; |
| 238 | 272 if (sizeof(JDIMENSION) != 4) |
| 239 return 1; | 273 return 0; |
| 240 } | 274 |
| 241 | 275 if (simd_support & JSIMD_SSE2) |
| 242 GLOBAL(void) | 276 return 1; |
| 243 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 277 |
| 278 return 0; |
| 279 } |
| 280 |
| 281 GLOBAL(void) |
| 282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
| 244 JSAMPARRAY input_data, JSAMPARRAY output_data) | 283 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 245 { | 284 { |
| 246 jsimd_h2v2_downsample_sse2(cinfo->image_width, | 285 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 247 cinfo->max_v_samp_factor, | 286 compptr->v_samp_factor, compptr->width_in_blocks, |
| 248 compptr->v_samp_factor, | |
| 249 compptr->width_in_blocks, | |
| 250 input_data, output_data); | 287 input_data, output_data); |
| 251 } | 288 } |
| 252 | 289 |
| 253 GLOBAL(void) | 290 GLOBAL(void) |
| 254 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
| 255 JSAMPARRAY input_data, JSAMPARRAY output_data) | 292 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 256 { | 293 { |
| 257 jsimd_h2v1_downsample_sse2(cinfo->image_width, | 294 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 258 cinfo->max_v_samp_factor, | 295 compptr->v_samp_factor, compptr->width_in_blocks, |
| 259 compptr->v_samp_factor, | |
| 260 compptr->width_in_blocks, | |
| 261 input_data, output_data); | 296 input_data, output_data); |
| 262 } | 297 } |
| 263 #endif | |
| 264 | 298 |
| 265 GLOBAL(int) | 299 GLOBAL(int) |
| 266 jsimd_can_h2v2_upsample (void) | 300 jsimd_can_h2v2_upsample (void) |
| 267 { | 301 { |
| 268 /* The code is optimised for these values only */ | 302 init_simd(); |
| 269 if (BITS_IN_JSAMPLE != 8) | 303 |
| 270 return 0; | 304 /* The code is optimised for these values only */ |
| 271 if (sizeof(JDIMENSION) != 4) | 305 if (BITS_IN_JSAMPLE != 8) |
| 272 return 0; | 306 return 0; |
| 273 | 307 if (sizeof(JDIMENSION) != 4) |
| 274 return 1; | 308 return 0; |
| 309 |
| 310 if (simd_support & JSIMD_SSE2) |
| 311 return 1; |
| 312 |
| 313 return 0; |
| 275 } | 314 } |
| 276 | 315 |
| 277 GLOBAL(int) | 316 GLOBAL(int) |
| 278 jsimd_can_h2v1_upsample (void) | 317 jsimd_can_h2v1_upsample (void) |
| 279 { | 318 { |
| 280 /* The code is optimised for these values only */ | 319 init_simd(); |
| 281 if (BITS_IN_JSAMPLE != 8) | 320 |
| 282 return 0; | 321 /* The code is optimised for these values only */ |
| 283 if (sizeof(JDIMENSION) != 4) | 322 if (BITS_IN_JSAMPLE != 8) |
| 284 return 0; | 323 return 0; |
| 285 | 324 if (sizeof(JDIMENSION) != 4) |
| 286 return 1; | 325 return 0; |
| 326 |
| 327 if (simd_support & JSIMD_SSE2) |
| 328 return 1; |
| 329 |
| 330 return 0; |
| 287 } | 331 } |
| 288 | 332 |
| 289 GLOBAL(void) | 333 GLOBAL(void) |
| 290 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 334 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 291 jpeg_component_info * compptr, | 335 jpeg_component_info *compptr, |
| 292 JSAMPARRAY input_data, | 336 JSAMPARRAY input_data, |
| 293 JSAMPARRAY * output_data_ptr) | 337 JSAMPARRAY *output_data_ptr) |
| 294 { | 338 { |
| 295 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, | 339 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
| 296 cinfo->output_width, | |
| 297 input_data, output_data_ptr); | 340 input_data, output_data_ptr); |
| 298 } | 341 } |
| 299 | 342 |
| 300 GLOBAL(void) | 343 GLOBAL(void) |
| 301 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 344 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 302 jpeg_component_info * compptr, | 345 jpeg_component_info *compptr, |
| 303 JSAMPARRAY input_data, | 346 JSAMPARRAY input_data, |
| 304 JSAMPARRAY * output_data_ptr) | 347 JSAMPARRAY *output_data_ptr) |
| 305 { | 348 { |
| 306 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, | 349 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
| 307 cinfo->output_width, | |
| 308 input_data, output_data_ptr); | 350 input_data, output_data_ptr); |
| 309 } | 351 } |
| 310 | 352 |
| 311 GLOBAL(int) | 353 GLOBAL(int) |
| 312 jsimd_can_h2v2_fancy_upsample (void) | 354 jsimd_can_h2v2_fancy_upsample (void) |
| 313 { | 355 { |
| 314 /* The code is optimised for these values only */ | 356 init_simd(); |
| 315 if (BITS_IN_JSAMPLE != 8) | 357 |
| 316 return 0; | 358 /* The code is optimised for these values only */ |
| 317 if (sizeof(JDIMENSION) != 4) | 359 if (BITS_IN_JSAMPLE != 8) |
| 318 return 0; | 360 return 0; |
| 319 | 361 if (sizeof(JDIMENSION) != 4) |
| 320 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 362 return 0; |
| 321 return 0; | 363 |
| 322 | 364 if ((simd_support & JSIMD_SSE2) && |
| 323 return 1; | 365 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 366 return 1; |
| 367 |
| 368 return 0; |
| 324 } | 369 } |
| 325 | 370 |
| 326 GLOBAL(int) | 371 GLOBAL(int) |
| 327 jsimd_can_h2v1_fancy_upsample (void) | 372 jsimd_can_h2v1_fancy_upsample (void) |
| 328 { | 373 { |
| 329 /* The code is optimised for these values only */ | 374 init_simd(); |
| 330 if (BITS_IN_JSAMPLE != 8) | 375 |
| 331 return 0; | 376 /* The code is optimised for these values only */ |
| 332 if (sizeof(JDIMENSION) != 4) | 377 if (BITS_IN_JSAMPLE != 8) |
| 333 return 0; | 378 return 0; |
| 334 | 379 if (sizeof(JDIMENSION) != 4) |
| 335 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 380 return 0; |
| 336 return 0; | 381 |
| 337 | 382 if ((simd_support & JSIMD_SSE2) && |
| 338 return 1; | 383 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 384 return 1; |
| 385 |
| 386 return 0; |
| 339 } | 387 } |
| 340 | 388 |
| 341 GLOBAL(void) | 389 GLOBAL(void) |
| 342 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 343 jpeg_component_info * compptr, | 391 jpeg_component_info *compptr, |
| 344 JSAMPARRAY input_data, | 392 JSAMPARRAY input_data, |
| 345 JSAMPARRAY * output_data_ptr) | 393 JSAMPARRAY *output_data_ptr) |
| 346 { | 394 { |
| 347 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 395 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 348 compptr->downsampled_width, | 396 compptr->downsampled_width, input_data, |
| 349 input_data, output_data_ptr); | 397 output_data_ptr); |
| 350 } | 398 } |
| 351 | 399 |
| 352 GLOBAL(void) | 400 GLOBAL(void) |
| 353 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 354 jpeg_component_info * compptr, | 402 jpeg_component_info *compptr, |
| 355 JSAMPARRAY input_data, | 403 JSAMPARRAY input_data, |
| 356 JSAMPARRAY * output_data_ptr) | 404 JSAMPARRAY *output_data_ptr) |
| 357 { | 405 { |
| 358 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 406 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 359 compptr->downsampled_width, | 407 compptr->downsampled_width, input_data, |
| 360 input_data, output_data_ptr); | 408 output_data_ptr); |
| 361 } | 409 } |
| 362 | 410 |
| 363 GLOBAL(int) | 411 GLOBAL(int) |
| 364 jsimd_can_h2v2_merged_upsample (void) | 412 jsimd_can_h2v2_merged_upsample (void) |
| 365 { | 413 { |
| 366 /* The code is optimised for these values only */ | 414 init_simd(); |
| 367 if (BITS_IN_JSAMPLE != 8) | 415 |
| 368 return 0; | 416 /* The code is optimised for these values only */ |
| 369 if (sizeof(JDIMENSION) != 4) | 417 if (BITS_IN_JSAMPLE != 8) |
| 370 return 0; | 418 return 0; |
| 371 | 419 if (sizeof(JDIMENSION) != 4) |
| 372 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 420 return 0; |
| 373 return 0; | 421 |
| 374 | 422 if ((simd_support & JSIMD_SSE2) && |
| 375 return 1; | 423 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 424 return 1; |
| 425 |
| 426 return 0; |
| 376 } | 427 } |
| 377 | 428 |
| 378 GLOBAL(int) | 429 GLOBAL(int) |
| 379 jsimd_can_h2v1_merged_upsample (void) | 430 jsimd_can_h2v1_merged_upsample (void) |
| 380 { | 431 { |
| 381 /* The code is optimised for these values only */ | 432 init_simd(); |
| 382 if (BITS_IN_JSAMPLE != 8) | 433 |
| 383 return 0; | 434 /* The code is optimised for these values only */ |
| 384 if (sizeof(JDIMENSION) != 4) | 435 if (BITS_IN_JSAMPLE != 8) |
| 385 return 0; | 436 return 0; |
| 386 | 437 if (sizeof(JDIMENSION) != 4) |
| 387 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 438 return 0; |
| 388 return 0; | 439 |
| 389 | 440 if ((simd_support & JSIMD_SSE2) && |
| 390 return 1; | 441 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 391 } | 442 return 1; |
| 392 | 443 |
| 393 GLOBAL(void) | 444 return 0; |
| 445 } |
| 446 |
| 447 GLOBAL(void) |
| 394 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, | 448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
| 395 JSAMPIMAGE input_buf, | 449 JSAMPIMAGE input_buf, |
| 396 JDIMENSION in_row_group_ctr, | 450 JDIMENSION in_row_group_ctr, |
| 397 JSAMPARRAY output_buf) | 451 JSAMPARRAY output_buf) |
| 398 { | 452 { |
| 399 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 400 | 454 |
| 401 switch(cinfo->out_color_space) | 455 switch(cinfo->out_color_space) { |
| 402 { | |
| 403 case JCS_EXT_RGB: | 456 case JCS_EXT_RGB: |
| 404 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; | 457 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
| 405 break; | 458 break; |
| 406 case JCS_EXT_RGBX: | 459 case JCS_EXT_RGBX: |
| 407 case JCS_EXT_RGBA: | 460 case JCS_EXT_RGBA: |
| 408 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; | 461 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
| 409 break; | 462 break; |
| 410 case JCS_EXT_BGR: | 463 case JCS_EXT_BGR: |
| 411 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; | 464 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
| 412 break; | 465 break; |
| (...skipping 18 matching lines...) Expand all Loading... |
| 431 } | 484 } |
| 432 | 485 |
| 433 GLOBAL(void) | 486 GLOBAL(void) |
| 434 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, | 487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
| 435 JSAMPIMAGE input_buf, | 488 JSAMPIMAGE input_buf, |
| 436 JDIMENSION in_row_group_ctr, | 489 JDIMENSION in_row_group_ctr, |
| 437 JSAMPARRAY output_buf) | 490 JSAMPARRAY output_buf) |
| 438 { | 491 { |
| 439 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 492 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 440 | 493 |
| 441 switch(cinfo->out_color_space) | 494 switch(cinfo->out_color_space) { |
| 442 { | |
| 443 case JCS_EXT_RGB: | 495 case JCS_EXT_RGB: |
| 444 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; | 496 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
| 445 break; | 497 break; |
| 446 case JCS_EXT_RGBX: | 498 case JCS_EXT_RGBX: |
| 447 case JCS_EXT_RGBA: | 499 case JCS_EXT_RGBA: |
| 448 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; | 500 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
| 449 break; | 501 break; |
| 450 case JCS_EXT_BGR: | 502 case JCS_EXT_BGR: |
| 451 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; | 503 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
| 452 break; | 504 break; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 463 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; | 515 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
| 464 break; | 516 break; |
| 465 default: | 517 default: |
| 466 sse2fct=jsimd_h2v1_merged_upsample_sse2; | 518 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
| 467 break; | 519 break; |
| 468 } | 520 } |
| 469 | 521 |
| 470 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 522 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 471 } | 523 } |
| 472 | 524 |
| 473 #ifndef JPEG_DECODE_ONLY | |
| 474 GLOBAL(int) | 525 GLOBAL(int) |
| 475 jsimd_can_convsamp (void) | 526 jsimd_can_convsamp (void) |
| 476 { | 527 { |
| 528 init_simd(); |
| 529 |
| 477 /* The code is optimised for these values only */ | 530 /* The code is optimised for these values only */ |
| 478 if (DCTSIZE != 8) | 531 if (DCTSIZE != 8) |
| 479 return 0; | 532 return 0; |
| 480 if (BITS_IN_JSAMPLE != 8) | 533 if (BITS_IN_JSAMPLE != 8) |
| 481 return 0; | 534 return 0; |
| 482 if (sizeof(JDIMENSION) != 4) | 535 if (sizeof(JDIMENSION) != 4) |
| 483 return 0; | 536 return 0; |
| 484 if (sizeof(DCTELEM) != 2) | 537 if (sizeof(DCTELEM) != 2) |
| 485 return 0; | 538 return 0; |
| 486 | 539 |
| 487 return 1; | 540 if (simd_support & JSIMD_SSE2) |
| 541 return 1; |
| 542 |
| 543 return 0; |
| 488 } | 544 } |
| 489 | 545 |
| 490 GLOBAL(int) | 546 GLOBAL(int) |
| 491 jsimd_can_convsamp_float (void) | 547 jsimd_can_convsamp_float (void) |
| 492 { | 548 { |
| 549 init_simd(); |
| 550 |
| 493 /* The code is optimised for these values only */ | 551 /* The code is optimised for these values only */ |
| 494 if (DCTSIZE != 8) | 552 if (DCTSIZE != 8) |
| 495 return 0; | 553 return 0; |
| 496 if (BITS_IN_JSAMPLE != 8) | 554 if (BITS_IN_JSAMPLE != 8) |
| 497 return 0; | 555 return 0; |
| 498 if (sizeof(JDIMENSION) != 4) | 556 if (sizeof(JDIMENSION) != 4) |
| 499 return 0; | 557 return 0; |
| 500 if (sizeof(FAST_FLOAT) != 4) | 558 if (sizeof(FAST_FLOAT) != 4) |
| 501 return 0; | 559 return 0; |
| 502 | 560 |
| 503 return 1; | 561 if (simd_support & JSIMD_SSE2) |
| 562 return 1; |
| 563 |
| 564 return 0; |
| 504 } | 565 } |
| 505 | 566 |
| 506 GLOBAL(void) | 567 GLOBAL(void) |
| 507 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 508 DCTELEM * workspace) | 569 DCTELEM *workspace) |
| 509 { | 570 { |
| 510 jsimd_convsamp_sse2(sample_data, start_col, workspace); | 571 jsimd_convsamp_sse2(sample_data, start_col, workspace); |
| 511 } | 572 } |
| 512 | 573 |
| 513 GLOBAL(void) | 574 GLOBAL(void) |
| 514 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 515 FAST_FLOAT * workspace) | 576 FAST_FLOAT *workspace) |
| 516 { | 577 { |
| 517 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); | 578 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
| 518 } | 579 } |
| 519 | 580 |
| 520 GLOBAL(int) | 581 GLOBAL(int) |
| 521 jsimd_can_fdct_islow (void) | 582 jsimd_can_fdct_islow (void) |
| 522 { | 583 { |
| 584 init_simd(); |
| 585 |
| 523 /* The code is optimised for these values only */ | 586 /* The code is optimised for these values only */ |
| 524 if (DCTSIZE != 8) | 587 if (DCTSIZE != 8) |
| 525 return 0; | 588 return 0; |
| 526 if (sizeof(DCTELEM) != 2) | 589 if (sizeof(DCTELEM) != 2) |
| 527 return 0; | 590 return 0; |
| 528 | 591 |
| 529 if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) | 592 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 530 return 0; | 593 return 1; |
| 531 | 594 |
| 532 return 1; | 595 return 0; |
| 533 } | 596 } |
| 534 | 597 |
| 535 GLOBAL(int) | 598 GLOBAL(int) |
| 536 jsimd_can_fdct_ifast (void) | 599 jsimd_can_fdct_ifast (void) |
| 537 { | 600 { |
| 601 init_simd(); |
| 602 |
| 538 /* The code is optimised for these values only */ | 603 /* The code is optimised for these values only */ |
| 539 if (DCTSIZE != 8) | 604 if (DCTSIZE != 8) |
| 540 return 0; | 605 return 0; |
| 541 if (sizeof(DCTELEM) != 2) | 606 if (sizeof(DCTELEM) != 2) |
| 542 return 0; | 607 return 0; |
| 543 | 608 |
| 544 if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) | 609 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) |
| 545 return 0; | 610 return 1; |
| 546 | 611 |
| 547 return 1; | 612 return 0; |
| 548 } | 613 } |
| 549 | 614 |
| 550 GLOBAL(int) | 615 GLOBAL(int) |
| 551 jsimd_can_fdct_float (void) | 616 jsimd_can_fdct_float (void) |
| 552 { | 617 { |
| 618 init_simd(); |
| 619 |
| 553 /* The code is optimised for these values only */ | 620 /* The code is optimised for these values only */ |
| 554 if (DCTSIZE != 8) | 621 if (DCTSIZE != 8) |
| 555 return 0; | 622 return 0; |
| 556 if (sizeof(FAST_FLOAT) != 4) | 623 if (sizeof(FAST_FLOAT) != 4) |
| 557 return 0; | 624 return 0; |
| 558 | 625 |
| 559 if (!IS_ALIGNED_SSE(jconst_fdct_float_sse)) | 626 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
| 560 return 0; | 627 return 1; |
| 561 | 628 |
| 562 return 1; | 629 return 0; |
| 563 } | 630 } |
| 564 | 631 |
| 565 GLOBAL(void) | 632 GLOBAL(void) |
| 566 jsimd_fdct_islow (DCTELEM * data) | 633 jsimd_fdct_islow (DCTELEM *data) |
| 567 { | 634 { |
| 568 jsimd_fdct_islow_sse2(data); | 635 jsimd_fdct_islow_sse2(data); |
| 569 } | 636 } |
| 570 | 637 |
| 571 GLOBAL(void) | 638 GLOBAL(void) |
| 572 jsimd_fdct_ifast (DCTELEM * data) | 639 jsimd_fdct_ifast (DCTELEM *data) |
| 573 { | 640 { |
| 574 jsimd_fdct_ifast_sse2(data); | 641 jsimd_fdct_ifast_sse2(data); |
| 575 } | 642 } |
| 576 | 643 |
| 577 GLOBAL(void) | 644 GLOBAL(void) |
| 578 jsimd_fdct_float (FAST_FLOAT * data) | 645 jsimd_fdct_float (FAST_FLOAT *data) |
| 579 { | 646 { |
| 580 jsimd_fdct_float_sse(data); | 647 jsimd_fdct_float_sse(data); |
| 581 } | 648 } |
| 582 | 649 |
| 583 GLOBAL(int) | 650 GLOBAL(int) |
| 584 jsimd_can_quantize (void) | 651 jsimd_can_quantize (void) |
| 585 { | 652 { |
| 653 init_simd(); |
| 654 |
| 586 /* The code is optimised for these values only */ | 655 /* The code is optimised for these values only */ |
| 587 if (DCTSIZE != 8) | 656 if (DCTSIZE != 8) |
| 588 return 0; | 657 return 0; |
| 589 if (sizeof(JCOEF) != 2) | 658 if (sizeof(JCOEF) != 2) |
| 590 return 0; | 659 return 0; |
| 591 if (sizeof(DCTELEM) != 2) | 660 if (sizeof(DCTELEM) != 2) |
| 592 return 0; | 661 return 0; |
| 593 | 662 |
| 594 return 1; | 663 if (simd_support & JSIMD_SSE2) |
| 664 return 1; |
| 665 |
| 666 return 0; |
| 595 } | 667 } |
| 596 | 668 |
| 597 GLOBAL(int) | 669 GLOBAL(int) |
| 598 jsimd_can_quantize_float (void) | 670 jsimd_can_quantize_float (void) |
| 599 { | 671 { |
| 672 init_simd(); |
| 673 |
| 600 /* The code is optimised for these values only */ | 674 /* The code is optimised for these values only */ |
| 601 if (DCTSIZE != 8) | 675 if (DCTSIZE != 8) |
| 602 return 0; | 676 return 0; |
| 603 if (sizeof(JCOEF) != 2) | 677 if (sizeof(JCOEF) != 2) |
| 604 return 0; | 678 return 0; |
| 605 if (sizeof(FAST_FLOAT) != 4) | 679 if (sizeof(FAST_FLOAT) != 4) |
| 606 return 0; | 680 return 0; |
| 607 | 681 |
| 608 return 1; | 682 if (simd_support & JSIMD_SSE2) |
| 683 return 1; |
| 684 |
| 685 return 0; |
| 609 } | 686 } |
| 610 | 687 |
| 611 GLOBAL(void) | 688 GLOBAL(void) |
| 612 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, |
| 613 DCTELEM * workspace) | 690 DCTELEM *workspace) |
| 614 { | 691 { |
| 615 jsimd_quantize_sse2(coef_block, divisors, workspace); | 692 jsimd_quantize_sse2(coef_block, divisors, workspace); |
| 616 } | 693 } |
| 617 | 694 |
| 618 GLOBAL(void) | 695 GLOBAL(void) |
| 619 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
| 620 FAST_FLOAT * workspace) | 697 FAST_FLOAT *workspace) |
| 621 { | 698 { |
| 622 jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 699 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
| 623 } | 700 } |
| 624 #endif | |
| 625 | 701 |
| 626 GLOBAL(int) | 702 GLOBAL(int) |
| 627 jsimd_can_idct_2x2 (void) | 703 jsimd_can_idct_2x2 (void) |
| 628 { | 704 { |
| 705 init_simd(); |
| 706 |
| 629 /* The code is optimised for these values only */ | 707 /* The code is optimised for these values only */ |
| 630 if (DCTSIZE != 8) | 708 if (DCTSIZE != 8) |
| 631 return 0; | 709 return 0; |
| 632 if (sizeof(JCOEF) != 2) | 710 if (sizeof(JCOEF) != 2) |
| 633 return 0; | 711 return 0; |
| 634 if (BITS_IN_JSAMPLE != 8) | 712 if (BITS_IN_JSAMPLE != 8) |
| 635 return 0; | 713 return 0; |
| 636 if (sizeof(JDIMENSION) != 4) | 714 if (sizeof(JDIMENSION) != 4) |
| 637 return 0; | 715 return 0; |
| 638 if (sizeof(ISLOW_MULT_TYPE) != 2) | 716 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 639 return 0; | 717 return 0; |
| 640 | 718 |
| 641 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 719 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 642 return 0; | 720 return 1; |
| 643 | 721 |
| 644 return 1; | 722 return 0; |
| 645 } | 723 } |
| 646 | 724 |
| 647 GLOBAL(int) | 725 GLOBAL(int) |
| 648 jsimd_can_idct_4x4 (void) | 726 jsimd_can_idct_4x4 (void) |
| 649 { | 727 { |
| 728 init_simd(); |
| 729 |
| 650 /* The code is optimised for these values only */ | 730 /* The code is optimised for these values only */ |
| 651 if (DCTSIZE != 8) | 731 if (DCTSIZE != 8) |
| 652 return 0; | 732 return 0; |
| 653 if (sizeof(JCOEF) != 2) | 733 if (sizeof(JCOEF) != 2) |
| 654 return 0; | 734 return 0; |
| 655 if (BITS_IN_JSAMPLE != 8) | 735 if (BITS_IN_JSAMPLE != 8) |
| 656 return 0; | 736 return 0; |
| 657 if (sizeof(JDIMENSION) != 4) | 737 if (sizeof(JDIMENSION) != 4) |
| 658 return 0; | 738 return 0; |
| 659 if (sizeof(ISLOW_MULT_TYPE) != 2) | 739 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 660 return 0; | 740 return 0; |
| 661 | 741 |
| 662 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 742 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 663 return 0; | 743 return 1; |
| 664 | 744 |
| 665 return 1; | 745 return 0; |
| 666 } | 746 } |
| 667 | 747 |
| 668 GLOBAL(void) | 748 GLOBAL(void) |
| 669 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 670 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 750 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 671 JDIMENSION output_col) | 751 JDIMENSION output_col) |
| 672 { | 752 { |
| 673 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 753 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
| 674 } | 754 } |
| 675 | 755 |
| 676 GLOBAL(void) | 756 GLOBAL(void) |
| 677 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 678 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 758 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 679 JDIMENSION output_col) | 759 JDIMENSION output_col) |
| 680 { | 760 { |
| 681 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 761 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
| 682 } | 762 } |
| 683 | 763 |
| 684 GLOBAL(int) | 764 GLOBAL(int) |
| 685 jsimd_can_idct_islow (void) | 765 jsimd_can_idct_islow (void) |
| 686 { | 766 { |
| 767 init_simd(); |
| 768 |
| 687 /* The code is optimised for these values only */ | 769 /* The code is optimised for these values only */ |
| 688 if (DCTSIZE != 8) | 770 if (DCTSIZE != 8) |
| 689 return 0; | 771 return 0; |
| 690 if (sizeof(JCOEF) != 2) | 772 if (sizeof(JCOEF) != 2) |
| 691 return 0; | 773 return 0; |
| 692 if (BITS_IN_JSAMPLE != 8) | 774 if (BITS_IN_JSAMPLE != 8) |
| 693 return 0; | 775 return 0; |
| 694 if (sizeof(JDIMENSION) != 4) | 776 if (sizeof(JDIMENSION) != 4) |
| 695 return 0; | 777 return 0; |
| 696 if (sizeof(ISLOW_MULT_TYPE) != 2) | 778 if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 697 return 0; | 779 return 0; |
| 698 | 780 |
| 699 if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2)) | 781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
| 700 return 0; | 782 return 1; |
| 701 | 783 |
| 702 return 1; | 784 return 0; |
| 703 } | 785 } |
| 704 | 786 |
| 705 GLOBAL(int) | 787 GLOBAL(int) |
| 706 jsimd_can_idct_ifast (void) | 788 jsimd_can_idct_ifast (void) |
| 707 { | 789 { |
| 790 init_simd(); |
| 791 |
| 708 /* The code is optimised for these values only */ | 792 /* The code is optimised for these values only */ |
| 709 if (DCTSIZE != 8) | 793 if (DCTSIZE != 8) |
| 710 return 0; | 794 return 0; |
| 711 if (sizeof(JCOEF) != 2) | 795 if (sizeof(JCOEF) != 2) |
| 712 return 0; | 796 return 0; |
| 713 if (BITS_IN_JSAMPLE != 8) | 797 if (BITS_IN_JSAMPLE != 8) |
| 714 return 0; | 798 return 0; |
| 715 if (sizeof(JDIMENSION) != 4) | 799 if (sizeof(JDIMENSION) != 4) |
| 716 return 0; | 800 return 0; |
| 717 if (sizeof(IFAST_MULT_TYPE) != 2) | 801 if (sizeof(IFAST_MULT_TYPE) != 2) |
| 718 return 0; | 802 return 0; |
| 719 if (IFAST_SCALE_BITS != 2) | 803 if (IFAST_SCALE_BITS != 2) |
| 720 return 0; | 804 return 0; |
| 721 | 805 |
| 722 if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) | 806 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
| 723 return 0; | 807 return 1; |
| 724 | 808 |
| 725 return 1; | 809 return 0; |
| 726 } | 810 } |
| 727 | 811 |
| 728 GLOBAL(int) | 812 GLOBAL(int) |
| 729 jsimd_can_idct_float (void) | 813 jsimd_can_idct_float (void) |
| 730 { | 814 { |
| 815 init_simd(); |
| 816 |
| 731 if (DCTSIZE != 8) | 817 if (DCTSIZE != 8) |
| 732 return 0; | 818 return 0; |
| 733 if (sizeof(JCOEF) != 2) | 819 if (sizeof(JCOEF) != 2) |
| 734 return 0; | 820 return 0; |
| 735 if (BITS_IN_JSAMPLE != 8) | 821 if (BITS_IN_JSAMPLE != 8) |
| 736 return 0; | 822 return 0; |
| 737 if (sizeof(JDIMENSION) != 4) | 823 if (sizeof(JDIMENSION) != 4) |
| 738 return 0; | 824 return 0; |
| 739 if (sizeof(FAST_FLOAT) != 4) | 825 if (sizeof(FAST_FLOAT) != 4) |
| 740 return 0; | 826 return 0; |
| 741 if (sizeof(FLOAT_MULT_TYPE) != 4) | 827 if (sizeof(FLOAT_MULT_TYPE) != 4) |
| 742 return 0; | 828 return 0; |
| 743 | 829 |
| 744 if (!IS_ALIGNED_SSE(jconst_idct_float_sse2)) | 830 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
| 745 return 0; | 831 return 1; |
| 746 | 832 |
| 747 return 1; | 833 return 0; |
| 748 } | 834 } |
| 749 | 835 |
| 750 GLOBAL(void) | 836 GLOBAL(void) |
| 751 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 752 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 838 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 753 JDIMENSION output_col) | 839 JDIMENSION output_col) |
| 754 { | 840 { |
| 755 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 841 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, |
| 842 output_col); |
| 756 } | 843 } |
| 757 | 844 |
| 758 GLOBAL(void) | 845 GLOBAL(void) |
| 759 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 760 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 847 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 761 JDIMENSION output_col) | 848 JDIMENSION output_col) |
| 762 { | 849 { |
| 763 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 850 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, |
| 851 output_col); |
| 764 } | 852 } |
| 765 | 853 |
| 766 GLOBAL(void) | 854 GLOBAL(void) |
| 767 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 768 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 856 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 769 JDIMENSION output_col) | 857 JDIMENSION output_col) |
| 770 { | 858 { |
| 771 jsimd_idct_float_sse2(compptr->dct_table, coef_block, | 859 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, |
| 772 output_buf, output_col); | 860 output_col); |
| 773 } | 861 } |
| 862 |
| 863 GLOBAL(int) |
| 864 jsimd_can_huff_encode_one_block (void) |
| 865 { |
| 866 init_simd(); |
| 867 |
| 868 if (DCTSIZE != 8) |
| 869 return 0; |
| 870 if (sizeof(JCOEF) != 2) |
| 871 return 0; |
| 872 |
| 873 if ((simd_support & JSIMD_SSE2) && simd_huffman && |
| 874 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) |
| 875 return 1; |
| 876 |
| 877 return 0; |
| 878 } |
| 879 |
| 880 GLOBAL(JOCTET*) |
| 881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, |
| 882 int last_dc_val, c_derived_tbl *dctbl, |
| 883 c_derived_tbl *actbl) |
| 884 { |
| 885 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, |
| 886 dctbl, actbl); |
| 887 } |
| OLD | NEW |