| OLD | NEW |
| 1 /* | 1 /* |
| 2 * jsimd_i386.c | 2 * jsimd_i386.c |
| 3 * | 3 * |
| 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 * Copyright 2009-2011, 2013-2014 D. R. Commander | 5 * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander |
| 6 * | 6 * Copyright 2015 Matthieu Darbois |
| 7 * |
| 7 * Based on the x86 SIMD extension for IJG JPEG library, | 8 * Based on the x86 SIMD extension for IJG JPEG library, |
| 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 * | 11 * |
| 11 * This file contains the interface between the "normal" portions | 12 * This file contains the interface between the "normal" portions |
| 12 * of the library and the SIMD implementations when running on a | 13 * of the library and the SIMD implementations when running on a |
| 13 * 32-bit x86 architecture. | 14 * 32-bit x86 architecture. |
| 14 */ | 15 */ |
| 15 | 16 |
| 16 #define JPEG_INTERNALS | 17 #define JPEG_INTERNALS |
| 17 #include "../jinclude.h" | 18 #include "../jinclude.h" |
| 18 #include "../jpeglib.h" | 19 #include "../jpeglib.h" |
| 19 #include "../jsimd.h" | 20 #include "../jsimd.h" |
| 20 #include "../jdct.h" | 21 #include "../jdct.h" |
| 21 #include "../jsimddct.h" | 22 #include "../jsimddct.h" |
| 22 #include "jsimd.h" | 23 #include "jsimd.h" |
| 23 | 24 |
| 24 /* | 25 /* |
| 25 * In the PIC cases, we have no guarantee that constants will keep | 26 * In the PIC cases, we have no guarantee that constants will keep |
| 26 * their alignment. This macro allows us to verify it at runtime. | 27 * their alignment. This macro allows us to verify it at runtime. |
| 27 */ | 28 */ |
| 28 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) | 29 #define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) |
| 29 | 30 |
| 30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ | 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
| 31 | 32 |
| 32 static unsigned int simd_support = ~0; | 33 static unsigned int simd_support = ~0; |
| 34 static unsigned int simd_huffman = 1; |
| 33 | 35 |
| 34 /* | 36 /* |
| 35 * Check what SIMD accelerations are supported. | 37 * Check what SIMD accelerations are supported. |
| 36 * | 38 * |
| 37 * FIXME: This code is racy under a multi-threaded environment. | 39 * FIXME: This code is racy under a multi-threaded environment. |
| 38 */ | 40 */ |
| 39 LOCAL(void) | 41 LOCAL(void) |
| 40 init_simd (void) | 42 init_simd (void) |
| 41 { | 43 { |
| 42 char *env = NULL; | 44 char *env = NULL; |
| 43 | 45 |
| 44 if (simd_support != ~0U) | 46 if (simd_support != ~0U) |
| 45 return; | 47 return; |
| 46 | 48 |
| 47 simd_support = jpeg_simd_cpu_support(); | 49 simd_support = jpeg_simd_cpu_support(); |
| 48 | 50 |
| 49 /* Force different settings through environment variables */ | 51 /* Force different settings through environment variables */ |
| 50 env = getenv("JSIMD_FORCEMMX"); | 52 env = getenv("JSIMD_FORCEMMX"); |
| 51 if ((env != NULL) && (strcmp(env, "1") == 0)) | 53 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 52 simd_support &= JSIMD_MMX; | 54 simd_support &= JSIMD_MMX; |
| 53 env = getenv("JSIMD_FORCE3DNOW"); | 55 env = getenv("JSIMD_FORCE3DNOW"); |
| 54 if ((env != NULL) && (strcmp(env, "1") == 0)) | 56 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; | 57 simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
| 56 env = getenv("JSIMD_FORCESSE"); | 58 env = getenv("JSIMD_FORCESSE"); |
| 57 if ((env != NULL) && (strcmp(env, "1") == 0)) | 59 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 58 simd_support &= JSIMD_SSE|JSIMD_MMX; | 60 simd_support &= JSIMD_SSE|JSIMD_MMX; |
| 59 env = getenv("JSIMD_FORCESSE2"); | 61 env = getenv("JSIMD_FORCESSE2"); |
| 60 if ((env != NULL) && (strcmp(env, "1") == 0)) | 62 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 61 simd_support &= JSIMD_SSE2; | 63 simd_support &= JSIMD_SSE2; |
| 64 env = getenv("JSIMD_FORCENONE"); |
| 65 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 66 simd_support = 0; |
| 67 env = getenv("JSIMD_NOHUFFENC"); |
| 68 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 69 simd_huffman = 0; |
| 62 } | 70 } |
| 63 | 71 |
| 64 #ifndef JPEG_DECODE_ONLY | |
| 65 GLOBAL(int) | 72 GLOBAL(int) |
| 66 jsimd_can_rgb_ycc (void) | 73 jsimd_can_rgb_ycc (void) |
| 67 { | 74 { |
| 68 init_simd(); | 75 init_simd(); |
| 69 | 76 |
| 70 /* The code is optimised for these values only */ | 77 /* The code is optimised for these values only */ |
| 71 if (BITS_IN_JSAMPLE != 8) | 78 if (BITS_IN_JSAMPLE != 8) |
| 72 return 0; | 79 return 0; |
| 73 if (sizeof(JDIMENSION) != 4) | 80 if (sizeof(JDIMENSION) != 4) |
| 74 return 0; | 81 return 0; |
| 75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 82 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 76 return 0; | 83 return 0; |
| 77 | 84 |
| 78 if ((simd_support & JSIMD_SSE2) && | 85 if ((simd_support & JSIMD_SSE2) && |
| 79 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 86 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 80 return 1; | 87 return 1; |
| 81 if (simd_support & JSIMD_MMX) | 88 if (simd_support & JSIMD_MMX) |
| 82 return 1; | 89 return 1; |
| 83 | 90 |
| 84 return 0; | 91 return 0; |
| 85 } | 92 } |
| 86 #endif | |
| 87 | 93 |
| 88 GLOBAL(int) | 94 GLOBAL(int) |
| 89 jsimd_can_rgb_gray (void) | 95 jsimd_can_rgb_gray (void) |
| 90 { | 96 { |
| 91 init_simd(); | 97 init_simd(); |
| 92 | 98 |
| 93 /* The code is optimised for these values only */ | 99 /* The code is optimised for these values only */ |
| 94 if (BITS_IN_JSAMPLE != 8) | 100 if (BITS_IN_JSAMPLE != 8) |
| 95 return 0; | 101 return 0; |
| 96 if (sizeof(JDIMENSION) != 4) | 102 if (sizeof(JDIMENSION) != 4) |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 128 | 134 |
| 129 return 0; | 135 return 0; |
| 130 } | 136 } |
| 131 | 137 |
| 132 GLOBAL(int) | 138 GLOBAL(int) |
| 133 jsimd_can_ycc_rgb565 (void) | 139 jsimd_can_ycc_rgb565 (void) |
| 134 { | 140 { |
| 135 return 0; | 141 return 0; |
| 136 } | 142 } |
| 137 | 143 |
| 138 #ifndef JPEG_DECODE_ONLY | |
| 139 GLOBAL(void) | 144 GLOBAL(void) |
| 140 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 145 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 141 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 146 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 142 JDIMENSION output_row, int num_rows) | 147 JDIMENSION output_row, int num_rows) |
| 143 { | 148 { |
| 144 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 149 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 145 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 150 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 146 | 151 |
| 147 switch(cinfo->in_color_space) | 152 switch(cinfo->in_color_space) { |
| 148 { | |
| 149 case JCS_EXT_RGB: | 153 case JCS_EXT_RGB: |
| 150 sse2fct=jsimd_extrgb_ycc_convert_sse2; | 154 sse2fct=jsimd_extrgb_ycc_convert_sse2; |
| 151 mmxfct=jsimd_extrgb_ycc_convert_mmx; | 155 mmxfct=jsimd_extrgb_ycc_convert_mmx; |
| 152 break; | 156 break; |
| 153 case JCS_EXT_RGBX: | 157 case JCS_EXT_RGBX: |
| 154 case JCS_EXT_RGBA: | 158 case JCS_EXT_RGBA: |
| 155 sse2fct=jsimd_extrgbx_ycc_convert_sse2; | 159 sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
| 156 mmxfct=jsimd_extrgbx_ycc_convert_mmx; | 160 mmxfct=jsimd_extrgbx_ycc_convert_mmx; |
| 157 break; | 161 break; |
| 158 case JCS_EXT_BGR: | 162 case JCS_EXT_BGR: |
| (...skipping 16 matching lines...) Expand all Loading... |
| 175 mmxfct=jsimd_extxrgb_ycc_convert_mmx; | 179 mmxfct=jsimd_extxrgb_ycc_convert_mmx; |
| 176 break; | 180 break; |
| 177 default: | 181 default: |
| 178 sse2fct=jsimd_rgb_ycc_convert_sse2; | 182 sse2fct=jsimd_rgb_ycc_convert_sse2; |
| 179 mmxfct=jsimd_rgb_ycc_convert_mmx; | 183 mmxfct=jsimd_rgb_ycc_convert_mmx; |
| 180 break; | 184 break; |
| 181 } | 185 } |
| 182 | 186 |
| 183 if ((simd_support & JSIMD_SSE2) && | 187 if ((simd_support & JSIMD_SSE2) && |
| 184 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 188 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 185 sse2fct(cinfo->image_width, input_buf, | 189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 186 output_buf, output_row, num_rows); | |
| 187 else if (simd_support & JSIMD_MMX) | 190 else if (simd_support & JSIMD_MMX) |
| 188 mmxfct(cinfo->image_width, input_buf, | 191 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 189 output_buf, output_row, num_rows); | |
| 190 } | |
| 191 #endif | |
| 192 | |
| 193 GLOBAL(void) | |
| 194 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | |
| 195 JSAMPIMAGE input_buf, JDIMENSION input_row, | |
| 196 JSAMPARRAY output_buf, int num_rows) | |
| 197 { | |
| 198 } | 192 } |
| 199 | 193 |
| 200 GLOBAL(void) | 194 GLOBAL(void) |
| 201 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 195 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 202 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 196 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 203 JDIMENSION output_row, int num_rows) | 197 JDIMENSION output_row, int num_rows) |
| 204 { | 198 { |
| 205 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 199 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 206 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 200 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 207 | 201 |
| 208 switch(cinfo->in_color_space) | 202 switch(cinfo->in_color_space) { |
| 209 { | |
| 210 case JCS_EXT_RGB: | 203 case JCS_EXT_RGB: |
| 211 sse2fct=jsimd_extrgb_gray_convert_sse2; | 204 sse2fct=jsimd_extrgb_gray_convert_sse2; |
| 212 mmxfct=jsimd_extrgb_gray_convert_mmx; | 205 mmxfct=jsimd_extrgb_gray_convert_mmx; |
| 213 break; | 206 break; |
| 214 case JCS_EXT_RGBX: | 207 case JCS_EXT_RGBX: |
| 215 case JCS_EXT_RGBA: | 208 case JCS_EXT_RGBA: |
| 216 sse2fct=jsimd_extrgbx_gray_convert_sse2; | 209 sse2fct=jsimd_extrgbx_gray_convert_sse2; |
| 217 mmxfct=jsimd_extrgbx_gray_convert_mmx; | 210 mmxfct=jsimd_extrgbx_gray_convert_mmx; |
| 218 break; | 211 break; |
| 219 case JCS_EXT_BGR: | 212 case JCS_EXT_BGR: |
| (...skipping 16 matching lines...) Expand all Loading... |
| 236 mmxfct=jsimd_extxrgb_gray_convert_mmx; | 229 mmxfct=jsimd_extxrgb_gray_convert_mmx; |
| 237 break; | 230 break; |
| 238 default: | 231 default: |
| 239 sse2fct=jsimd_rgb_gray_convert_sse2; | 232 sse2fct=jsimd_rgb_gray_convert_sse2; |
| 240 mmxfct=jsimd_rgb_gray_convert_mmx; | 233 mmxfct=jsimd_rgb_gray_convert_mmx; |
| 241 break; | 234 break; |
| 242 } | 235 } |
| 243 | 236 |
| 244 if ((simd_support & JSIMD_SSE2) && | 237 if ((simd_support & JSIMD_SSE2) && |
| 245 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) | 238 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
| 246 sse2fct(cinfo->image_width, input_buf, | 239 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 247 output_buf, output_row, num_rows); | |
| 248 else if (simd_support & JSIMD_MMX) | 240 else if (simd_support & JSIMD_MMX) |
| 249 mmxfct(cinfo->image_width, input_buf, | 241 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 250 output_buf, output_row, num_rows); | |
| 251 } | 242 } |
| 252 | 243 |
| 253 GLOBAL(void) | 244 GLOBAL(void) |
| 254 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 245 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 255 JSAMPIMAGE input_buf, JDIMENSION input_row, | 246 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 256 JSAMPARRAY output_buf, int num_rows) | 247 JSAMPARRAY output_buf, int num_rows) |
| 257 { | 248 { |
| 258 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 249 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 259 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 250 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 260 | 251 |
| 261 switch(cinfo->out_color_space) | 252 switch(cinfo->out_color_space) { |
| 262 { | |
| 263 case JCS_EXT_RGB: | 253 case JCS_EXT_RGB: |
| 264 sse2fct=jsimd_ycc_extrgb_convert_sse2; | 254 sse2fct=jsimd_ycc_extrgb_convert_sse2; |
| 265 mmxfct=jsimd_ycc_extrgb_convert_mmx; | 255 mmxfct=jsimd_ycc_extrgb_convert_mmx; |
| 266 break; | 256 break; |
| 267 case JCS_EXT_RGBX: | 257 case JCS_EXT_RGBX: |
| 268 case JCS_EXT_RGBA: | 258 case JCS_EXT_RGBA: |
| 269 sse2fct=jsimd_ycc_extrgbx_convert_sse2; | 259 sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
| 270 mmxfct=jsimd_ycc_extrgbx_convert_mmx; | 260 mmxfct=jsimd_ycc_extrgbx_convert_mmx; |
| 271 break; | 261 break; |
| 272 case JCS_EXT_BGR: | 262 case JCS_EXT_BGR: |
| (...skipping 16 matching lines...) Expand all Loading... |
| 289 mmxfct=jsimd_ycc_extxrgb_convert_mmx; | 279 mmxfct=jsimd_ycc_extxrgb_convert_mmx; |
| 290 break; | 280 break; |
| 291 default: | 281 default: |
| 292 sse2fct=jsimd_ycc_rgb_convert_sse2; | 282 sse2fct=jsimd_ycc_rgb_convert_sse2; |
| 293 mmxfct=jsimd_ycc_rgb_convert_mmx; | 283 mmxfct=jsimd_ycc_rgb_convert_mmx; |
| 294 break; | 284 break; |
| 295 } | 285 } |
| 296 | 286 |
| 297 if ((simd_support & JSIMD_SSE2) && | 287 if ((simd_support & JSIMD_SSE2) && |
| 298 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 288 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
| 299 sse2fct(cinfo->output_width, input_buf, | 289 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 300 input_row, output_buf, num_rows); | |
| 301 else if (simd_support & JSIMD_MMX) | 290 else if (simd_support & JSIMD_MMX) |
| 302 mmxfct(cinfo->output_width, input_buf, | 291 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 303 input_row, output_buf, num_rows); | |
| 304 } | 292 } |
| 305 | 293 |
| 306 #ifndef JPEG_DECODE_ONLY | 294 GLOBAL(void) |
| 295 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
| 296 JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 297 JSAMPARRAY output_buf, int num_rows) |
| 298 { |
| 299 } |
| 300 |
| 307 GLOBAL(int) | 301 GLOBAL(int) |
| 308 jsimd_can_h2v2_downsample (void) | 302 jsimd_can_h2v2_downsample (void) |
| 309 { | 303 { |
| 310 init_simd(); | 304 init_simd(); |
| 311 | 305 |
| 312 /* The code is optimised for these values only */ | 306 /* The code is optimised for these values only */ |
| 313 if (BITS_IN_JSAMPLE != 8) | 307 if (BITS_IN_JSAMPLE != 8) |
| 314 return 0; | 308 return 0; |
| 315 if (sizeof(JDIMENSION) != 4) | 309 if (sizeof(JDIMENSION) != 4) |
| 316 return 0; | 310 return 0; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 336 | 330 |
| 337 if (simd_support & JSIMD_SSE2) | 331 if (simd_support & JSIMD_SSE2) |
| 338 return 1; | 332 return 1; |
| 339 if (simd_support & JSIMD_MMX) | 333 if (simd_support & JSIMD_MMX) |
| 340 return 1; | 334 return 1; |
| 341 | 335 |
| 342 return 0; | 336 return 0; |
| 343 } | 337 } |
| 344 | 338 |
| 345 GLOBAL(void) | 339 GLOBAL(void) |
| 346 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 340 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
| 347 JSAMPARRAY input_data, JSAMPARRAY output_data) | 341 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 348 { | 342 { |
| 349 if (simd_support & JSIMD_SSE2) | 343 if (simd_support & JSIMD_SSE2) |
| 350 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, | 344 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 351 compptr->v_samp_factor, compptr->width_in_blocks, | 345 compptr->v_samp_factor, |
| 352 input_data, output_data); | 346 compptr->width_in_blocks, input_data, |
| 347 output_data); |
| 353 else if (simd_support & JSIMD_MMX) | 348 else if (simd_support & JSIMD_MMX) |
| 354 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, | 349 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
| 355 compptr->v_samp_factor, compptr->width_in_blocks, | 350 compptr->v_samp_factor, compptr->width_in_blocks, |
| 356 input_data, output_data); | 351 input_data, output_data); |
| 357 } | 352 } |
| 358 | 353 |
| 359 GLOBAL(void) | 354 GLOBAL(void) |
| 360 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 355 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
| 361 JSAMPARRAY input_data, JSAMPARRAY output_data) | 356 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 362 { | 357 { |
| 363 if (simd_support & JSIMD_SSE2) | 358 if (simd_support & JSIMD_SSE2) |
| 364 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, | 359 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
| 365 compptr->v_samp_factor, compptr->width_in_blocks, | 360 compptr->v_samp_factor, |
| 366 input_data, output_data); | 361 compptr->width_in_blocks, input_data, |
| 362 output_data); |
| 367 else if (simd_support & JSIMD_MMX) | 363 else if (simd_support & JSIMD_MMX) |
| 368 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, | 364 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
| 369 compptr->v_samp_factor, compptr->width_in_blocks, | 365 compptr->v_samp_factor, compptr->width_in_blocks, |
| 370 input_data, output_data); | 366 input_data, output_data); |
| 371 } | 367 } |
| 372 #endif | |
| 373 | 368 |
| 374 GLOBAL(int) | 369 GLOBAL(int) |
| 375 jsimd_can_h2v2_upsample (void) | 370 jsimd_can_h2v2_upsample (void) |
| 376 { | 371 { |
| 377 init_simd(); | 372 init_simd(); |
| 378 | 373 |
| 379 /* The code is optimised for these values only */ | 374 /* The code is optimised for these values only */ |
| 380 if (BITS_IN_JSAMPLE != 8) | 375 if (BITS_IN_JSAMPLE != 8) |
| 381 return 0; | 376 return 0; |
| 382 if (sizeof(JDIMENSION) != 4) | 377 if (sizeof(JDIMENSION) != 4) |
| (...skipping 21 matching lines...) Expand all Loading... |
| 404 if (simd_support & JSIMD_SSE2) | 399 if (simd_support & JSIMD_SSE2) |
| 405 return 1; | 400 return 1; |
| 406 if (simd_support & JSIMD_MMX) | 401 if (simd_support & JSIMD_MMX) |
| 407 return 1; | 402 return 1; |
| 408 | 403 |
| 409 return 0; | 404 return 0; |
| 410 } | 405 } |
| 411 | 406 |
| 412 GLOBAL(void) | 407 GLOBAL(void) |
| 413 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 408 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 414 jpeg_component_info * compptr, | 409 jpeg_component_info *compptr, |
| 415 JSAMPARRAY input_data, | 410 JSAMPARRAY input_data, |
| 416 JSAMPARRAY * output_data_ptr) | 411 JSAMPARRAY *output_data_ptr) |
| 417 { | 412 { |
| 418 if (simd_support & JSIMD_SSE2) | 413 if (simd_support & JSIMD_SSE2) |
| 419 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, | 414 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
| 420 cinfo->output_width, input_data, output_data_ptr); | 415 input_data, output_data_ptr); |
| 421 else if (simd_support & JSIMD_MMX) | 416 else if (simd_support & JSIMD_MMX) |
| 422 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, | 417 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, |
| 423 cinfo->output_width, input_data, output_data_ptr); | 418 input_data, output_data_ptr); |
| 424 } | 419 } |
| 425 | 420 |
| 426 GLOBAL(void) | 421 GLOBAL(void) |
| 427 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 422 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 428 jpeg_component_info * compptr, | 423 jpeg_component_info *compptr, |
| 429 JSAMPARRAY input_data, | 424 JSAMPARRAY input_data, |
| 430 JSAMPARRAY * output_data_ptr) | 425 JSAMPARRAY *output_data_ptr) |
| 431 { | 426 { |
| 432 if (simd_support & JSIMD_SSE2) | 427 if (simd_support & JSIMD_SSE2) |
| 433 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, | 428 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
| 434 cinfo->output_width, input_data, output_data_ptr); | 429 input_data, output_data_ptr); |
| 435 else if (simd_support & JSIMD_MMX) | 430 else if (simd_support & JSIMD_MMX) |
| 436 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, | 431 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, |
| 437 cinfo->output_width, input_data, output_data_ptr); | 432 input_data, output_data_ptr); |
| 438 } | 433 } |
| 439 | 434 |
| 440 GLOBAL(int) | 435 GLOBAL(int) |
| 441 jsimd_can_h2v2_fancy_upsample (void) | 436 jsimd_can_h2v2_fancy_upsample (void) |
| 442 { | 437 { |
| 443 init_simd(); | 438 init_simd(); |
| 444 | 439 |
| 445 /* The code is optimised for these values only */ | 440 /* The code is optimised for these values only */ |
| 446 if (BITS_IN_JSAMPLE != 8) | 441 if (BITS_IN_JSAMPLE != 8) |
| 447 return 0; | 442 return 0; |
| (...skipping 24 matching lines...) Expand all Loading... |
| 472 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 467 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 473 return 1; | 468 return 1; |
| 474 if (simd_support & JSIMD_MMX) | 469 if (simd_support & JSIMD_MMX) |
| 475 return 1; | 470 return 1; |
| 476 | 471 |
| 477 return 0; | 472 return 0; |
| 478 } | 473 } |
| 479 | 474 |
| 480 GLOBAL(void) | 475 GLOBAL(void) |
| 481 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 476 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 482 jpeg_component_info * compptr, | 477 jpeg_component_info *compptr, |
| 483 JSAMPARRAY input_data, | 478 JSAMPARRAY input_data, |
| 484 JSAMPARRAY * output_data_ptr) | 479 JSAMPARRAY *output_data_ptr) |
| 485 { | 480 { |
| 486 if ((simd_support & JSIMD_SSE2) && | 481 if ((simd_support & JSIMD_SSE2) && |
| 487 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 482 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 488 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 483 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 489 compptr->downsampled_width, input_data, output_data_ptr); | 484 compptr->downsampled_width, input_data, |
| 485 output_data_ptr); |
| 490 else if (simd_support & JSIMD_MMX) | 486 else if (simd_support & JSIMD_MMX) |
| 491 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, | 487 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
| 492 compptr->downsampled_width, input_data, output_data_ptr); | 488 compptr->downsampled_width, input_data, |
| 489 output_data_ptr); |
| 493 } | 490 } |
| 494 | 491 |
| 495 GLOBAL(void) | 492 GLOBAL(void) |
| 496 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 493 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 497 jpeg_component_info * compptr, | 494 jpeg_component_info *compptr, |
| 498 JSAMPARRAY input_data, | 495 JSAMPARRAY input_data, |
| 499 JSAMPARRAY * output_data_ptr) | 496 JSAMPARRAY *output_data_ptr) |
| 500 { | 497 { |
| 501 if ((simd_support & JSIMD_SSE2) && | 498 if ((simd_support & JSIMD_SSE2) && |
| 502 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 499 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 503 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 500 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
| 504 compptr->downsampled_width, input_data, output_data_ptr); | 501 compptr->downsampled_width, input_data, |
| 502 output_data_ptr); |
| 505 else if (simd_support & JSIMD_MMX) | 503 else if (simd_support & JSIMD_MMX) |
| 506 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, | 504 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
| 507 compptr->downsampled_width, input_data, output_data_ptr); | 505 compptr->downsampled_width, input_data, |
| 506 output_data_ptr); |
| 508 } | 507 } |
| 509 | 508 |
| 510 GLOBAL(int) | 509 GLOBAL(int) |
| 511 jsimd_can_h2v2_merged_upsample (void) | 510 jsimd_can_h2v2_merged_upsample (void) |
| 512 { | 511 { |
| 513 init_simd(); | 512 init_simd(); |
| 514 | 513 |
| 515 /* The code is optimised for these values only */ | 514 /* The code is optimised for these values only */ |
| 516 if (BITS_IN_JSAMPLE != 8) | 515 if (BITS_IN_JSAMPLE != 8) |
| 517 return 0; | 516 return 0; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 549 | 548 |
| 550 GLOBAL(void) | 549 GLOBAL(void) |
| 551 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, | 550 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
| 552 JSAMPIMAGE input_buf, | 551 JSAMPIMAGE input_buf, |
| 553 JDIMENSION in_row_group_ctr, | 552 JDIMENSION in_row_group_ctr, |
| 554 JSAMPARRAY output_buf) | 553 JSAMPARRAY output_buf) |
| 555 { | 554 { |
| 556 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 555 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 557 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 556 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 558 | 557 |
| 559 switch(cinfo->out_color_space) | 558 switch(cinfo->out_color_space) { |
| 560 { | |
| 561 case JCS_EXT_RGB: | 559 case JCS_EXT_RGB: |
| 562 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; | 560 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
| 563 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; | 561 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; |
| 564 break; | 562 break; |
| 565 case JCS_EXT_RGBX: | 563 case JCS_EXT_RGBX: |
| 566 case JCS_EXT_RGBA: | 564 case JCS_EXT_RGBA: |
| 567 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; | 565 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
| 568 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; | 566 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; |
| 569 break; | 567 break; |
| 570 case JCS_EXT_BGR: | 568 case JCS_EXT_BGR: |
| (...skipping 16 matching lines...) Expand all Loading... |
| 587 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; | 585 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; |
| 588 break; | 586 break; |
| 589 default: | 587 default: |
| 590 sse2fct=jsimd_h2v2_merged_upsample_sse2; | 588 sse2fct=jsimd_h2v2_merged_upsample_sse2; |
| 591 mmxfct=jsimd_h2v2_merged_upsample_mmx; | 589 mmxfct=jsimd_h2v2_merged_upsample_mmx; |
| 592 break; | 590 break; |
| 593 } | 591 } |
| 594 | 592 |
| 595 if ((simd_support & JSIMD_SSE2) && | 593 if ((simd_support & JSIMD_SSE2) && |
| 596 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 594 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 597 sse2fct(cinfo->output_width, input_buf, | 595 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 598 in_row_group_ctr, output_buf); | |
| 599 else if (simd_support & JSIMD_MMX) | 596 else if (simd_support & JSIMD_MMX) |
| 600 mmxfct(cinfo->output_width, input_buf, | 597 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 601 in_row_group_ctr, output_buf); | |
| 602 } | 598 } |
| 603 | 599 |
| 604 GLOBAL(void) | 600 GLOBAL(void) |
| 605 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, | 601 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
| 606 JSAMPIMAGE input_buf, | 602 JSAMPIMAGE input_buf, |
| 607 JDIMENSION in_row_group_ctr, | 603 JDIMENSION in_row_group_ctr, |
| 608 JSAMPARRAY output_buf) | 604 JSAMPARRAY output_buf) |
| 609 { | 605 { |
| 610 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 606 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 611 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 607 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
| 612 | 608 |
| 613 switch(cinfo->out_color_space) | 609 switch(cinfo->out_color_space) { |
| 614 { | |
| 615 case JCS_EXT_RGB: | 610 case JCS_EXT_RGB: |
| 616 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; | 611 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
| 617 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; | 612 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; |
| 618 break; | 613 break; |
| 619 case JCS_EXT_RGBX: | 614 case JCS_EXT_RGBX: |
| 620 case JCS_EXT_RGBA: | 615 case JCS_EXT_RGBA: |
| 621 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; | 616 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
| 622 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; | 617 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; |
| 623 break; | 618 break; |
| 624 case JCS_EXT_BGR: | 619 case JCS_EXT_BGR: |
| (...skipping 16 matching lines...) Expand all Loading... |
| 641 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; | 636 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; |
| 642 break; | 637 break; |
| 643 default: | 638 default: |
| 644 sse2fct=jsimd_h2v1_merged_upsample_sse2; | 639 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
| 645 mmxfct=jsimd_h2v1_merged_upsample_mmx; | 640 mmxfct=jsimd_h2v1_merged_upsample_mmx; |
| 646 break; | 641 break; |
| 647 } | 642 } |
| 648 | 643 |
| 649 if ((simd_support & JSIMD_SSE2) && | 644 if ((simd_support & JSIMD_SSE2) && |
| 650 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 645 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 651 sse2fct(cinfo->output_width, input_buf, | 646 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 652 in_row_group_ctr, output_buf); | |
| 653 else if (simd_support & JSIMD_MMX) | 647 else if (simd_support & JSIMD_MMX) |
| 654 mmxfct(cinfo->output_width, input_buf, | 648 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 655 in_row_group_ctr, output_buf); | |
| 656 } | 649 } |
| 657 | 650 |
| 658 #ifndef JPEG_DECODE_ONLY | |
| 659 GLOBAL(int) | 651 GLOBAL(int) |
| 660 jsimd_can_convsamp (void) | 652 jsimd_can_convsamp (void) |
| 661 { | 653 { |
| 662 init_simd(); | 654 init_simd(); |
| 663 | 655 |
| 664 /* The code is optimised for these values only */ | 656 /* The code is optimised for these values only */ |
| 665 if (DCTSIZE != 8) | 657 if (DCTSIZE != 8) |
| 666 return 0; | 658 return 0; |
| 667 if (BITS_IN_JSAMPLE != 8) | 659 if (BITS_IN_JSAMPLE != 8) |
| 668 return 0; | 660 return 0; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 699 if (simd_support & JSIMD_SSE) | 691 if (simd_support & JSIMD_SSE) |
| 700 return 1; | 692 return 1; |
| 701 if (simd_support & JSIMD_3DNOW) | 693 if (simd_support & JSIMD_3DNOW) |
| 702 return 1; | 694 return 1; |
| 703 | 695 |
| 704 return 0; | 696 return 0; |
| 705 } | 697 } |
| 706 | 698 |
| 707 GLOBAL(void) | 699 GLOBAL(void) |
| 708 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 700 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 709 DCTELEM * workspace) | 701 DCTELEM *workspace) |
| 710 { | 702 { |
| 711 if (simd_support & JSIMD_SSE2) | 703 if (simd_support & JSIMD_SSE2) |
| 712 jsimd_convsamp_sse2(sample_data, start_col, workspace); | 704 jsimd_convsamp_sse2(sample_data, start_col, workspace); |
| 713 else if (simd_support & JSIMD_MMX) | 705 else if (simd_support & JSIMD_MMX) |
| 714 jsimd_convsamp_mmx(sample_data, start_col, workspace); | 706 jsimd_convsamp_mmx(sample_data, start_col, workspace); |
| 715 } | 707 } |
| 716 | 708 |
| 717 GLOBAL(void) | 709 GLOBAL(void) |
| 718 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 710 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 719 FAST_FLOAT * workspace) | 711 FAST_FLOAT *workspace) |
| 720 { | 712 { |
| 721 if (simd_support & JSIMD_SSE2) | 713 if (simd_support & JSIMD_SSE2) |
| 722 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); | 714 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
| 723 else if (simd_support & JSIMD_SSE) | 715 else if (simd_support & JSIMD_SSE) |
| 724 jsimd_convsamp_float_sse(sample_data, start_col, workspace); | 716 jsimd_convsamp_float_sse(sample_data, start_col, workspace); |
| 725 else if (simd_support & JSIMD_3DNOW) | 717 else if (simd_support & JSIMD_3DNOW) |
| 726 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); | 718 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); |
| 727 } | 719 } |
| 728 | 720 |
| 729 GLOBAL(int) | 721 GLOBAL(int) |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 777 | 769 |
| 778 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) | 770 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
| 779 return 1; | 771 return 1; |
| 780 if (simd_support & JSIMD_3DNOW) | 772 if (simd_support & JSIMD_3DNOW) |
| 781 return 1; | 773 return 1; |
| 782 | 774 |
| 783 return 0; | 775 return 0; |
| 784 } | 776 } |
| 785 | 777 |
| 786 GLOBAL(void) | 778 GLOBAL(void) |
| 787 jsimd_fdct_islow (DCTELEM * data) | 779 jsimd_fdct_islow (DCTELEM *data) |
| 788 { | 780 { |
| 789 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) | 781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 790 jsimd_fdct_islow_sse2(data); | 782 jsimd_fdct_islow_sse2(data); |
| 791 else if (simd_support & JSIMD_MMX) | 783 else if (simd_support & JSIMD_MMX) |
| 792 jsimd_fdct_islow_mmx(data); | 784 jsimd_fdct_islow_mmx(data); |
| 793 } | 785 } |
| 794 | 786 |
| 795 GLOBAL(void) | 787 GLOBAL(void) |
| 796 jsimd_fdct_ifast (DCTELEM * data) | 788 jsimd_fdct_ifast (DCTELEM *data) |
| 797 { | 789 { |
| 798 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) | 790 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
| 799 jsimd_fdct_ifast_sse2(data); | 791 jsimd_fdct_ifast_sse2(data); |
| 800 else if (simd_support & JSIMD_MMX) | 792 else if (simd_support & JSIMD_MMX) |
| 801 jsimd_fdct_ifast_mmx(data); | 793 jsimd_fdct_ifast_mmx(data); |
| 802 } | 794 } |
| 803 | 795 |
| 804 GLOBAL(void) | 796 GLOBAL(void) |
| 805 jsimd_fdct_float (FAST_FLOAT * data) | 797 jsimd_fdct_float (FAST_FLOAT *data) |
| 806 { | 798 { |
| 807 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) | 799 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
| 808 jsimd_fdct_float_sse(data); | 800 jsimd_fdct_float_sse(data); |
| 809 else if (simd_support & JSIMD_3DNOW) | 801 else if (simd_support & JSIMD_3DNOW) |
| 810 jsimd_fdct_float_3dnow(data); | 802 jsimd_fdct_float_3dnow(data); |
| 811 } | 803 } |
| 812 | 804 |
| 813 GLOBAL(int) | 805 GLOBAL(int) |
| 814 jsimd_can_quantize (void) | 806 jsimd_can_quantize (void) |
| 815 { | 807 { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 848 return 1; | 840 return 1; |
| 849 if (simd_support & JSIMD_SSE) | 841 if (simd_support & JSIMD_SSE) |
| 850 return 1; | 842 return 1; |
| 851 if (simd_support & JSIMD_3DNOW) | 843 if (simd_support & JSIMD_3DNOW) |
| 852 return 1; | 844 return 1; |
| 853 | 845 |
| 854 return 0; | 846 return 0; |
| 855 } | 847 } |
| 856 | 848 |
| 857 GLOBAL(void) | 849 GLOBAL(void) |
| 858 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 850 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, |
| 859 DCTELEM * workspace) | 851 DCTELEM *workspace) |
| 860 { | 852 { |
| 861 if (simd_support & JSIMD_SSE2) | 853 if (simd_support & JSIMD_SSE2) |
| 862 jsimd_quantize_sse2(coef_block, divisors, workspace); | 854 jsimd_quantize_sse2(coef_block, divisors, workspace); |
| 863 else if (simd_support & JSIMD_MMX) | 855 else if (simd_support & JSIMD_MMX) |
| 864 jsimd_quantize_mmx(coef_block, divisors, workspace); | 856 jsimd_quantize_mmx(coef_block, divisors, workspace); |
| 865 } | 857 } |
| 866 | 858 |
| 867 GLOBAL(void) | 859 GLOBAL(void) |
| 868 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 860 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
| 869 FAST_FLOAT * workspace) | 861 FAST_FLOAT *workspace) |
| 870 { | 862 { |
| 871 if (simd_support & JSIMD_SSE2) | 863 if (simd_support & JSIMD_SSE2) |
| 872 jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 864 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
| 873 else if (simd_support & JSIMD_SSE) | 865 else if (simd_support & JSIMD_SSE) |
| 874 jsimd_quantize_float_sse(coef_block, divisors, workspace); | 866 jsimd_quantize_float_sse(coef_block, divisors, workspace); |
| 875 else if (simd_support & JSIMD_3DNOW) | 867 else if (simd_support & JSIMD_3DNOW) |
| 876 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); | 868 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
| 877 } | 869 } |
| 878 #endif | |
| 879 | 870 |
| 880 GLOBAL(int) | 871 GLOBAL(int) |
| 881 jsimd_can_idct_2x2 (void) | 872 jsimd_can_idct_2x2 (void) |
| 882 { | 873 { |
| 883 init_simd(); | 874 init_simd(); |
| 884 | 875 |
| 885 /* The code is optimised for these values only */ | 876 /* The code is optimised for these values only */ |
| 886 if (DCTSIZE != 8) | 877 if (DCTSIZE != 8) |
| 887 return 0; | 878 return 0; |
| 888 if (sizeof(JCOEF) != 2) | 879 if (sizeof(JCOEF) != 2) |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 921 | 912 |
| 922 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 913 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 923 return 1; | 914 return 1; |
| 924 if (simd_support & JSIMD_MMX) | 915 if (simd_support & JSIMD_MMX) |
| 925 return 1; | 916 return 1; |
| 926 | 917 |
| 927 return 0; | 918 return 0; |
| 928 } | 919 } |
| 929 | 920 |
| 930 GLOBAL(void) | 921 GLOBAL(void) |
| 931 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 922 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 932 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 923 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 933 JDIMENSION output_col) | 924 JDIMENSION output_col) |
| 934 { | 925 { |
| 935 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 926 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 936 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 927 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, |
| 928 output_col); |
| 937 else if (simd_support & JSIMD_MMX) | 929 else if (simd_support & JSIMD_MMX) |
| 938 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); | 930 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
| 939 } | 931 } |
| 940 | 932 |
| 941 GLOBAL(void) | 933 GLOBAL(void) |
| 942 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 934 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 943 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 935 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 944 JDIMENSION output_col) | 936 JDIMENSION output_col) |
| 945 { | 937 { |
| 946 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 938 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
| 947 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 939 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, |
| 940 output_col); |
| 948 else if (simd_support & JSIMD_MMX) | 941 else if (simd_support & JSIMD_MMX) |
| 949 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); | 942 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
| 950 } | 943 } |
| 951 | 944 |
| 952 GLOBAL(int) | 945 GLOBAL(int) |
| 953 jsimd_can_idct_islow (void) | 946 jsimd_can_idct_islow (void) |
| 954 { | 947 { |
| 955 init_simd(); | 948 init_simd(); |
| 956 | 949 |
| 957 /* The code is optimised for these values only */ | 950 /* The code is optimised for these values only */ |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1023 return 1; | 1016 return 1; |
| 1024 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) | 1017 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
| 1025 return 1; | 1018 return 1; |
| 1026 if (simd_support & JSIMD_3DNOW) | 1019 if (simd_support & JSIMD_3DNOW) |
| 1027 return 1; | 1020 return 1; |
| 1028 | 1021 |
| 1029 return 0; | 1022 return 0; |
| 1030 } | 1023 } |
| 1031 | 1024 |
| 1032 GLOBAL(void) | 1025 GLOBAL(void) |
| 1033 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 1026 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 1034 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 1027 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 1035 JDIMENSION output_col) | 1028 JDIMENSION output_col) |
| 1036 { | 1029 { |
| 1037 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) | 1030 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
| 1038 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col
); | 1031 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, |
| 1032 output_col); |
| 1039 else if (simd_support & JSIMD_MMX) | 1033 else if (simd_support & JSIMD_MMX) |
| 1040 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col)
; | 1034 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, |
| 1035 output_col); |
| 1041 } | 1036 } |
| 1042 | 1037 |
| 1043 GLOBAL(void) | 1038 GLOBAL(void) |
| 1044 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 1039 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 1045 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 1040 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 1046 JDIMENSION output_col) | 1041 JDIMENSION output_col) |
| 1047 { | 1042 { |
| 1048 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) | 1043 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
| 1049 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col
); | 1044 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, |
| 1045 output_col); |
| 1050 else if (simd_support & JSIMD_MMX) | 1046 else if (simd_support & JSIMD_MMX) |
| 1051 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col)
; | 1047 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, |
| 1048 output_col); |
| 1052 } | 1049 } |
| 1053 | 1050 |
| 1054 GLOBAL(void) | 1051 GLOBAL(void) |
| 1055 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 1052 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
| 1056 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 1053 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 1057 JDIMENSION output_col) | 1054 JDIMENSION output_col) |
| 1058 { | 1055 { |
| 1059 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) | 1056 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
| 1060 jsimd_idct_float_sse2(compptr->dct_table, coef_block, | 1057 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, |
| 1061 output_buf, output_col); | 1058 output_col); |
| 1062 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) | 1059 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
| 1063 jsimd_idct_float_sse(compptr->dct_table, coef_block, | 1060 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, |
| 1064 output_buf, output_col); | 1061 output_col); |
| 1065 else if (simd_support & JSIMD_3DNOW) | 1062 else if (simd_support & JSIMD_3DNOW) |
| 1066 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, | 1063 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, |
| 1067 output_buf, output_col); | 1064 output_col); |
| 1068 } | 1065 } |
| 1066 |
| 1067 GLOBAL(int) |
| 1068 jsimd_can_huff_encode_one_block (void) |
| 1069 { |
| 1070 init_simd(); |
| 1071 |
| 1072 if (DCTSIZE != 8) |
| 1073 return 0; |
| 1074 if (sizeof(JCOEF) != 2) |
| 1075 return 0; |
| 1076 |
| 1077 if ((simd_support & JSIMD_SSE2) && simd_huffman && |
| 1078 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) |
| 1079 return 1; |
| 1080 |
| 1081 return 0; |
| 1082 } |
| 1083 |
| 1084 GLOBAL(JOCTET*) |
| 1085 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, |
| 1086 int last_dc_val, c_derived_tbl *dctbl, |
| 1087 c_derived_tbl *actbl) |
| 1088 { |
| 1089 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, |
| 1090 dctbl, actbl); |
| 1091 } |
| OLD | NEW |