OLD | NEW |
1 /* | 1 /* |
2 * jsimd_x86_64.c | 2 * jsimd_x86_64.c |
3 * | 3 * |
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 * Copyright 2009-2011, 2014 D. R. Commander | 5 * Copyright 2009-2011, 2014, 2016 D. R. Commander |
6 * | 6 * Copyright 2015 Matthieu Darbois |
| 7 * |
7 * Based on the x86 SIMD extension for IJG JPEG library, | 8 * Based on the x86 SIMD extension for IJG JPEG library, |
8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 * | 11 * |
11 * This file contains the interface between the "normal" portions | 12 * This file contains the interface between the "normal" portions |
12 * of the library and the SIMD implementations when running on a | 13 * of the library and the SIMD implementations when running on a |
13 * x86_64 architecture. | 14 * 64-bit x86 architecture. |
14 */ | 15 */ |
15 | 16 |
16 #define JPEG_INTERNALS | 17 #define JPEG_INTERNALS |
17 #include "../jinclude.h" | 18 #include "../jinclude.h" |
18 #include "../jpeglib.h" | 19 #include "../jpeglib.h" |
19 #include "../jsimd.h" | 20 #include "../jsimd.h" |
20 #include "../jdct.h" | 21 #include "../jdct.h" |
21 #include "../jsimddct.h" | 22 #include "../jsimddct.h" |
22 #include "jsimd.h" | 23 #include "jsimd.h" |
23 | 24 |
24 /* | 25 /* |
25 * In the PIC cases, we have no guarantee that constants will keep | 26 * In the PIC cases, we have no guarantee that constants will keep |
26 * their alignment. This macro allows us to verify it at runtime. | 27 * their alignment. This macro allows us to verify it at runtime. |
27 */ | 28 */ |
28 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) | 29 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) |
29 | 30 |
30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ | 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
31 | 32 |
32 #ifndef JPEG_DECODE_ONLY | 33 static unsigned int simd_support = ~0; |
| 34 static unsigned int simd_huffman = 1; |
| 35 |
| 36 /* |
| 37 * Check what SIMD accelerations are supported. |
| 38 * |
| 39 * FIXME: This code is racy under a multi-threaded environment. |
| 40 */ |
| 41 LOCAL(void) |
| 42 init_simd (void) |
| 43 { |
| 44 char *env = NULL; |
| 45 |
| 46 if (simd_support != ~0U) |
| 47 return; |
| 48 |
| 49 simd_support = JSIMD_SSE2 | JSIMD_SSE; |
| 50 |
| 51 /* Force different settings through environment variables */ |
| 52 env = getenv("JSIMD_FORCENONE"); |
| 53 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 54 simd_support = 0; |
| 55 env = getenv("JSIMD_NOHUFFENC"); |
| 56 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 57 simd_huffman = 0; |
| 58 } |
| 59 |
33 GLOBAL(int) | 60 GLOBAL(int) |
34 jsimd_can_rgb_ycc (void) | 61 jsimd_can_rgb_ycc (void) |
35 { | 62 { |
| 63 init_simd(); |
| 64 |
36 /* The code is optimised for these values only */ | 65 /* The code is optimised for these values only */ |
37 if (BITS_IN_JSAMPLE != 8) | 66 if (BITS_IN_JSAMPLE != 8) |
38 return 0; | 67 return 0; |
39 if (sizeof(JDIMENSION) != 4) | 68 if (sizeof(JDIMENSION) != 4) |
40 return 0; | 69 return 0; |
41 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
42 return 0; | 71 return 0; |
43 | 72 |
44 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 73 if ((simd_support & JSIMD_SSE2) && |
45 return 0; | 74 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
| 75 return 1; |
46 | 76 |
47 return 1; | 77 return 0; |
48 } | 78 } |
49 #endif | |
50 | 79 |
51 GLOBAL(int) | 80 GLOBAL(int) |
52 jsimd_can_rgb_gray (void) | 81 jsimd_can_rgb_gray (void) |
53 { | 82 { |
| 83 init_simd(); |
| 84 |
54 /* The code is optimised for these values only */ | 85 /* The code is optimised for these values only */ |
55 if (BITS_IN_JSAMPLE != 8) | 86 if (BITS_IN_JSAMPLE != 8) |
56 return 0; | 87 return 0; |
57 if (sizeof(JDIMENSION) != 4) | 88 if (sizeof(JDIMENSION) != 4) |
58 return 0; | 89 return 0; |
59 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 90 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
60 return 0; | 91 return 0; |
61 | 92 |
62 if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) | 93 if ((simd_support & JSIMD_SSE2) && |
63 return 0; | 94 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
| 95 return 1; |
64 | 96 |
65 return 1; | 97 return 0; |
66 } | 98 } |
67 | 99 |
68 GLOBAL(int) | 100 GLOBAL(int) |
69 jsimd_can_ycc_rgb (void) | 101 jsimd_can_ycc_rgb (void) |
70 { | 102 { |
| 103 init_simd(); |
| 104 |
71 /* The code is optimised for these values only */ | 105 /* The code is optimised for these values only */ |
72 if (BITS_IN_JSAMPLE != 8) | 106 if (BITS_IN_JSAMPLE != 8) |
73 return 0; | 107 return 0; |
74 if (sizeof(JDIMENSION) != 4) | 108 if (sizeof(JDIMENSION) != 4) |
75 return 0; | 109 return 0; |
76 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 110 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
77 return 0; | 111 return 0; |
78 | 112 |
79 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 113 if ((simd_support & JSIMD_SSE2) && |
80 return 0; | 114 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
| 115 return 1; |
81 | 116 |
82 return 1; | 117 return 0; |
83 } | 118 } |
84 | 119 |
85 GLOBAL(int) | 120 GLOBAL(int) |
86 jsimd_can_ycc_rgb565 (void) | 121 jsimd_can_ycc_rgb565 (void) |
87 { | 122 { |
88 return 0; | 123 return 0; |
89 } | 124 } |
90 | 125 |
91 #ifndef JPEG_DECODE_ONLY | |
92 GLOBAL(void) | 126 GLOBAL(void) |
93 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
94 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 128 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
95 JDIMENSION output_row, int num_rows) | 129 JDIMENSION output_row, int num_rows) |
96 { | 130 { |
97 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 131 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
98 | 132 |
99 switch(cinfo->in_color_space) | 133 switch(cinfo->in_color_space) { |
100 { | |
101 case JCS_EXT_RGB: | 134 case JCS_EXT_RGB: |
102 sse2fct=jsimd_extrgb_ycc_convert_sse2; | 135 sse2fct=jsimd_extrgb_ycc_convert_sse2; |
103 break; | 136 break; |
104 case JCS_EXT_RGBX: | 137 case JCS_EXT_RGBX: |
105 case JCS_EXT_RGBA: | 138 case JCS_EXT_RGBA: |
106 sse2fct=jsimd_extrgbx_ycc_convert_sse2; | 139 sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
107 break; | 140 break; |
108 case JCS_EXT_BGR: | 141 case JCS_EXT_BGR: |
109 sse2fct=jsimd_extbgr_ycc_convert_sse2; | 142 sse2fct=jsimd_extbgr_ycc_convert_sse2; |
110 break; | 143 break; |
111 case JCS_EXT_BGRX: | 144 case JCS_EXT_BGRX: |
112 case JCS_EXT_BGRA: | 145 case JCS_EXT_BGRA: |
113 sse2fct=jsimd_extbgrx_ycc_convert_sse2; | 146 sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
114 break; | 147 break; |
115 case JCS_EXT_XBGR: | 148 case JCS_EXT_XBGR: |
116 case JCS_EXT_ABGR: | 149 case JCS_EXT_ABGR: |
117 sse2fct=jsimd_extxbgr_ycc_convert_sse2; | 150 sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
118 break; | 151 break; |
119 case JCS_EXT_XRGB: | 152 case JCS_EXT_XRGB: |
120 case JCS_EXT_ARGB: | 153 case JCS_EXT_ARGB: |
121 sse2fct=jsimd_extxrgb_ycc_convert_sse2; | 154 sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
122 break; | 155 break; |
123 default: | 156 default: |
124 sse2fct=jsimd_rgb_ycc_convert_sse2; | 157 sse2fct=jsimd_rgb_ycc_convert_sse2; |
125 break; | 158 break; |
126 } | 159 } |
127 | 160 |
128 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 161 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
129 } | 162 } |
130 #endif | |
131 | 163 |
132 GLOBAL(void) | 164 GLOBAL(void) |
133 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 165 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
134 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 166 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
135 JDIMENSION output_row, int num_rows) | 167 JDIMENSION output_row, int num_rows) |
136 { | 168 { |
137 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 169 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
138 | 170 |
139 switch(cinfo->in_color_space) | 171 switch(cinfo->in_color_space) { |
140 { | |
141 case JCS_EXT_RGB: | 172 case JCS_EXT_RGB: |
142 sse2fct=jsimd_extrgb_gray_convert_sse2; | 173 sse2fct=jsimd_extrgb_gray_convert_sse2; |
143 break; | 174 break; |
144 case JCS_EXT_RGBX: | 175 case JCS_EXT_RGBX: |
145 case JCS_EXT_RGBA: | 176 case JCS_EXT_RGBA: |
146 sse2fct=jsimd_extrgbx_gray_convert_sse2; | 177 sse2fct=jsimd_extrgbx_gray_convert_sse2; |
147 break; | 178 break; |
148 case JCS_EXT_BGR: | 179 case JCS_EXT_BGR: |
149 sse2fct=jsimd_extbgr_gray_convert_sse2; | 180 sse2fct=jsimd_extbgr_gray_convert_sse2; |
150 break; | 181 break; |
(...skipping 17 matching lines...) Expand all Loading... |
168 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 199 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
169 } | 200 } |
170 | 201 |
171 GLOBAL(void) | 202 GLOBAL(void) |
172 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
173 JSAMPIMAGE input_buf, JDIMENSION input_row, | 204 JSAMPIMAGE input_buf, JDIMENSION input_row, |
174 JSAMPARRAY output_buf, int num_rows) | 205 JSAMPARRAY output_buf, int num_rows) |
175 { | 206 { |
176 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 207 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
177 | 208 |
178 switch(cinfo->out_color_space) | 209 switch(cinfo->out_color_space) { |
179 { | |
180 case JCS_EXT_RGB: | 210 case JCS_EXT_RGB: |
181 sse2fct=jsimd_ycc_extrgb_convert_sse2; | 211 sse2fct=jsimd_ycc_extrgb_convert_sse2; |
182 break; | 212 break; |
183 case JCS_EXT_RGBX: | 213 case JCS_EXT_RGBX: |
184 case JCS_EXT_RGBA: | 214 case JCS_EXT_RGBA: |
185 sse2fct=jsimd_ycc_extrgbx_convert_sse2; | 215 sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
186 break; | 216 break; |
187 case JCS_EXT_BGR: | 217 case JCS_EXT_BGR: |
188 sse2fct=jsimd_ycc_extbgr_convert_sse2; | 218 sse2fct=jsimd_ycc_extbgr_convert_sse2; |
189 break; | 219 break; |
(...skipping 17 matching lines...) Expand all Loading... |
207 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 237 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
208 } | 238 } |
209 | 239 |
210 GLOBAL(void) | 240 GLOBAL(void) |
211 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | 241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
212 JSAMPIMAGE input_buf, JDIMENSION input_row, | 242 JSAMPIMAGE input_buf, JDIMENSION input_row, |
213 JSAMPARRAY output_buf, int num_rows) | 243 JSAMPARRAY output_buf, int num_rows) |
214 { | 244 { |
215 } | 245 } |
216 | 246 |
217 #ifndef JPEG_DECODE_ONLY | |
218 GLOBAL(int) | 247 GLOBAL(int) |
219 jsimd_can_h2v2_downsample (void) | 248 jsimd_can_h2v2_downsample (void) |
220 { | 249 { |
221 /* The code is optimised for these values only */ | 250 init_simd(); |
222 if (BITS_IN_JSAMPLE != 8) | 251 |
223 return 0; | 252 /* The code is optimised for these values only */ |
224 if (sizeof(JDIMENSION) != 4) | 253 if (BITS_IN_JSAMPLE != 8) |
225 return 0; | 254 return 0; |
226 | 255 if (sizeof(JDIMENSION) != 4) |
227 return 1; | 256 return 0; |
| 257 |
| 258 if (simd_support & JSIMD_SSE2) |
| 259 return 1; |
| 260 |
| 261 return 0; |
228 } | 262 } |
229 | 263 |
230 GLOBAL(int) | 264 GLOBAL(int) |
231 jsimd_can_h2v1_downsample (void) | 265 jsimd_can_h2v1_downsample (void) |
232 { | 266 { |
233 /* The code is optimised for these values only */ | 267 init_simd(); |
234 if (BITS_IN_JSAMPLE != 8) | 268 |
235 return 0; | 269 /* The code is optimised for these values only */ |
236 if (sizeof(JDIMENSION) != 4) | 270 if (BITS_IN_JSAMPLE != 8) |
237 return 0; | 271 return 0; |
238 | 272 if (sizeof(JDIMENSION) != 4) |
239 return 1; | 273 return 0; |
240 } | 274 |
241 | 275 if (simd_support & JSIMD_SSE2) |
242 GLOBAL(void) | 276 return 1; |
243 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 277 |
| 278 return 0; |
| 279 } |
| 280 |
| 281 GLOBAL(void) |
| 282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
244 JSAMPARRAY input_data, JSAMPARRAY output_data) | 283 JSAMPARRAY input_data, JSAMPARRAY output_data) |
245 { | 284 { |
246 jsimd_h2v2_downsample_sse2(cinfo->image_width, | 285 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
247 cinfo->max_v_samp_factor, | 286 compptr->v_samp_factor, compptr->width_in_blocks, |
248 compptr->v_samp_factor, | |
249 compptr->width_in_blocks, | |
250 input_data, output_data); | 287 input_data, output_data); |
251 } | 288 } |
252 | 289 |
253 GLOBAL(void) | 290 GLOBAL(void) |
254 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
255 JSAMPARRAY input_data, JSAMPARRAY output_data) | 292 JSAMPARRAY input_data, JSAMPARRAY output_data) |
256 { | 293 { |
257 jsimd_h2v1_downsample_sse2(cinfo->image_width, | 294 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
258 cinfo->max_v_samp_factor, | 295 compptr->v_samp_factor, compptr->width_in_blocks, |
259 compptr->v_samp_factor, | |
260 compptr->width_in_blocks, | |
261 input_data, output_data); | 296 input_data, output_data); |
262 } | 297 } |
263 #endif | |
264 | 298 |
265 GLOBAL(int) | 299 GLOBAL(int) |
266 jsimd_can_h2v2_upsample (void) | 300 jsimd_can_h2v2_upsample (void) |
267 { | 301 { |
268 /* The code is optimised for these values only */ | 302 init_simd(); |
269 if (BITS_IN_JSAMPLE != 8) | 303 |
270 return 0; | 304 /* The code is optimised for these values only */ |
271 if (sizeof(JDIMENSION) != 4) | 305 if (BITS_IN_JSAMPLE != 8) |
272 return 0; | 306 return 0; |
273 | 307 if (sizeof(JDIMENSION) != 4) |
274 return 1; | 308 return 0; |
| 309 |
| 310 if (simd_support & JSIMD_SSE2) |
| 311 return 1; |
| 312 |
| 313 return 0; |
275 } | 314 } |
276 | 315 |
277 GLOBAL(int) | 316 GLOBAL(int) |
278 jsimd_can_h2v1_upsample (void) | 317 jsimd_can_h2v1_upsample (void) |
279 { | 318 { |
280 /* The code is optimised for these values only */ | 319 init_simd(); |
281 if (BITS_IN_JSAMPLE != 8) | 320 |
282 return 0; | 321 /* The code is optimised for these values only */ |
283 if (sizeof(JDIMENSION) != 4) | 322 if (BITS_IN_JSAMPLE != 8) |
284 return 0; | 323 return 0; |
285 | 324 if (sizeof(JDIMENSION) != 4) |
286 return 1; | 325 return 0; |
| 326 |
| 327 if (simd_support & JSIMD_SSE2) |
| 328 return 1; |
| 329 |
| 330 return 0; |
287 } | 331 } |
288 | 332 |
289 GLOBAL(void) | 333 GLOBAL(void) |
290 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 334 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
291 jpeg_component_info * compptr, | 335 jpeg_component_info *compptr, |
292 JSAMPARRAY input_data, | 336 JSAMPARRAY input_data, |
293 JSAMPARRAY * output_data_ptr) | 337 JSAMPARRAY *output_data_ptr) |
294 { | 338 { |
295 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, | 339 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
296 cinfo->output_width, | |
297 input_data, output_data_ptr); | 340 input_data, output_data_ptr); |
298 } | 341 } |
299 | 342 |
300 GLOBAL(void) | 343 GLOBAL(void) |
301 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 344 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
302 jpeg_component_info * compptr, | 345 jpeg_component_info *compptr, |
303 JSAMPARRAY input_data, | 346 JSAMPARRAY input_data, |
304 JSAMPARRAY * output_data_ptr) | 347 JSAMPARRAY *output_data_ptr) |
305 { | 348 { |
306 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, | 349 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, |
307 cinfo->output_width, | |
308 input_data, output_data_ptr); | 350 input_data, output_data_ptr); |
309 } | 351 } |
310 | 352 |
311 GLOBAL(int) | 353 GLOBAL(int) |
312 jsimd_can_h2v2_fancy_upsample (void) | 354 jsimd_can_h2v2_fancy_upsample (void) |
313 { | 355 { |
314 /* The code is optimised for these values only */ | 356 init_simd(); |
315 if (BITS_IN_JSAMPLE != 8) | 357 |
316 return 0; | 358 /* The code is optimised for these values only */ |
317 if (sizeof(JDIMENSION) != 4) | 359 if (BITS_IN_JSAMPLE != 8) |
318 return 0; | 360 return 0; |
319 | 361 if (sizeof(JDIMENSION) != 4) |
320 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 362 return 0; |
321 return 0; | 363 |
322 | 364 if ((simd_support & JSIMD_SSE2) && |
323 return 1; | 365 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 366 return 1; |
| 367 |
| 368 return 0; |
324 } | 369 } |
325 | 370 |
326 GLOBAL(int) | 371 GLOBAL(int) |
327 jsimd_can_h2v1_fancy_upsample (void) | 372 jsimd_can_h2v1_fancy_upsample (void) |
328 { | 373 { |
329 /* The code is optimised for these values only */ | 374 init_simd(); |
330 if (BITS_IN_JSAMPLE != 8) | 375 |
331 return 0; | 376 /* The code is optimised for these values only */ |
332 if (sizeof(JDIMENSION) != 4) | 377 if (BITS_IN_JSAMPLE != 8) |
333 return 0; | 378 return 0; |
334 | 379 if (sizeof(JDIMENSION) != 4) |
335 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 380 return 0; |
336 return 0; | 381 |
337 | 382 if ((simd_support & JSIMD_SSE2) && |
338 return 1; | 383 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
| 384 return 1; |
| 385 |
| 386 return 0; |
339 } | 387 } |
340 | 388 |
341 GLOBAL(void) | 389 GLOBAL(void) |
342 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
343 jpeg_component_info * compptr, | 391 jpeg_component_info *compptr, |
344 JSAMPARRAY input_data, | 392 JSAMPARRAY input_data, |
345 JSAMPARRAY * output_data_ptr) | 393 JSAMPARRAY *output_data_ptr) |
346 { | 394 { |
347 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 395 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
348 compptr->downsampled_width, | 396 compptr->downsampled_width, input_data, |
349 input_data, output_data_ptr); | 397 output_data_ptr); |
350 } | 398 } |
351 | 399 |
352 GLOBAL(void) | 400 GLOBAL(void) |
353 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
354 jpeg_component_info * compptr, | 402 jpeg_component_info *compptr, |
355 JSAMPARRAY input_data, | 403 JSAMPARRAY input_data, |
356 JSAMPARRAY * output_data_ptr) | 404 JSAMPARRAY *output_data_ptr) |
357 { | 405 { |
358 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 406 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
359 compptr->downsampled_width, | 407 compptr->downsampled_width, input_data, |
360 input_data, output_data_ptr); | 408 output_data_ptr); |
361 } | 409 } |
362 | 410 |
363 GLOBAL(int) | 411 GLOBAL(int) |
364 jsimd_can_h2v2_merged_upsample (void) | 412 jsimd_can_h2v2_merged_upsample (void) |
365 { | 413 { |
366 /* The code is optimised for these values only */ | 414 init_simd(); |
367 if (BITS_IN_JSAMPLE != 8) | 415 |
368 return 0; | 416 /* The code is optimised for these values only */ |
369 if (sizeof(JDIMENSION) != 4) | 417 if (BITS_IN_JSAMPLE != 8) |
370 return 0; | 418 return 0; |
371 | 419 if (sizeof(JDIMENSION) != 4) |
372 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 420 return 0; |
373 return 0; | 421 |
374 | 422 if ((simd_support & JSIMD_SSE2) && |
375 return 1; | 423 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
| 424 return 1; |
| 425 |
| 426 return 0; |
376 } | 427 } |
377 | 428 |
378 GLOBAL(int) | 429 GLOBAL(int) |
379 jsimd_can_h2v1_merged_upsample (void) | 430 jsimd_can_h2v1_merged_upsample (void) |
380 { | 431 { |
381 /* The code is optimised for these values only */ | 432 init_simd(); |
382 if (BITS_IN_JSAMPLE != 8) | 433 |
383 return 0; | 434 /* The code is optimised for these values only */ |
384 if (sizeof(JDIMENSION) != 4) | 435 if (BITS_IN_JSAMPLE != 8) |
385 return 0; | 436 return 0; |
386 | 437 if (sizeof(JDIMENSION) != 4) |
387 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 438 return 0; |
388 return 0; | 439 |
389 | 440 if ((simd_support & JSIMD_SSE2) && |
390 return 1; | 441 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
391 } | 442 return 1; |
392 | 443 |
393 GLOBAL(void) | 444 return 0; |
| 445 } |
| 446 |
| 447 GLOBAL(void) |
394 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, | 448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
395 JSAMPIMAGE input_buf, | 449 JSAMPIMAGE input_buf, |
396 JDIMENSION in_row_group_ctr, | 450 JDIMENSION in_row_group_ctr, |
397 JSAMPARRAY output_buf) | 451 JSAMPARRAY output_buf) |
398 { | 452 { |
399 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
400 | 454 |
401 switch(cinfo->out_color_space) | 455 switch(cinfo->out_color_space) { |
402 { | |
403 case JCS_EXT_RGB: | 456 case JCS_EXT_RGB: |
404 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; | 457 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
405 break; | 458 break; |
406 case JCS_EXT_RGBX: | 459 case JCS_EXT_RGBX: |
407 case JCS_EXT_RGBA: | 460 case JCS_EXT_RGBA: |
408 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; | 461 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
409 break; | 462 break; |
410 case JCS_EXT_BGR: | 463 case JCS_EXT_BGR: |
411 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; | 464 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
412 break; | 465 break; |
(...skipping 18 matching lines...) Expand all Loading... |
431 } | 484 } |
432 | 485 |
433 GLOBAL(void) | 486 GLOBAL(void) |
434 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, | 487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
435 JSAMPIMAGE input_buf, | 488 JSAMPIMAGE input_buf, |
436 JDIMENSION in_row_group_ctr, | 489 JDIMENSION in_row_group_ctr, |
437 JSAMPARRAY output_buf) | 490 JSAMPARRAY output_buf) |
438 { | 491 { |
439 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); | 492 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
440 | 493 |
441 switch(cinfo->out_color_space) | 494 switch(cinfo->out_color_space) { |
442 { | |
443 case JCS_EXT_RGB: | 495 case JCS_EXT_RGB: |
444 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; | 496 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
445 break; | 497 break; |
446 case JCS_EXT_RGBX: | 498 case JCS_EXT_RGBX: |
447 case JCS_EXT_RGBA: | 499 case JCS_EXT_RGBA: |
448 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; | 500 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
449 break; | 501 break; |
450 case JCS_EXT_BGR: | 502 case JCS_EXT_BGR: |
451 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; | 503 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
452 break; | 504 break; |
(...skipping 10 matching lines...) Expand all Loading... |
463 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; | 515 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
464 break; | 516 break; |
465 default: | 517 default: |
466 sse2fct=jsimd_h2v1_merged_upsample_sse2; | 518 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
467 break; | 519 break; |
468 } | 520 } |
469 | 521 |
470 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 522 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
471 } | 523 } |
472 | 524 |
473 #ifndef JPEG_DECODE_ONLY | |
474 GLOBAL(int) | 525 GLOBAL(int) |
475 jsimd_can_convsamp (void) | 526 jsimd_can_convsamp (void) |
476 { | 527 { |
| 528 init_simd(); |
| 529 |
477 /* The code is optimised for these values only */ | 530 /* The code is optimised for these values only */ |
478 if (DCTSIZE != 8) | 531 if (DCTSIZE != 8) |
479 return 0; | 532 return 0; |
480 if (BITS_IN_JSAMPLE != 8) | 533 if (BITS_IN_JSAMPLE != 8) |
481 return 0; | 534 return 0; |
482 if (sizeof(JDIMENSION) != 4) | 535 if (sizeof(JDIMENSION) != 4) |
483 return 0; | 536 return 0; |
484 if (sizeof(DCTELEM) != 2) | 537 if (sizeof(DCTELEM) != 2) |
485 return 0; | 538 return 0; |
486 | 539 |
487 return 1; | 540 if (simd_support & JSIMD_SSE2) |
| 541 return 1; |
| 542 |
| 543 return 0; |
488 } | 544 } |
489 | 545 |
490 GLOBAL(int) | 546 GLOBAL(int) |
491 jsimd_can_convsamp_float (void) | 547 jsimd_can_convsamp_float (void) |
492 { | 548 { |
| 549 init_simd(); |
| 550 |
493 /* The code is optimised for these values only */ | 551 /* The code is optimised for these values only */ |
494 if (DCTSIZE != 8) | 552 if (DCTSIZE != 8) |
495 return 0; | 553 return 0; |
496 if (BITS_IN_JSAMPLE != 8) | 554 if (BITS_IN_JSAMPLE != 8) |
497 return 0; | 555 return 0; |
498 if (sizeof(JDIMENSION) != 4) | 556 if (sizeof(JDIMENSION) != 4) |
499 return 0; | 557 return 0; |
500 if (sizeof(FAST_FLOAT) != 4) | 558 if (sizeof(FAST_FLOAT) != 4) |
501 return 0; | 559 return 0; |
502 | 560 |
503 return 1; | 561 if (simd_support & JSIMD_SSE2) |
| 562 return 1; |
| 563 |
| 564 return 0; |
504 } | 565 } |
505 | 566 |
506 GLOBAL(void) | 567 GLOBAL(void) |
507 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
508 DCTELEM * workspace) | 569 DCTELEM *workspace) |
509 { | 570 { |
510 jsimd_convsamp_sse2(sample_data, start_col, workspace); | 571 jsimd_convsamp_sse2(sample_data, start_col, workspace); |
511 } | 572 } |
512 | 573 |
513 GLOBAL(void) | 574 GLOBAL(void) |
514 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
515 FAST_FLOAT * workspace) | 576 FAST_FLOAT *workspace) |
516 { | 577 { |
517 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); | 578 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
518 } | 579 } |
519 | 580 |
520 GLOBAL(int) | 581 GLOBAL(int) |
521 jsimd_can_fdct_islow (void) | 582 jsimd_can_fdct_islow (void) |
522 { | 583 { |
| 584 init_simd(); |
| 585 |
523 /* The code is optimised for these values only */ | 586 /* The code is optimised for these values only */ |
524 if (DCTSIZE != 8) | 587 if (DCTSIZE != 8) |
525 return 0; | 588 return 0; |
526 if (sizeof(DCTELEM) != 2) | 589 if (sizeof(DCTELEM) != 2) |
527 return 0; | 590 return 0; |
528 | 591 |
529 if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) | 592 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
530 return 0; | 593 return 1; |
531 | 594 |
532 return 1; | 595 return 0; |
533 } | 596 } |
534 | 597 |
535 GLOBAL(int) | 598 GLOBAL(int) |
536 jsimd_can_fdct_ifast (void) | 599 jsimd_can_fdct_ifast (void) |
537 { | 600 { |
| 601 init_simd(); |
| 602 |
538 /* The code is optimised for these values only */ | 603 /* The code is optimised for these values only */ |
539 if (DCTSIZE != 8) | 604 if (DCTSIZE != 8) |
540 return 0; | 605 return 0; |
541 if (sizeof(DCTELEM) != 2) | 606 if (sizeof(DCTELEM) != 2) |
542 return 0; | 607 return 0; |
543 | 608 |
544 if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) | 609 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) |
545 return 0; | 610 return 1; |
546 | 611 |
547 return 1; | 612 return 0; |
548 } | 613 } |
549 | 614 |
550 GLOBAL(int) | 615 GLOBAL(int) |
551 jsimd_can_fdct_float (void) | 616 jsimd_can_fdct_float (void) |
552 { | 617 { |
| 618 init_simd(); |
| 619 |
553 /* The code is optimised for these values only */ | 620 /* The code is optimised for these values only */ |
554 if (DCTSIZE != 8) | 621 if (DCTSIZE != 8) |
555 return 0; | 622 return 0; |
556 if (sizeof(FAST_FLOAT) != 4) | 623 if (sizeof(FAST_FLOAT) != 4) |
557 return 0; | 624 return 0; |
558 | 625 |
559 if (!IS_ALIGNED_SSE(jconst_fdct_float_sse)) | 626 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
560 return 0; | 627 return 1; |
561 | 628 |
562 return 1; | 629 return 0; |
563 } | 630 } |
564 | 631 |
565 GLOBAL(void) | 632 GLOBAL(void) |
566 jsimd_fdct_islow (DCTELEM * data) | 633 jsimd_fdct_islow (DCTELEM *data) |
567 { | 634 { |
568 jsimd_fdct_islow_sse2(data); | 635 jsimd_fdct_islow_sse2(data); |
569 } | 636 } |
570 | 637 |
571 GLOBAL(void) | 638 GLOBAL(void) |
572 jsimd_fdct_ifast (DCTELEM * data) | 639 jsimd_fdct_ifast (DCTELEM *data) |
573 { | 640 { |
574 jsimd_fdct_ifast_sse2(data); | 641 jsimd_fdct_ifast_sse2(data); |
575 } | 642 } |
576 | 643 |
577 GLOBAL(void) | 644 GLOBAL(void) |
578 jsimd_fdct_float (FAST_FLOAT * data) | 645 jsimd_fdct_float (FAST_FLOAT *data) |
579 { | 646 { |
580 jsimd_fdct_float_sse(data); | 647 jsimd_fdct_float_sse(data); |
581 } | 648 } |
582 | 649 |
583 GLOBAL(int) | 650 GLOBAL(int) |
584 jsimd_can_quantize (void) | 651 jsimd_can_quantize (void) |
585 { | 652 { |
| 653 init_simd(); |
| 654 |
586 /* The code is optimised for these values only */ | 655 /* The code is optimised for these values only */ |
587 if (DCTSIZE != 8) | 656 if (DCTSIZE != 8) |
588 return 0; | 657 return 0; |
589 if (sizeof(JCOEF) != 2) | 658 if (sizeof(JCOEF) != 2) |
590 return 0; | 659 return 0; |
591 if (sizeof(DCTELEM) != 2) | 660 if (sizeof(DCTELEM) != 2) |
592 return 0; | 661 return 0; |
593 | 662 |
594 return 1; | 663 if (simd_support & JSIMD_SSE2) |
| 664 return 1; |
| 665 |
| 666 return 0; |
595 } | 667 } |
596 | 668 |
597 GLOBAL(int) | 669 GLOBAL(int) |
598 jsimd_can_quantize_float (void) | 670 jsimd_can_quantize_float (void) |
599 { | 671 { |
| 672 init_simd(); |
| 673 |
600 /* The code is optimised for these values only */ | 674 /* The code is optimised for these values only */ |
601 if (DCTSIZE != 8) | 675 if (DCTSIZE != 8) |
602 return 0; | 676 return 0; |
603 if (sizeof(JCOEF) != 2) | 677 if (sizeof(JCOEF) != 2) |
604 return 0; | 678 return 0; |
605 if (sizeof(FAST_FLOAT) != 4) | 679 if (sizeof(FAST_FLOAT) != 4) |
606 return 0; | 680 return 0; |
607 | 681 |
608 return 1; | 682 if (simd_support & JSIMD_SSE2) |
| 683 return 1; |
| 684 |
| 685 return 0; |
609 } | 686 } |
610 | 687 |
611 GLOBAL(void) | 688 GLOBAL(void) |
612 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, |
613 DCTELEM * workspace) | 690 DCTELEM *workspace) |
614 { | 691 { |
615 jsimd_quantize_sse2(coef_block, divisors, workspace); | 692 jsimd_quantize_sse2(coef_block, divisors, workspace); |
616 } | 693 } |
617 | 694 |
618 GLOBAL(void) | 695 GLOBAL(void) |
619 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
620 FAST_FLOAT * workspace) | 697 FAST_FLOAT *workspace) |
621 { | 698 { |
622 jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 699 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
623 } | 700 } |
624 #endif | |
625 | 701 |
626 GLOBAL(int) | 702 GLOBAL(int) |
627 jsimd_can_idct_2x2 (void) | 703 jsimd_can_idct_2x2 (void) |
628 { | 704 { |
| 705 init_simd(); |
| 706 |
629 /* The code is optimised for these values only */ | 707 /* The code is optimised for these values only */ |
630 if (DCTSIZE != 8) | 708 if (DCTSIZE != 8) |
631 return 0; | 709 return 0; |
632 if (sizeof(JCOEF) != 2) | 710 if (sizeof(JCOEF) != 2) |
633 return 0; | 711 return 0; |
634 if (BITS_IN_JSAMPLE != 8) | 712 if (BITS_IN_JSAMPLE != 8) |
635 return 0; | 713 return 0; |
636 if (sizeof(JDIMENSION) != 4) | 714 if (sizeof(JDIMENSION) != 4) |
637 return 0; | 715 return 0; |
638 if (sizeof(ISLOW_MULT_TYPE) != 2) | 716 if (sizeof(ISLOW_MULT_TYPE) != 2) |
639 return 0; | 717 return 0; |
640 | 718 |
641 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 719 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
642 return 0; | 720 return 1; |
643 | 721 |
644 return 1; | 722 return 0; |
645 } | 723 } |
646 | 724 |
647 GLOBAL(int) | 725 GLOBAL(int) |
648 jsimd_can_idct_4x4 (void) | 726 jsimd_can_idct_4x4 (void) |
649 { | 727 { |
| 728 init_simd(); |
| 729 |
650 /* The code is optimised for these values only */ | 730 /* The code is optimised for these values only */ |
651 if (DCTSIZE != 8) | 731 if (DCTSIZE != 8) |
652 return 0; | 732 return 0; |
653 if (sizeof(JCOEF) != 2) | 733 if (sizeof(JCOEF) != 2) |
654 return 0; | 734 return 0; |
655 if (BITS_IN_JSAMPLE != 8) | 735 if (BITS_IN_JSAMPLE != 8) |
656 return 0; | 736 return 0; |
657 if (sizeof(JDIMENSION) != 4) | 737 if (sizeof(JDIMENSION) != 4) |
658 return 0; | 738 return 0; |
659 if (sizeof(ISLOW_MULT_TYPE) != 2) | 739 if (sizeof(ISLOW_MULT_TYPE) != 2) |
660 return 0; | 740 return 0; |
661 | 741 |
662 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2)) | 742 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
663 return 0; | 743 return 1; |
664 | 744 |
665 return 1; | 745 return 0; |
666 } | 746 } |
667 | 747 |
668 GLOBAL(void) | 748 GLOBAL(void) |
669 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
670 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 750 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
671 JDIMENSION output_col) | 751 JDIMENSION output_col) |
672 { | 752 { |
673 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 753 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
674 } | 754 } |
675 | 755 |
676 GLOBAL(void) | 756 GLOBAL(void) |
677 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
678 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 758 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
679 JDIMENSION output_col) | 759 JDIMENSION output_col) |
680 { | 760 { |
681 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 761 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
682 } | 762 } |
683 | 763 |
684 GLOBAL(int) | 764 GLOBAL(int) |
685 jsimd_can_idct_islow (void) | 765 jsimd_can_idct_islow (void) |
686 { | 766 { |
| 767 init_simd(); |
| 768 |
687 /* The code is optimised for these values only */ | 769 /* The code is optimised for these values only */ |
688 if (DCTSIZE != 8) | 770 if (DCTSIZE != 8) |
689 return 0; | 771 return 0; |
690 if (sizeof(JCOEF) != 2) | 772 if (sizeof(JCOEF) != 2) |
691 return 0; | 773 return 0; |
692 if (BITS_IN_JSAMPLE != 8) | 774 if (BITS_IN_JSAMPLE != 8) |
693 return 0; | 775 return 0; |
694 if (sizeof(JDIMENSION) != 4) | 776 if (sizeof(JDIMENSION) != 4) |
695 return 0; | 777 return 0; |
696 if (sizeof(ISLOW_MULT_TYPE) != 2) | 778 if (sizeof(ISLOW_MULT_TYPE) != 2) |
697 return 0; | 779 return 0; |
698 | 780 |
699 if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2)) | 781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
700 return 0; | 782 return 1; |
701 | 783 |
702 return 1; | 784 return 0; |
703 } | 785 } |
704 | 786 |
705 GLOBAL(int) | 787 GLOBAL(int) |
706 jsimd_can_idct_ifast (void) | 788 jsimd_can_idct_ifast (void) |
707 { | 789 { |
| 790 init_simd(); |
| 791 |
708 /* The code is optimised for these values only */ | 792 /* The code is optimised for these values only */ |
709 if (DCTSIZE != 8) | 793 if (DCTSIZE != 8) |
710 return 0; | 794 return 0; |
711 if (sizeof(JCOEF) != 2) | 795 if (sizeof(JCOEF) != 2) |
712 return 0; | 796 return 0; |
713 if (BITS_IN_JSAMPLE != 8) | 797 if (BITS_IN_JSAMPLE != 8) |
714 return 0; | 798 return 0; |
715 if (sizeof(JDIMENSION) != 4) | 799 if (sizeof(JDIMENSION) != 4) |
716 return 0; | 800 return 0; |
717 if (sizeof(IFAST_MULT_TYPE) != 2) | 801 if (sizeof(IFAST_MULT_TYPE) != 2) |
718 return 0; | 802 return 0; |
719 if (IFAST_SCALE_BITS != 2) | 803 if (IFAST_SCALE_BITS != 2) |
720 return 0; | 804 return 0; |
721 | 805 |
722 if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) | 806 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
723 return 0; | 807 return 1; |
724 | 808 |
725 return 1; | 809 return 0; |
726 } | 810 } |
727 | 811 |
728 GLOBAL(int) | 812 GLOBAL(int) |
729 jsimd_can_idct_float (void) | 813 jsimd_can_idct_float (void) |
730 { | 814 { |
| 815 init_simd(); |
| 816 |
731 if (DCTSIZE != 8) | 817 if (DCTSIZE != 8) |
732 return 0; | 818 return 0; |
733 if (sizeof(JCOEF) != 2) | 819 if (sizeof(JCOEF) != 2) |
734 return 0; | 820 return 0; |
735 if (BITS_IN_JSAMPLE != 8) | 821 if (BITS_IN_JSAMPLE != 8) |
736 return 0; | 822 return 0; |
737 if (sizeof(JDIMENSION) != 4) | 823 if (sizeof(JDIMENSION) != 4) |
738 return 0; | 824 return 0; |
739 if (sizeof(FAST_FLOAT) != 4) | 825 if (sizeof(FAST_FLOAT) != 4) |
740 return 0; | 826 return 0; |
741 if (sizeof(FLOAT_MULT_TYPE) != 4) | 827 if (sizeof(FLOAT_MULT_TYPE) != 4) |
742 return 0; | 828 return 0; |
743 | 829 |
744 if (!IS_ALIGNED_SSE(jconst_idct_float_sse2)) | 830 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
745 return 0; | 831 return 1; |
746 | 832 |
747 return 1; | 833 return 0; |
748 } | 834 } |
749 | 835 |
750 GLOBAL(void) | 836 GLOBAL(void) |
751 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
752 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 838 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
753 JDIMENSION output_col) | 839 JDIMENSION output_col) |
754 { | 840 { |
755 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 841 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, |
| 842 output_col); |
756 } | 843 } |
757 | 844 |
758 GLOBAL(void) | 845 GLOBAL(void) |
759 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
760 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 847 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
761 JDIMENSION output_col) | 848 JDIMENSION output_col) |
762 { | 849 { |
763 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); | 850 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, |
| 851 output_col); |
764 } | 852 } |
765 | 853 |
766 GLOBAL(void) | 854 GLOBAL(void) |
767 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
768 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 856 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
769 JDIMENSION output_col) | 857 JDIMENSION output_col) |
770 { | 858 { |
771 jsimd_idct_float_sse2(compptr->dct_table, coef_block, | 859 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, |
772 output_buf, output_col); | 860 output_col); |
773 } | 861 } |
| 862 |
| 863 GLOBAL(int) |
| 864 jsimd_can_huff_encode_one_block (void) |
| 865 { |
| 866 init_simd(); |
| 867 |
| 868 if (DCTSIZE != 8) |
| 869 return 0; |
| 870 if (sizeof(JCOEF) != 2) |
| 871 return 0; |
| 872 |
| 873 if ((simd_support & JSIMD_SSE2) && simd_huffman && |
| 874 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) |
| 875 return 1; |
| 876 |
| 877 return 0; |
| 878 } |
| 879 |
| 880 GLOBAL(JOCTET*) |
| 881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, |
| 882 int last_dc_val, c_derived_tbl *dctbl, |
| 883 c_derived_tbl *actbl) |
| 884 { |
| 885 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, |
| 886 dctbl, actbl); |
| 887 } |
OLD | NEW |