OLD | NEW |
1 /* | 1 /* |
2 * jsimd_arm64.c | 2 * jsimd_arm64.c |
3 * | 3 * |
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 * Copyright 2009-2011, 2013-2014 D. R. Commander | 5 * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander |
| 6 * Copyright 2015-2016 Matthieu Darbois |
6 * | 7 * |
7 * Based on the x86 SIMD extension for IJG JPEG library, | 8 * Based on the x86 SIMD extension for IJG JPEG library, |
8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 * | 11 * |
11 * This file contains the interface between the "normal" portions | 12 * This file contains the interface between the "normal" portions |
12 * of the library and the SIMD implementations when running on a | 13 * of the library and the SIMD implementations when running on a |
13 * 64-bit ARM architecture. | 14 * 64-bit ARM architecture. |
14 */ | 15 */ |
15 | 16 |
16 #define JPEG_INTERNALS | 17 #define JPEG_INTERNALS |
17 #include "../jinclude.h" | 18 #include "../jinclude.h" |
18 #include "../jpeglib.h" | 19 #include "../jpeglib.h" |
19 #include "../jsimd.h" | 20 #include "../jsimd.h" |
20 #include "../jdct.h" | 21 #include "../jdct.h" |
21 #include "../jsimddct.h" | 22 #include "../jsimddct.h" |
22 #include "jsimd.h" | 23 #include "jsimd.h" |
23 | 24 |
24 #include <stdio.h> | 25 #include <stdio.h> |
25 #include <string.h> | 26 #include <string.h> |
26 #include <ctype.h> | 27 #include <ctype.h> |
27 | 28 |
/* Bits stored in simd_features.  When a bit is clear, the dispatchers in this
   file select the matching *_slowld3 / *_slowst3 / *_slowtbl NEON routine. */
#define JSIMD_FASTLD3 1
#define JSIMD_FASTST3 2
#define JSIMD_FASTTBL 4

/* ~0 means "not probed yet"; init_simd() overwrites it on first use. */
static unsigned int simd_support = ~0;
/* Non-zero lets jsimd_can_huff_encode_one_block() report SIMD support. */
static unsigned int simd_huffman = 1;
/* Every feature is assumed fast until CPU detection or the environment says
   otherwise. */
static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
                                    JSIMD_FASTTBL;
| 37 |
| 38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
| 39 |
| 40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) |
| 41 |
/*
 * Test whether one line of /proc/cpuinfo names 'field' and lists 'value'.
 *
 * buffer: NUL-terminated line; it must begin with 'field' (prefix match).
 * field:  field name expected at the very start of the line.
 * value:  word to look for in the remainder of the line.
 *
 * Returns 1 when 'value' occurs as a separate word (delimited by whitespace,
 * the start of the searched region, or the end of the string), 0 otherwise.
 * An empty 'value' never matches.
 */
LOCAL(int)
check_cpuinfo (char *buffer, const char *field, char *value)
{
  size_t field_len, value_len;
  char *start, *p;

  if (*value == 0)
    return 0;

  field_len = strlen(field);
  if (strncmp(buffer, field, field_len) != 0)
    return 0;

  /* Skip the field name and any whitespace that follows it.  The cast keeps
     isspace() defined for bytes that are negative as plain char. */
  buffer += field_len;
  while (isspace((unsigned char)*buffer))
    buffer++;

  /* Walk every occurrence of 'value'.  The region start stays fixed so the
     character in front of a candidate is always examined; advancing the
     region itself would let a candidate that failed the left-boundary test
     pass once the region start caught up with it. */
  value_len = strlen(value);
  start = buffer;
  for (p = strstr(start, value); p != NULL; p = strstr(p + 1, value)) {
    int left_ok = (p == start) || isspace((unsigned char)p[-1]);
    int right_ok = (p[value_len] == 0) ||
                   isspace((unsigned char)p[value_len]);

    if (left_ok && right_ok)
      return 1;
  }
  return 0;
}
| 69 |
| 70 LOCAL(int) |
| 71 parse_proc_cpuinfo (int bufsize) |
| 72 { |
| 73 char *buffer = (char *)malloc(bufsize); |
| 74 FILE *fd; |
| 75 |
| 76 if (!buffer) |
| 77 return 0; |
| 78 |
| 79 fd = fopen("/proc/cpuinfo", "r"); |
| 80 if (fd) { |
| 81 while (fgets(buffer, bufsize, fd)) { |
| 82 if (!strchr(buffer, '\n') && !feof(fd)) { |
| 83 /* "impossible" happened - insufficient size of the buffer! */ |
| 84 fclose(fd); |
| 85 free(buffer); |
| 86 return 0; |
| 87 } |
| 88 if (check_cpuinfo(buffer, "CPU part", "0xd03") || |
| 89 check_cpuinfo(buffer, "CPU part", "0xd07")) |
| 90 /* The Cortex-A53 has a slow tbl implementation. We can gain a few |
| 91 percent speedup by disabling the use of that instruction. The |
| 92 speedup on Cortex-A57 is more subtle but still measurable. */ |
| 93 simd_features &= ~JSIMD_FASTTBL; |
| 94 else if (check_cpuinfo(buffer, "CPU part", "0x0a1")) |
| 95 /* The SIMD version of Huffman encoding is slower than the C version on |
| 96 Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that |
| 97 CPU. */ |
| 98 simd_huffman = simd_features = 0; |
| 99 } |
| 100 fclose(fd); |
| 101 } |
| 102 free(buffer); |
| 103 return 1; |
| 104 } |
| 105 |
| 106 #endif |
29 | 107 |
30 /* | 108 /* |
31 * Check what SIMD accelerations are supported. | 109 * Check what SIMD accelerations are supported. |
32 * | 110 * |
33 * FIXME: This code is racy under a multi-threaded environment. | 111 * FIXME: This code is racy under a multi-threaded environment. |
34 */ | 112 */ |
35 | 113 |
36 /* | 114 /* |
37 * ARMv8 architectures support NEON extensions by default. | 115 * ARMv8 architectures support NEON extensions by default. |
38 * It is no longer optional as it was with ARMv7. | 116 * It is no longer optional as it was with ARMv7. |
39 */ | 117 */ |
40 | 118 |
41 | 119 |
42 LOCAL(void) | 120 LOCAL(void) |
43 init_simd (void) | 121 init_simd (void) |
44 { | 122 { |
45 char *env = NULL; | 123 char *env = NULL; |
| 124 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
| 125 int bufsize = 1024; /* an initial guess for the line buffer size limit */ |
| 126 #endif |
46 | 127 |
47 if (simd_support != ~0U) | 128 if (simd_support != ~0U) |
48 return; | 129 return; |
49 | 130 |
50 simd_support = 0; | 131 simd_support = 0; |
51 | 132 |
52 simd_support |= JSIMD_ARM_NEON; | 133 simd_support |= JSIMD_ARM_NEON; |
| 134 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
| 135 while (!parse_proc_cpuinfo(bufsize)) { |
| 136 bufsize *= 2; |
| 137 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) |
| 138 break; |
| 139 } |
| 140 #endif |
53 | 141 |
54 /* Force different settings through environment variables */ | 142 /* Force different settings through environment variables */ |
55 env = getenv("JSIMD_FORCENEON"); | 143 env = getenv("JSIMD_FORCENEON"); |
56 if ((env != NULL) && (strcmp(env, "1") == 0)) | 144 if ((env != NULL) && (strcmp(env, "1") == 0)) |
57 simd_support &= JSIMD_ARM_NEON; | 145 simd_support &= JSIMD_ARM_NEON; |
58 env = getenv("JSIMD_FORCENONE"); | 146 env = getenv("JSIMD_FORCENONE"); |
59 if ((env != NULL) && (strcmp(env, "1") == 0)) | 147 if ((env != NULL) && (strcmp(env, "1") == 0)) |
60 simd_support = 0; | 148 simd_support = 0; |
| 149 env = getenv("JSIMD_NOHUFFENC"); |
| 150 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 151 simd_huffman = 0; |
| 152 env = getenv("JSIMD_FASTLD3"); |
| 153 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 154 simd_features |= JSIMD_FASTLD3; |
| 155 if ((env != NULL) && (strcmp(env, "0") == 0)) |
| 156 simd_features &= ~JSIMD_FASTLD3; |
| 157 env = getenv("JSIMD_FASTST3"); |
| 158 if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 159 simd_features |= JSIMD_FASTST3; |
| 160 if ((env != NULL) && (strcmp(env, "0") == 0)) |
| 161 simd_features &= ~JSIMD_FASTST3; |
61 } | 162 } |
62 | 163 |
63 GLOBAL(int) | 164 GLOBAL(int) |
64 jsimd_can_rgb_ycc (void) | 165 jsimd_can_rgb_ycc (void) |
65 { | 166 { |
66 init_simd(); | 167 init_simd(); |
67 | 168 |
| 169 /* The code is optimised for these values only */ |
| 170 if (BITS_IN_JSAMPLE != 8) |
| 171 return 0; |
| 172 if (sizeof(JDIMENSION) != 4) |
| 173 return 0; |
| 174 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 175 return 0; |
| 176 |
| 177 if (simd_support & JSIMD_ARM_NEON) |
| 178 return 1; |
| 179 |
68 return 0; | 180 return 0; |
69 } | 181 } |
70 | 182 |
71 GLOBAL(int) | 183 GLOBAL(int) |
72 jsimd_can_rgb_gray (void) | 184 jsimd_can_rgb_gray (void) |
73 { | 185 { |
74 init_simd(); | 186 init_simd(); |
75 | 187 |
76 return 0; | 188 return 0; |
77 } | 189 } |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
110 return 1; | 222 return 1; |
111 | 223 |
112 return 0; | 224 return 0; |
113 } | 225 } |
114 | 226 |
115 GLOBAL(void) | 227 GLOBAL(void) |
116 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 228 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
117 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 229 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
118 JDIMENSION output_row, int num_rows) | 230 JDIMENSION output_row, int num_rows) |
119 { | 231 { |
| 232 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
| 233 |
| 234 switch(cinfo->in_color_space) { |
| 235 case JCS_EXT_RGB: |
| 236 if (simd_features & JSIMD_FASTLD3) |
| 237 neonfct=jsimd_extrgb_ycc_convert_neon; |
| 238 else |
| 239 neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; |
| 240 break; |
| 241 case JCS_EXT_RGBX: |
| 242 case JCS_EXT_RGBA: |
| 243 neonfct=jsimd_extrgbx_ycc_convert_neon; |
| 244 break; |
| 245 case JCS_EXT_BGR: |
| 246 if (simd_features & JSIMD_FASTLD3) |
| 247 neonfct=jsimd_extbgr_ycc_convert_neon; |
| 248 else |
| 249 neonfct=jsimd_extbgr_ycc_convert_neon_slowld3; |
| 250 break; |
| 251 case JCS_EXT_BGRX: |
| 252 case JCS_EXT_BGRA: |
| 253 neonfct=jsimd_extbgrx_ycc_convert_neon; |
| 254 break; |
| 255 case JCS_EXT_XBGR: |
| 256 case JCS_EXT_ABGR: |
| 257 neonfct=jsimd_extxbgr_ycc_convert_neon; |
| 258 break; |
| 259 case JCS_EXT_XRGB: |
| 260 case JCS_EXT_ARGB: |
| 261 neonfct=jsimd_extxrgb_ycc_convert_neon; |
| 262 break; |
| 263 default: |
| 264 if (simd_features & JSIMD_FASTLD3) |
| 265 neonfct=jsimd_extrgb_ycc_convert_neon; |
| 266 else |
| 267 neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; |
| 268 break; |
| 269 } |
| 270 |
| 271 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
120 } | 272 } |
121 | 273 |
122 GLOBAL(void) | 274 GLOBAL(void) |
123 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 275 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
124 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 276 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
125 JDIMENSION output_row, int num_rows) | 277 JDIMENSION output_row, int num_rows) |
126 { | 278 { |
127 } | 279 } |
128 | 280 |
129 GLOBAL(void) | 281 GLOBAL(void) |
130 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, | 282 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
131 JSAMPIMAGE input_buf, JDIMENSION input_row, | 283 JSAMPIMAGE input_buf, JDIMENSION input_row, |
132 JSAMPARRAY output_buf, int num_rows) | 284 JSAMPARRAY output_buf, int num_rows) |
133 { | 285 { |
134 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); | 286 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
135 | 287 |
136 switch(cinfo->out_color_space) { | 288 switch(cinfo->out_color_space) { |
137 case JCS_EXT_RGB: | 289 case JCS_EXT_RGB: |
138 neonfct=jsimd_ycc_extrgb_convert_neon; | 290 if (simd_features & JSIMD_FASTST3) |
| 291 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 292 else |
| 293 neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; |
139 break; | 294 break; |
140 case JCS_EXT_RGBX: | 295 case JCS_EXT_RGBX: |
141 case JCS_EXT_RGBA: | 296 case JCS_EXT_RGBA: |
142 neonfct=jsimd_ycc_extrgbx_convert_neon; | 297 neonfct=jsimd_ycc_extrgbx_convert_neon; |
143 break; | 298 break; |
144 case JCS_EXT_BGR: | 299 case JCS_EXT_BGR: |
145 neonfct=jsimd_ycc_extbgr_convert_neon; | 300 if (simd_features & JSIMD_FASTST3) |
| 301 neonfct=jsimd_ycc_extbgr_convert_neon; |
| 302 else |
| 303 neonfct=jsimd_ycc_extbgr_convert_neon_slowst3; |
146 break; | 304 break; |
147 case JCS_EXT_BGRX: | 305 case JCS_EXT_BGRX: |
148 case JCS_EXT_BGRA: | 306 case JCS_EXT_BGRA: |
149 neonfct=jsimd_ycc_extbgrx_convert_neon; | 307 neonfct=jsimd_ycc_extbgrx_convert_neon; |
150 break; | 308 break; |
151 case JCS_EXT_XBGR: | 309 case JCS_EXT_XBGR: |
152 case JCS_EXT_ABGR: | 310 case JCS_EXT_ABGR: |
153 neonfct=jsimd_ycc_extxbgr_convert_neon; | 311 neonfct=jsimd_ycc_extxbgr_convert_neon; |
154 break; | 312 break; |
155 case JCS_EXT_XRGB: | 313 case JCS_EXT_XRGB: |
156 case JCS_EXT_ARGB: | 314 case JCS_EXT_ARGB: |
157 neonfct=jsimd_ycc_extxrgb_convert_neon; | 315 neonfct=jsimd_ycc_extxrgb_convert_neon; |
158 break; | 316 break; |
159 default: | 317 default: |
160 neonfct=jsimd_ycc_extrgb_convert_neon; | 318 if (simd_features & JSIMD_FASTST3) |
| 319 neonfct=jsimd_ycc_extrgb_convert_neon; |
| 320 else |
| 321 neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; |
161 break; | 322 break; |
162 } | 323 } |
163 | 324 |
164 if (simd_support & JSIMD_ARM_NEON) | 325 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
165 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | |
166 } | 326 } |
167 | 327 |
168 GLOBAL(void) | 328 GLOBAL(void) |
169 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, | 329 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
170 JSAMPIMAGE input_buf, JDIMENSION input_row, | 330 JSAMPIMAGE input_buf, JDIMENSION input_row, |
171 JSAMPARRAY output_buf, int num_rows) | 331 JSAMPARRAY output_buf, int num_rows) |
172 { | 332 { |
173 if (simd_support & JSIMD_ARM_NEON) | 333 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, |
174 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, | 334 output_buf, num_rows); |
175 output_buf, num_rows); | |
176 } | 335 } |
177 | 336 |
178 GLOBAL(int) | 337 GLOBAL(int) |
179 jsimd_can_h2v2_downsample (void) | 338 jsimd_can_h2v2_downsample (void) |
180 { | 339 { |
181 init_simd(); | 340 init_simd(); |
182 | 341 |
| 342 /* The code is optimised for these values only */ |
| 343 if (BITS_IN_JSAMPLE != 8) |
| 344 return 0; |
| 345 if (DCTSIZE != 8) |
| 346 return 0; |
| 347 if (sizeof(JDIMENSION) != 4) |
| 348 return 0; |
| 349 |
| 350 if (simd_support & JSIMD_ARM_NEON) |
| 351 return 1; |
| 352 |
183 return 0; | 353 return 0; |
184 } | 354 } |
185 | 355 |
186 GLOBAL(int) | 356 GLOBAL(int) |
187 jsimd_can_h2v1_downsample (void) | 357 jsimd_can_h2v1_downsample (void) |
188 { | 358 { |
189 init_simd(); | 359 init_simd(); |
190 | 360 |
| 361 /* The code is optimised for these values only */ |
| 362 if (BITS_IN_JSAMPLE != 8) |
| 363 return 0; |
| 364 if (DCTSIZE != 8) |
| 365 return 0; |
| 366 if (sizeof(JDIMENSION) != 4) |
| 367 return 0; |
| 368 |
| 369 if (simd_support & JSIMD_ARM_NEON) |
| 370 return 1; |
| 371 |
191 return 0; | 372 return 0; |
192 } | 373 } |
193 | 374 |
194 GLOBAL(void) | 375 GLOBAL(void) |
195 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 376 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
196 JSAMPARRAY input_data, JSAMPARRAY output_data) | 377 JSAMPARRAY input_data, JSAMPARRAY output_data) |
197 { | 378 { |
| 379 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, |
| 380 compptr->v_samp_factor, compptr->width_in_blocks, |
| 381 input_data, output_data); |
198 } | 382 } |
199 | 383 |
200 GLOBAL(void) | 384 GLOBAL(void) |
201 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 385 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, |
202 JSAMPARRAY input_data, JSAMPARRAY output_data) | 386 JSAMPARRAY input_data, JSAMPARRAY output_data) |
203 { | 387 { |
| 388 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, |
| 389 compptr->v_samp_factor, compptr->width_in_blocks, |
| 390 input_data, output_data); |
204 } | 391 } |
205 | 392 |
206 GLOBAL(int) | 393 GLOBAL(int) |
207 jsimd_can_h2v2_upsample (void) | 394 jsimd_can_h2v2_upsample (void) |
208 { | 395 { |
209 init_simd(); | 396 init_simd(); |
210 | 397 |
211 return 0; | 398 return 0; |
212 } | 399 } |
213 | 400 |
214 GLOBAL(int) | 401 GLOBAL(int) |
215 jsimd_can_h2v1_upsample (void) | 402 jsimd_can_h2v1_upsample (void) |
216 { | 403 { |
217 init_simd(); | 404 init_simd(); |
218 | 405 |
219 return 0; | 406 return 0; |
220 } | 407 } |
221 | 408 |
222 GLOBAL(void) | 409 GLOBAL(void) |
223 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 410 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
224 jpeg_component_info * compptr, | 411 jpeg_component_info *compptr, |
225 JSAMPARRAY input_data, | 412 JSAMPARRAY input_data, |
226 JSAMPARRAY * output_data_ptr) | 413 JSAMPARRAY *output_data_ptr) |
227 { | 414 { |
228 } | 415 } |
229 | 416 |
230 GLOBAL(void) | 417 GLOBAL(void) |
231 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 418 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
232 jpeg_component_info * compptr, | 419 jpeg_component_info *compptr, |
233 JSAMPARRAY input_data, | 420 JSAMPARRAY input_data, |
234 JSAMPARRAY * output_data_ptr) | 421 JSAMPARRAY *output_data_ptr) |
235 { | 422 { |
236 } | 423 } |
237 | 424 |
238 GLOBAL(int) | 425 GLOBAL(int) |
239 jsimd_can_h2v2_fancy_upsample (void) | 426 jsimd_can_h2v2_fancy_upsample (void) |
240 { | 427 { |
241 init_simd(); | 428 init_simd(); |
242 | 429 |
243 return 0; | 430 return 0; |
244 } | 431 } |
245 | 432 |
246 GLOBAL(int) | 433 GLOBAL(int) |
247 jsimd_can_h2v1_fancy_upsample (void) | 434 jsimd_can_h2v1_fancy_upsample (void) |
248 { | 435 { |
249 init_simd(); | 436 init_simd(); |
250 | 437 |
251 return 0; | 438 return 0; |
252 } | 439 } |
253 | 440 |
254 GLOBAL(void) | 441 GLOBAL(void) |
255 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 442 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
256 jpeg_component_info * compptr, | 443 jpeg_component_info *compptr, |
257 JSAMPARRAY input_data, | 444 JSAMPARRAY input_data, |
258 JSAMPARRAY * output_data_ptr) | 445 JSAMPARRAY *output_data_ptr) |
259 { | 446 { |
260 } | 447 } |
261 | 448 |
262 GLOBAL(void) | 449 GLOBAL(void) |
263 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 450 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
264 jpeg_component_info * compptr, | 451 jpeg_component_info *compptr, |
265 JSAMPARRAY input_data, | 452 JSAMPARRAY input_data, |
266 JSAMPARRAY * output_data_ptr) | 453 JSAMPARRAY *output_data_ptr) |
267 { | 454 { |
268 } | 455 } |
269 | 456 |
270 GLOBAL(int) | 457 GLOBAL(int) |
271 jsimd_can_h2v2_merged_upsample (void) | 458 jsimd_can_h2v2_merged_upsample (void) |
272 { | 459 { |
273 init_simd(); | 460 init_simd(); |
274 | 461 |
275 return 0; | 462 return 0; |
276 } | 463 } |
(...skipping 20 matching lines...) Expand all Loading... |
297 JDIMENSION in_row_group_ctr, | 484 JDIMENSION in_row_group_ctr, |
298 JSAMPARRAY output_buf) | 485 JSAMPARRAY output_buf) |
299 { | 486 { |
300 } | 487 } |
301 | 488 |
302 GLOBAL(int) | 489 GLOBAL(int) |
303 jsimd_can_convsamp (void) | 490 jsimd_can_convsamp (void) |
304 { | 491 { |
305 init_simd(); | 492 init_simd(); |
306 | 493 |
| 494 /* The code is optimised for these values only */ |
| 495 if (DCTSIZE != 8) |
| 496 return 0; |
| 497 if (BITS_IN_JSAMPLE != 8) |
| 498 return 0; |
| 499 if (sizeof(JDIMENSION) != 4) |
| 500 return 0; |
| 501 if (sizeof(DCTELEM) != 2) |
| 502 return 0; |
| 503 |
| 504 if (simd_support & JSIMD_ARM_NEON) |
| 505 return 1; |
| 506 |
307 return 0; | 507 return 0; |
308 } | 508 } |
309 | 509 |
310 GLOBAL(int) | 510 GLOBAL(int) |
311 jsimd_can_convsamp_float (void) | 511 jsimd_can_convsamp_float (void) |
312 { | 512 { |
313 init_simd(); | 513 init_simd(); |
314 | 514 |
315 return 0; | 515 return 0; |
316 } | 516 } |
317 | 517 |
318 GLOBAL(void) | 518 GLOBAL(void) |
319 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, | 519 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
320 DCTELEM * workspace) | 520 DCTELEM *workspace) |
321 { | 521 { |
| 522 jsimd_convsamp_neon(sample_data, start_col, workspace); |
322 } | 523 } |
323 | 524 |
324 GLOBAL(void) | 525 GLOBAL(void) |
325 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, | 526 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
326 FAST_FLOAT * workspace) | 527 FAST_FLOAT *workspace) |
327 { | 528 { |
328 } | 529 } |
329 | 530 |
330 GLOBAL(int) | 531 GLOBAL(int) |
331 jsimd_can_fdct_islow (void) | 532 jsimd_can_fdct_islow (void) |
332 { | 533 { |
333 init_simd(); | 534 init_simd(); |
334 | 535 |
| 536 /* The code is optimised for these values only */ |
| 537 if (DCTSIZE != 8) |
| 538 return 0; |
| 539 if (sizeof(DCTELEM) != 2) |
| 540 return 0; |
| 541 |
| 542 if (simd_support & JSIMD_ARM_NEON) |
| 543 return 1; |
| 544 |
335 return 0; | 545 return 0; |
336 } | 546 } |
337 | 547 |
338 GLOBAL(int) | 548 GLOBAL(int) |
339 jsimd_can_fdct_ifast (void) | 549 jsimd_can_fdct_ifast (void) |
340 { | 550 { |
341 init_simd(); | 551 init_simd(); |
342 | 552 |
| 553 /* The code is optimised for these values only */ |
| 554 if (DCTSIZE != 8) |
| 555 return 0; |
| 556 if (sizeof(DCTELEM) != 2) |
| 557 return 0; |
| 558 |
| 559 if (simd_support & JSIMD_ARM_NEON) |
| 560 return 1; |
| 561 |
343 return 0; | 562 return 0; |
344 } | 563 } |
345 | 564 |
346 GLOBAL(int) | 565 GLOBAL(int) |
347 jsimd_can_fdct_float (void) | 566 jsimd_can_fdct_float (void) |
348 { | 567 { |
349 init_simd(); | 568 init_simd(); |
350 | 569 |
351 return 0; | 570 return 0; |
352 } | 571 } |
353 | 572 |
354 GLOBAL(void) | 573 GLOBAL(void) |
355 jsimd_fdct_islow (DCTELEM * data) | 574 jsimd_fdct_islow (DCTELEM *data) |
| 575 { |
| 576 jsimd_fdct_islow_neon(data); |
| 577 } |
| 578 |
| 579 GLOBAL(void) |
| 580 jsimd_fdct_ifast (DCTELEM *data) |
| 581 { |
| 582 jsimd_fdct_ifast_neon(data); |
| 583 } |
| 584 |
| 585 GLOBAL(void) |
| 586 jsimd_fdct_float (FAST_FLOAT *data) |
356 { | 587 { |
357 } | 588 } |
358 | 589 |
359 GLOBAL(void) | |
360 jsimd_fdct_ifast (DCTELEM * data) | |
361 { | |
362 } | |
363 | |
364 GLOBAL(void) | |
365 jsimd_fdct_float (FAST_FLOAT * data) | |
366 { | |
367 } | |
368 | |
369 GLOBAL(int) | 590 GLOBAL(int) |
370 jsimd_can_quantize (void) | 591 jsimd_can_quantize (void) |
371 { | 592 { |
372 init_simd(); | 593 init_simd(); |
373 | 594 |
| 595 /* The code is optimised for these values only */ |
| 596 if (DCTSIZE != 8) |
| 597 return 0; |
| 598 if (sizeof(JCOEF) != 2) |
| 599 return 0; |
| 600 if (sizeof(DCTELEM) != 2) |
| 601 return 0; |
| 602 |
| 603 if (simd_support & JSIMD_ARM_NEON) |
| 604 return 1; |
| 605 |
374 return 0; | 606 return 0; |
375 } | 607 } |
376 | 608 |
377 GLOBAL(int) | 609 GLOBAL(int) |
378 jsimd_can_quantize_float (void) | 610 jsimd_can_quantize_float (void) |
379 { | 611 { |
380 init_simd(); | 612 init_simd(); |
381 | 613 |
382 return 0; | 614 return 0; |
383 } | 615 } |
384 | 616 |
385 GLOBAL(void) | 617 GLOBAL(void) |
386 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, | 618 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, |
387 DCTELEM * workspace) | 619 DCTELEM *workspace) |
| 620 { |
| 621 jsimd_quantize_neon(coef_block, divisors, workspace); |
| 622 } |
| 623 |
| 624 GLOBAL(void) |
| 625 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
| 626 FAST_FLOAT *workspace) |
388 { | 627 { |
389 } | 628 } |
390 | 629 |
391 GLOBAL(void) | |
392 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | |
393 FAST_FLOAT * workspace) | |
394 { | |
395 } | |
396 | |
397 GLOBAL(int) | 630 GLOBAL(int) |
398 jsimd_can_idct_2x2 (void) | 631 jsimd_can_idct_2x2 (void) |
399 { | 632 { |
400 init_simd(); | 633 init_simd(); |
401 | 634 |
402 /* The code is optimised for these values only */ | 635 /* The code is optimised for these values only */ |
403 if (DCTSIZE != 8) | 636 if (DCTSIZE != 8) |
404 return 0; | 637 return 0; |
405 if (sizeof(JCOEF) != 2) | 638 if (sizeof(JCOEF) != 2) |
406 return 0; | 639 return 0; |
(...skipping 27 matching lines...) Expand all Loading... |
434 if (sizeof(ISLOW_MULT_TYPE) != 2) | 667 if (sizeof(ISLOW_MULT_TYPE) != 2) |
435 return 0; | 668 return 0; |
436 | 669 |
437 if (simd_support & JSIMD_ARM_NEON) | 670 if (simd_support & JSIMD_ARM_NEON) |
438 return 1; | 671 return 1; |
439 | 672 |
440 return 0; | 673 return 0; |
441 } | 674 } |
442 | 675 |
443 GLOBAL(void) | 676 GLOBAL(void) |
444 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 677 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
445 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 678 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
446 JDIMENSION output_col) | 679 JDIMENSION output_col) |
447 { | 680 { |
448 if (simd_support & JSIMD_ARM_NEON) | 681 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, |
449 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, | 682 output_col); |
450 output_col); | |
451 } | 683 } |
452 | 684 |
453 GLOBAL(void) | 685 GLOBAL(void) |
454 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 686 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
455 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 687 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
456 JDIMENSION output_col) | 688 JDIMENSION output_col) |
457 { | 689 { |
458 if (simd_support & JSIMD_ARM_NEON) | 690 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, |
459 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, | 691 output_col); |
460 output_col); | |
461 } | 692 } |
462 | 693 |
463 GLOBAL(int) | 694 GLOBAL(int) |
464 jsimd_can_idct_islow (void) | 695 jsimd_can_idct_islow (void) |
465 { | 696 { |
466 init_simd(); | 697 init_simd(); |
467 | 698 |
468 /* The code is optimised for these values only */ | 699 /* The code is optimised for these values only */ |
469 if (DCTSIZE != 8) | 700 if (DCTSIZE != 8) |
470 return 0; | 701 return 0; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
510 | 741 |
511 GLOBAL(int) | 742 GLOBAL(int) |
512 jsimd_can_idct_float (void) | 743 jsimd_can_idct_float (void) |
513 { | 744 { |
514 init_simd(); | 745 init_simd(); |
515 | 746 |
516 return 0; | 747 return 0; |
517 } | 748 } |
518 | 749 |
519 GLOBAL(void) | 750 GLOBAL(void) |
520 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 751 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
521 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 752 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
522 JDIMENSION output_col) | 753 JDIMENSION output_col) |
523 { | 754 { |
524 if (simd_support & JSIMD_ARM_NEON) | 755 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, |
525 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, | 756 output_col); |
526 output_col); | |
527 } | 757 } |
528 | 758 |
529 GLOBAL(void) | 759 GLOBAL(void) |
530 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 760 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
531 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 761 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
532 JDIMENSION output_col) | 762 JDIMENSION output_col) |
533 { | 763 { |
534 if (simd_support & JSIMD_ARM_NEON) | 764 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, |
535 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, | 765 output_col); |
536 output_col); | |
537 } | 766 } |
538 | 767 |
539 GLOBAL(void) | 768 GLOBAL(void) |
540 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 769 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, |
541 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 770 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
542 JDIMENSION output_col) | 771 JDIMENSION output_col) |
543 { | 772 { |
544 } | 773 } |
| 774 |
| 775 GLOBAL(int) |
| 776 jsimd_can_huff_encode_one_block (void) |
| 777 { |
| 778 init_simd(); |
| 779 |
| 780 if (DCTSIZE != 8) |
| 781 return 0; |
| 782 if (sizeof(JCOEF) != 2) |
| 783 return 0; |
| 784 |
| 785 if (simd_support & JSIMD_ARM_NEON && simd_huffman) |
| 786 return 1; |
| 787 |
| 788 return 0; |
| 789 } |
| 790 |
| 791 GLOBAL(JOCTET*) |
| 792 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, |
| 793 int last_dc_val, c_derived_tbl *dctbl, |
| 794 c_derived_tbl *actbl) |
| 795 { |
| 796 if (simd_features & JSIMD_FASTTBL) |
| 797 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, |
| 798 dctbl, actbl); |
| 799 else |
| 800 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, |
| 801 last_dc_val, dctbl, actbl); |
| 802 } |
OLD | NEW |