Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: simd/jsimd_arm64.c

Issue 1953443002: Update to libjpeg_turbo 1.4.90 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_arm64_neon.S » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * jsimd_arm64.c 2 * jsimd_arm64.c
3 * 3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011, 2013-2014 D. R. Commander 5 * Copyright 2009-2011, 2013-2014, 2016 D. R. Commander
6 * Copyright 2015-2016 Matthieu Darbois
6 * 7 *
7 * Based on the x86 SIMD extension for IJG JPEG library, 8 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru. 9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 * 11 *
11 * This file contains the interface between the "normal" portions 12 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a 13 * of the library and the SIMD implementations when running on a
13 * 64-bit ARM architecture. 14 * 64-bit ARM architecture.
14 */ 15 */
15 16
16 #define JPEG_INTERNALS 17 #define JPEG_INTERNALS
17 #include "../jinclude.h" 18 #include "../jinclude.h"
18 #include "../jpeglib.h" 19 #include "../jpeglib.h"
19 #include "../jsimd.h" 20 #include "../jsimd.h"
20 #include "../jdct.h" 21 #include "../jdct.h"
21 #include "../jsimddct.h" 22 #include "../jsimddct.h"
22 #include "jsimd.h" 23 #include "jsimd.h"
23 24
24 #include <stdio.h> 25 #include <stdio.h>
25 #include <string.h> 26 #include <string.h>
26 #include <ctype.h> 27 #include <ctype.h>
27 28
29 #define JSIMD_FASTLD3 1
30 #define JSIMD_FASTST3 2
31 #define JSIMD_FASTTBL 4
32
28 static unsigned int simd_support = ~0; 33 static unsigned int simd_support = ~0;
34 static unsigned int simd_huffman = 1;
35 static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
36 JSIMD_FASTTBL;
37
38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
39
40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
41
42 LOCAL(int)
43 check_cpuinfo (char *buffer, const char *field, char *value)
44 {
45 char *p;
46 if (*value == 0)
47 return 0;
48 if (strncmp(buffer, field, strlen(field)) != 0)
49 return 0;
50 buffer += strlen(field);
51 while (isspace(*buffer))
52 buffer++;
53
54 /* Check if 'value' is present in the buffer as a separate word */
55 while ((p = strstr(buffer, value))) {
56 if (p > buffer && !isspace(*(p - 1))) {
57 buffer++;
58 continue;
59 }
60 p += strlen(value);
61 if (*p != 0 && !isspace(*p)) {
62 buffer++;
63 continue;
64 }
65 return 1;
66 }
67 return 0;
68 }
69
70 LOCAL(int)
71 parse_proc_cpuinfo (int bufsize)
72 {
73 char *buffer = (char *)malloc(bufsize);
74 FILE *fd;
75
76 if (!buffer)
77 return 0;
78
79 fd = fopen("/proc/cpuinfo", "r");
80 if (fd) {
81 while (fgets(buffer, bufsize, fd)) {
82 if (!strchr(buffer, '\n') && !feof(fd)) {
83 /* "impossible" happened - insufficient size of the buffer! */
84 fclose(fd);
85 free(buffer);
86 return 0;
87 }
88 if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
89 check_cpuinfo(buffer, "CPU part", "0xd07"))
90 /* The Cortex-A53 has a slow tbl implementation. We can gain a few
91 percent speedup by disabling the use of that instruction. The
92 speedup on Cortex-A57 is more subtle but still measurable. */
93 simd_features &= ~JSIMD_FASTTBL;
94 else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
95 /* The SIMD version of Huffman encoding is slower than the C version on
96 Cavium ThunderX. Also, ld3 and st3 are abysmally slow on that
97 CPU. */
98 simd_huffman = simd_features = 0;
99 }
100 fclose(fd);
101 }
102 free(buffer);
103 return 1;
104 }
105
106 #endif
29 107
30 /* 108 /*
31 * Check what SIMD accelerations are supported. 109 * Check what SIMD accelerations are supported.
32 * 110 *
33 * FIXME: This code is racy under a multi-threaded environment. 111 * FIXME: This code is racy under a multi-threaded environment.
34 */ 112 */
35 113
36 /* 114 /*
37 * ARMv8 architectures support NEON extensions by default. 115 * ARMv8 architectures support NEON extensions by default.
38 * It is no longer optional as it was with ARMv7. 116 * It is no longer optional as it was with ARMv7.
39 */ 117 */
40 118
41 119
42 LOCAL(void) 120 LOCAL(void)
43 init_simd (void) 121 init_simd (void)
44 { 122 {
45 char *env = NULL; 123 char *env = NULL;
124 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
125 int bufsize = 1024; /* an initial guess for the line buffer size limit */
126 #endif
46 127
47 if (simd_support != ~0U) 128 if (simd_support != ~0U)
48 return; 129 return;
49 130
50 simd_support = 0; 131 simd_support = 0;
51 132
52 simd_support |= JSIMD_ARM_NEON; 133 simd_support |= JSIMD_ARM_NEON;
134 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
135 while (!parse_proc_cpuinfo(bufsize)) {
136 bufsize *= 2;
137 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
138 break;
139 }
140 #endif
53 141
54 /* Force different settings through environment variables */ 142 /* Force different settings through environment variables */
55 env = getenv("JSIMD_FORCENEON"); 143 env = getenv("JSIMD_FORCENEON");
56 if ((env != NULL) && (strcmp(env, "1") == 0)) 144 if ((env != NULL) && (strcmp(env, "1") == 0))
57 simd_support &= JSIMD_ARM_NEON; 145 simd_support &= JSIMD_ARM_NEON;
58 env = getenv("JSIMD_FORCENONE"); 146 env = getenv("JSIMD_FORCENONE");
59 if ((env != NULL) && (strcmp(env, "1") == 0)) 147 if ((env != NULL) && (strcmp(env, "1") == 0))
60 simd_support = 0; 148 simd_support = 0;
149 env = getenv("JSIMD_NOHUFFENC");
150 if ((env != NULL) && (strcmp(env, "1") == 0))
151 simd_huffman = 0;
152 env = getenv("JSIMD_FASTLD3");
153 if ((env != NULL) && (strcmp(env, "1") == 0))
154 simd_features |= JSIMD_FASTLD3;
155 if ((env != NULL) && (strcmp(env, "0") == 0))
156 simd_features &= ~JSIMD_FASTLD3;
157 env = getenv("JSIMD_FASTST3");
158 if ((env != NULL) && (strcmp(env, "1") == 0))
159 simd_features |= JSIMD_FASTST3;
160 if ((env != NULL) && (strcmp(env, "0") == 0))
161 simd_features &= ~JSIMD_FASTST3;
61 } 162 }
62 163
63 GLOBAL(int) 164 GLOBAL(int)
64 jsimd_can_rgb_ycc (void) 165 jsimd_can_rgb_ycc (void)
65 { 166 {
66 init_simd(); 167 init_simd();
67 168
169 /* The code is optimised for these values only */
170 if (BITS_IN_JSAMPLE != 8)
171 return 0;
172 if (sizeof(JDIMENSION) != 4)
173 return 0;
174 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
175 return 0;
176
177 if (simd_support & JSIMD_ARM_NEON)
178 return 1;
179
68 return 0; 180 return 0;
69 } 181 }
70 182
71 GLOBAL(int) 183 GLOBAL(int)
72 jsimd_can_rgb_gray (void) 184 jsimd_can_rgb_gray (void)
73 { 185 {
74 init_simd(); 186 init_simd();
75 187
76 return 0; 188 return 0;
77 } 189 }
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 return 1; 222 return 1;
111 223
112 return 0; 224 return 0;
113 } 225 }
114 226
115 GLOBAL(void) 227 GLOBAL(void)
116 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 228 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
117 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 229 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
118 JDIMENSION output_row, int num_rows) 230 JDIMENSION output_row, int num_rows)
119 { 231 {
232 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
233
234 switch(cinfo->in_color_space) {
235 case JCS_EXT_RGB:
236 if (simd_features & JSIMD_FASTLD3)
237 neonfct=jsimd_extrgb_ycc_convert_neon;
238 else
239 neonfct=jsimd_extrgb_ycc_convert_neon_slowld3;
240 break;
241 case JCS_EXT_RGBX:
242 case JCS_EXT_RGBA:
243 neonfct=jsimd_extrgbx_ycc_convert_neon;
244 break;
245 case JCS_EXT_BGR:
246 if (simd_features & JSIMD_FASTLD3)
247 neonfct=jsimd_extbgr_ycc_convert_neon;
248 else
249 neonfct=jsimd_extbgr_ycc_convert_neon_slowld3;
250 break;
251 case JCS_EXT_BGRX:
252 case JCS_EXT_BGRA:
253 neonfct=jsimd_extbgrx_ycc_convert_neon;
254 break;
255 case JCS_EXT_XBGR:
256 case JCS_EXT_ABGR:
257 neonfct=jsimd_extxbgr_ycc_convert_neon;
258 break;
259 case JCS_EXT_XRGB:
260 case JCS_EXT_ARGB:
261 neonfct=jsimd_extxrgb_ycc_convert_neon;
262 break;
263 default:
264 if (simd_features & JSIMD_FASTLD3)
265 neonfct=jsimd_extrgb_ycc_convert_neon;
266 else
267 neonfct=jsimd_extrgb_ycc_convert_neon_slowld3;
268 break;
269 }
270
271 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
120 } 272 }
121 273
122 GLOBAL(void) 274 GLOBAL(void)
123 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 275 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
124 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 276 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
125 JDIMENSION output_row, int num_rows) 277 JDIMENSION output_row, int num_rows)
126 { 278 {
127 } 279 }
128 280
129 GLOBAL(void) 281 GLOBAL(void)
130 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 282 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
131 JSAMPIMAGE input_buf, JDIMENSION input_row, 283 JSAMPIMAGE input_buf, JDIMENSION input_row,
132 JSAMPARRAY output_buf, int num_rows) 284 JSAMPARRAY output_buf, int num_rows)
133 { 285 {
134 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 286 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
135 287
136 switch(cinfo->out_color_space) { 288 switch(cinfo->out_color_space) {
137 case JCS_EXT_RGB: 289 case JCS_EXT_RGB:
138 neonfct=jsimd_ycc_extrgb_convert_neon; 290 if (simd_features & JSIMD_FASTST3)
291 neonfct=jsimd_ycc_extrgb_convert_neon;
292 else
293 neonfct=jsimd_ycc_extrgb_convert_neon_slowst3;
139 break; 294 break;
140 case JCS_EXT_RGBX: 295 case JCS_EXT_RGBX:
141 case JCS_EXT_RGBA: 296 case JCS_EXT_RGBA:
142 neonfct=jsimd_ycc_extrgbx_convert_neon; 297 neonfct=jsimd_ycc_extrgbx_convert_neon;
143 break; 298 break;
144 case JCS_EXT_BGR: 299 case JCS_EXT_BGR:
145 neonfct=jsimd_ycc_extbgr_convert_neon; 300 if (simd_features & JSIMD_FASTST3)
301 neonfct=jsimd_ycc_extbgr_convert_neon;
302 else
303 neonfct=jsimd_ycc_extbgr_convert_neon_slowst3;
146 break; 304 break;
147 case JCS_EXT_BGRX: 305 case JCS_EXT_BGRX:
148 case JCS_EXT_BGRA: 306 case JCS_EXT_BGRA:
149 neonfct=jsimd_ycc_extbgrx_convert_neon; 307 neonfct=jsimd_ycc_extbgrx_convert_neon;
150 break; 308 break;
151 case JCS_EXT_XBGR: 309 case JCS_EXT_XBGR:
152 case JCS_EXT_ABGR: 310 case JCS_EXT_ABGR:
153 neonfct=jsimd_ycc_extxbgr_convert_neon; 311 neonfct=jsimd_ycc_extxbgr_convert_neon;
154 break; 312 break;
155 case JCS_EXT_XRGB: 313 case JCS_EXT_XRGB:
156 case JCS_EXT_ARGB: 314 case JCS_EXT_ARGB:
157 neonfct=jsimd_ycc_extxrgb_convert_neon; 315 neonfct=jsimd_ycc_extxrgb_convert_neon;
158 break; 316 break;
159 default: 317 default:
160 neonfct=jsimd_ycc_extrgb_convert_neon; 318 if (simd_features & JSIMD_FASTST3)
319 neonfct=jsimd_ycc_extrgb_convert_neon;
320 else
321 neonfct=jsimd_ycc_extrgb_convert_neon_slowst3;
161 break; 322 break;
162 } 323 }
163 324
164 if (simd_support & JSIMD_ARM_NEON) 325 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
165 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
166 } 326 }
167 327
168 GLOBAL(void) 328 GLOBAL(void)
169 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 329 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
170 JSAMPIMAGE input_buf, JDIMENSION input_row, 330 JSAMPIMAGE input_buf, JDIMENSION input_row,
171 JSAMPARRAY output_buf, int num_rows) 331 JSAMPARRAY output_buf, int num_rows)
172 { 332 {
173 if (simd_support & JSIMD_ARM_NEON) 333 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
174 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, 334 output_buf, num_rows);
175 output_buf, num_rows);
176 } 335 }
177 336
178 GLOBAL(int) 337 GLOBAL(int)
179 jsimd_can_h2v2_downsample (void) 338 jsimd_can_h2v2_downsample (void)
180 { 339 {
181 init_simd(); 340 init_simd();
182 341
342 /* The code is optimised for these values only */
343 if (BITS_IN_JSAMPLE != 8)
344 return 0;
345 if (DCTSIZE != 8)
346 return 0;
347 if (sizeof(JDIMENSION) != 4)
348 return 0;
349
350 if (simd_support & JSIMD_ARM_NEON)
351 return 1;
352
183 return 0; 353 return 0;
184 } 354 }
185 355
186 GLOBAL(int) 356 GLOBAL(int)
187 jsimd_can_h2v1_downsample (void) 357 jsimd_can_h2v1_downsample (void)
188 { 358 {
189 init_simd(); 359 init_simd();
190 360
361 /* The code is optimised for these values only */
362 if (BITS_IN_JSAMPLE != 8)
363 return 0;
364 if (DCTSIZE != 8)
365 return 0;
366 if (sizeof(JDIMENSION) != 4)
367 return 0;
368
369 if (simd_support & JSIMD_ARM_NEON)
370 return 1;
371
191 return 0; 372 return 0;
192 } 373 }
193 374
194 GLOBAL(void) 375 GLOBAL(void)
195 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 376 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
196 JSAMPARRAY input_data, JSAMPARRAY output_data) 377 JSAMPARRAY input_data, JSAMPARRAY output_data)
197 { 378 {
379 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
380 compptr->v_samp_factor, compptr->width_in_blocks,
381 input_data, output_data);
198 } 382 }
199 383
200 GLOBAL(void) 384 GLOBAL(void)
201 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 385 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
202 JSAMPARRAY input_data, JSAMPARRAY output_data) 386 JSAMPARRAY input_data, JSAMPARRAY output_data)
203 { 387 {
388 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
389 compptr->v_samp_factor, compptr->width_in_blocks,
390 input_data, output_data);
204 } 391 }
205 392
206 GLOBAL(int) 393 GLOBAL(int)
207 jsimd_can_h2v2_upsample (void) 394 jsimd_can_h2v2_upsample (void)
208 { 395 {
209 init_simd(); 396 init_simd();
210 397
211 return 0; 398 return 0;
212 } 399 }
213 400
214 GLOBAL(int) 401 GLOBAL(int)
215 jsimd_can_h2v1_upsample (void) 402 jsimd_can_h2v1_upsample (void)
216 { 403 {
217 init_simd(); 404 init_simd();
218 405
219 return 0; 406 return 0;
220 } 407 }
221 408
222 GLOBAL(void) 409 GLOBAL(void)
223 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 410 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
224 jpeg_component_info * compptr, 411 jpeg_component_info *compptr,
225 JSAMPARRAY input_data, 412 JSAMPARRAY input_data,
226 JSAMPARRAY * output_data_ptr) 413 JSAMPARRAY *output_data_ptr)
227 { 414 {
228 } 415 }
229 416
230 GLOBAL(void) 417 GLOBAL(void)
231 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 418 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
232 jpeg_component_info * compptr, 419 jpeg_component_info *compptr,
233 JSAMPARRAY input_data, 420 JSAMPARRAY input_data,
234 JSAMPARRAY * output_data_ptr) 421 JSAMPARRAY *output_data_ptr)
235 { 422 {
236 } 423 }
237 424
238 GLOBAL(int) 425 GLOBAL(int)
239 jsimd_can_h2v2_fancy_upsample (void) 426 jsimd_can_h2v2_fancy_upsample (void)
240 { 427 {
241 init_simd(); 428 init_simd();
242 429
243 return 0; 430 return 0;
244 } 431 }
245 432
246 GLOBAL(int) 433 GLOBAL(int)
247 jsimd_can_h2v1_fancy_upsample (void) 434 jsimd_can_h2v1_fancy_upsample (void)
248 { 435 {
249 init_simd(); 436 init_simd();
250 437
251 return 0; 438 return 0;
252 } 439 }
253 440
254 GLOBAL(void) 441 GLOBAL(void)
255 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 442 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
256 jpeg_component_info * compptr, 443 jpeg_component_info *compptr,
257 JSAMPARRAY input_data, 444 JSAMPARRAY input_data,
258 JSAMPARRAY * output_data_ptr) 445 JSAMPARRAY *output_data_ptr)
259 { 446 {
260 } 447 }
261 448
262 GLOBAL(void) 449 GLOBAL(void)
263 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 450 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
264 jpeg_component_info * compptr, 451 jpeg_component_info *compptr,
265 JSAMPARRAY input_data, 452 JSAMPARRAY input_data,
266 JSAMPARRAY * output_data_ptr) 453 JSAMPARRAY *output_data_ptr)
267 { 454 {
268 } 455 }
269 456
270 GLOBAL(int) 457 GLOBAL(int)
271 jsimd_can_h2v2_merged_upsample (void) 458 jsimd_can_h2v2_merged_upsample (void)
272 { 459 {
273 init_simd(); 460 init_simd();
274 461
275 return 0; 462 return 0;
276 } 463 }
(...skipping 20 matching lines...) Expand all
297 JDIMENSION in_row_group_ctr, 484 JDIMENSION in_row_group_ctr,
298 JSAMPARRAY output_buf) 485 JSAMPARRAY output_buf)
299 { 486 {
300 } 487 }
301 488
302 GLOBAL(int) 489 GLOBAL(int)
303 jsimd_can_convsamp (void) 490 jsimd_can_convsamp (void)
304 { 491 {
305 init_simd(); 492 init_simd();
306 493
494 /* The code is optimised for these values only */
495 if (DCTSIZE != 8)
496 return 0;
497 if (BITS_IN_JSAMPLE != 8)
498 return 0;
499 if (sizeof(JDIMENSION) != 4)
500 return 0;
501 if (sizeof(DCTELEM) != 2)
502 return 0;
503
504 if (simd_support & JSIMD_ARM_NEON)
505 return 1;
506
307 return 0; 507 return 0;
308 } 508 }
309 509
310 GLOBAL(int) 510 GLOBAL(int)
311 jsimd_can_convsamp_float (void) 511 jsimd_can_convsamp_float (void)
312 { 512 {
313 init_simd(); 513 init_simd();
314 514
315 return 0; 515 return 0;
316 } 516 }
317 517
318 GLOBAL(void) 518 GLOBAL(void)
319 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 519 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
320 DCTELEM * workspace) 520 DCTELEM *workspace)
321 { 521 {
522 jsimd_convsamp_neon(sample_data, start_col, workspace);
322 } 523 }
323 524
324 GLOBAL(void) 525 GLOBAL(void)
325 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 526 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
326 FAST_FLOAT * workspace) 527 FAST_FLOAT *workspace)
327 { 528 {
328 } 529 }
329 530
330 GLOBAL(int) 531 GLOBAL(int)
331 jsimd_can_fdct_islow (void) 532 jsimd_can_fdct_islow (void)
332 { 533 {
333 init_simd(); 534 init_simd();
334 535
536 /* The code is optimised for these values only */
537 if (DCTSIZE != 8)
538 return 0;
539 if (sizeof(DCTELEM) != 2)
540 return 0;
541
542 if (simd_support & JSIMD_ARM_NEON)
543 return 1;
544
335 return 0; 545 return 0;
336 } 546 }
337 547
338 GLOBAL(int) 548 GLOBAL(int)
339 jsimd_can_fdct_ifast (void) 549 jsimd_can_fdct_ifast (void)
340 { 550 {
341 init_simd(); 551 init_simd();
342 552
553 /* The code is optimised for these values only */
554 if (DCTSIZE != 8)
555 return 0;
556 if (sizeof(DCTELEM) != 2)
557 return 0;
558
559 if (simd_support & JSIMD_ARM_NEON)
560 return 1;
561
343 return 0; 562 return 0;
344 } 563 }
345 564
346 GLOBAL(int) 565 GLOBAL(int)
347 jsimd_can_fdct_float (void) 566 jsimd_can_fdct_float (void)
348 { 567 {
349 init_simd(); 568 init_simd();
350 569
351 return 0; 570 return 0;
352 } 571 }
353 572
354 GLOBAL(void) 573 GLOBAL(void)
355 jsimd_fdct_islow (DCTELEM * data) 574 jsimd_fdct_islow (DCTELEM *data)
575 {
576 jsimd_fdct_islow_neon(data);
577 }
578
579 GLOBAL(void)
580 jsimd_fdct_ifast (DCTELEM *data)
581 {
582 jsimd_fdct_ifast_neon(data);
583 }
584
585 GLOBAL(void)
586 jsimd_fdct_float (FAST_FLOAT *data)
356 { 587 {
357 } 588 }
358 589
359 GLOBAL(void)
360 jsimd_fdct_ifast (DCTELEM * data)
361 {
362 }
363
364 GLOBAL(void)
365 jsimd_fdct_float (FAST_FLOAT * data)
366 {
367 }
368
369 GLOBAL(int) 590 GLOBAL(int)
370 jsimd_can_quantize (void) 591 jsimd_can_quantize (void)
371 { 592 {
372 init_simd(); 593 init_simd();
373 594
595 /* The code is optimised for these values only */
596 if (DCTSIZE != 8)
597 return 0;
598 if (sizeof(JCOEF) != 2)
599 return 0;
600 if (sizeof(DCTELEM) != 2)
601 return 0;
602
603 if (simd_support & JSIMD_ARM_NEON)
604 return 1;
605
374 return 0; 606 return 0;
375 } 607 }
376 608
377 GLOBAL(int) 609 GLOBAL(int)
378 jsimd_can_quantize_float (void) 610 jsimd_can_quantize_float (void)
379 { 611 {
380 init_simd(); 612 init_simd();
381 613
382 return 0; 614 return 0;
383 } 615 }
384 616
385 GLOBAL(void) 617 GLOBAL(void)
386 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 618 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
387 DCTELEM * workspace) 619 DCTELEM *workspace)
620 {
621 jsimd_quantize_neon(coef_block, divisors, workspace);
622 }
623
624 GLOBAL(void)
625 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
626 FAST_FLOAT *workspace)
388 { 627 {
389 } 628 }
390 629
391 GLOBAL(void)
392 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
393 FAST_FLOAT * workspace)
394 {
395 }
396
397 GLOBAL(int) 630 GLOBAL(int)
398 jsimd_can_idct_2x2 (void) 631 jsimd_can_idct_2x2 (void)
399 { 632 {
400 init_simd(); 633 init_simd();
401 634
402 /* The code is optimised for these values only */ 635 /* The code is optimised for these values only */
403 if (DCTSIZE != 8) 636 if (DCTSIZE != 8)
404 return 0; 637 return 0;
405 if (sizeof(JCOEF) != 2) 638 if (sizeof(JCOEF) != 2)
406 return 0; 639 return 0;
(...skipping 27 matching lines...) Expand all
434 if (sizeof(ISLOW_MULT_TYPE) != 2) 667 if (sizeof(ISLOW_MULT_TYPE) != 2)
435 return 0; 668 return 0;
436 669
437 if (simd_support & JSIMD_ARM_NEON) 670 if (simd_support & JSIMD_ARM_NEON)
438 return 1; 671 return 1;
439 672
440 return 0; 673 return 0;
441 } 674 }
442 675
443 GLOBAL(void) 676 GLOBAL(void)
444 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 677 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
445 JCOEFPTR coef_block, JSAMPARRAY output_buf, 678 JCOEFPTR coef_block, JSAMPARRAY output_buf,
446 JDIMENSION output_col) 679 JDIMENSION output_col)
447 { 680 {
448 if (simd_support & JSIMD_ARM_NEON) 681 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
449 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, 682 output_col);
450 output_col);
451 } 683 }
452 684
453 GLOBAL(void) 685 GLOBAL(void)
454 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 686 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
455 JCOEFPTR coef_block, JSAMPARRAY output_buf, 687 JCOEFPTR coef_block, JSAMPARRAY output_buf,
456 JDIMENSION output_col) 688 JDIMENSION output_col)
457 { 689 {
458 if (simd_support & JSIMD_ARM_NEON) 690 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
459 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, 691 output_col);
460 output_col);
461 } 692 }
462 693
463 GLOBAL(int) 694 GLOBAL(int)
464 jsimd_can_idct_islow (void) 695 jsimd_can_idct_islow (void)
465 { 696 {
466 init_simd(); 697 init_simd();
467 698
468 /* The code is optimised for these values only */ 699 /* The code is optimised for these values only */
469 if (DCTSIZE != 8) 700 if (DCTSIZE != 8)
470 return 0; 701 return 0;
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 741
511 GLOBAL(int) 742 GLOBAL(int)
512 jsimd_can_idct_float (void) 743 jsimd_can_idct_float (void)
513 { 744 {
514 init_simd(); 745 init_simd();
515 746
516 return 0; 747 return 0;
517 } 748 }
518 749
519 GLOBAL(void) 750 GLOBAL(void)
520 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 751 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
521 JCOEFPTR coef_block, JSAMPARRAY output_buf, 752 JCOEFPTR coef_block, JSAMPARRAY output_buf,
522 JDIMENSION output_col) 753 JDIMENSION output_col)
523 { 754 {
524 if (simd_support & JSIMD_ARM_NEON) 755 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
525 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, 756 output_col);
526 output_col);
527 } 757 }
528 758
529 GLOBAL(void) 759 GLOBAL(void)
530 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 760 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
531 JCOEFPTR coef_block, JSAMPARRAY output_buf, 761 JCOEFPTR coef_block, JSAMPARRAY output_buf,
532 JDIMENSION output_col) 762 JDIMENSION output_col)
533 { 763 {
534 if (simd_support & JSIMD_ARM_NEON) 764 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
535 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, 765 output_col);
536 output_col);
537 } 766 }
538 767
539 GLOBAL(void) 768 GLOBAL(void)
540 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 769 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
541 JCOEFPTR coef_block, JSAMPARRAY output_buf, 770 JCOEFPTR coef_block, JSAMPARRAY output_buf,
542 JDIMENSION output_col) 771 JDIMENSION output_col)
543 { 772 {
544 } 773 }
774
775 GLOBAL(int)
776 jsimd_can_huff_encode_one_block (void)
777 {
778 init_simd();
779
780 if (DCTSIZE != 8)
781 return 0;
782 if (sizeof(JCOEF) != 2)
783 return 0;
784
785 if (simd_support & JSIMD_ARM_NEON && simd_huffman)
786 return 1;
787
788 return 0;
789 }
790
791 GLOBAL(JOCTET*)
792 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
793 int last_dc_val, c_derived_tbl *dctbl,
794 c_derived_tbl *actbl)
795 {
796 if (simd_features & JSIMD_FASTTBL)
797 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
798 dctbl, actbl);
799 else
800 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
801 last_dc_val, dctbl, actbl);
802 }
OLDNEW
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_arm64_neon.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698