Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(325)

Side by Side Diff: Linux_x64/lib/clang/3.5.0/include/emmintrin.h

Issue 228113004: roll llvm-allocated-type 186332:204777. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/llvm-allocated-type/
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26
27 #ifndef __SSE2__
28 #error "SSE2 instruction set not enabled"
29 #else
30
31 #include <xmmintrin.h>
32
/* Public 16-byte vector types: a vector of double and a vector of long long. */
typedef double    __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal lane views of a 16-byte vector, used for casts in this header. */
typedef double    __v2df  __attribute__((__vector_size__(16)));
typedef long long __v2di  __attribute__((__vector_size__(16)));
typedef short     __v8hi  __attribute__((__vector_size__(16)));
typedef char      __v16qi __attribute__((__vector_size__(16)));
41
42 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
43 _mm_add_sd(__m128d __a, __m128d __b)
44 {
45 __a[0] += __b[0];
46 return __a;
47 }
48
49 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
50 _mm_add_pd(__m128d __a, __m128d __b)
51 {
52 return __a + __b;
53 }
54
55 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
56 _mm_sub_sd(__m128d __a, __m128d __b)
57 {
58 __a[0] -= __b[0];
59 return __a;
60 }
61
62 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
63 _mm_sub_pd(__m128d __a, __m128d __b)
64 {
65 return __a - __b;
66 }
67
68 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
69 _mm_mul_sd(__m128d __a, __m128d __b)
70 {
71 __a[0] *= __b[0];
72 return __a;
73 }
74
75 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
76 _mm_mul_pd(__m128d __a, __m128d __b)
77 {
78 return __a * __b;
79 }
80
81 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
82 _mm_div_sd(__m128d __a, __m128d __b)
83 {
84 __a[0] /= __b[0];
85 return __a;
86 }
87
88 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
89 _mm_div_pd(__m128d __a, __m128d __b)
90 {
91 return __a / __b;
92 }
93
94 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
95 _mm_sqrt_sd(__m128d __a, __m128d __b)
96 {
97 __m128d __c = __builtin_ia32_sqrtsd(__b);
98 return (__m128d) { __c[0], __a[1] };
99 }
100
101 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102 _mm_sqrt_pd(__m128d __a)
103 {
104 return __builtin_ia32_sqrtpd(__a);
105 }
106
107 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
108 _mm_min_sd(__m128d __a, __m128d __b)
109 {
110 return __builtin_ia32_minsd(__a, __b);
111 }
112
113 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114 _mm_min_pd(__m128d __a, __m128d __b)
115 {
116 return __builtin_ia32_minpd(__a, __b);
117 }
118
119 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
120 _mm_max_sd(__m128d __a, __m128d __b)
121 {
122 return __builtin_ia32_maxsd(__a, __b);
123 }
124
125 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126 _mm_max_pd(__m128d __a, __m128d __b)
127 {
128 return __builtin_ia32_maxpd(__a, __b);
129 }
130
131 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
132 _mm_and_pd(__m128d __a, __m128d __b)
133 {
134 return (__m128d)((__v4si)__a & (__v4si)__b);
135 }
136
137 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138 _mm_andnot_pd(__m128d __a, __m128d __b)
139 {
140 return (__m128d)(~(__v4si)__a & (__v4si)__b);
141 }
142
143 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
144 _mm_or_pd(__m128d __a, __m128d __b)
145 {
146 return (__m128d)((__v4si)__a | (__v4si)__b);
147 }
148
149 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150 _mm_xor_pd(__m128d __a, __m128d __b)
151 {
152 return (__m128d)((__v4si)__a ^ (__v4si)__b);
153 }
154
155 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
156 _mm_cmpeq_pd(__m128d __a, __m128d __b)
157 {
158 return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
159 }
160
161 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
162 _mm_cmplt_pd(__m128d __a, __m128d __b)
163 {
164 return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
165 }
166
167 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
168 _mm_cmple_pd(__m128d __a, __m128d __b)
169 {
170 return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
171 }
172
173 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
174 _mm_cmpgt_pd(__m128d __a, __m128d __b)
175 {
176 return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
177 }
178
179 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
180 _mm_cmpge_pd(__m128d __a, __m128d __b)
181 {
182 return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
183 }
184
185 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
186 _mm_cmpord_pd(__m128d __a, __m128d __b)
187 {
188 return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
189 }
190
191 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
192 _mm_cmpunord_pd(__m128d __a, __m128d __b)
193 {
194 return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
195 }
196
197 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
198 _mm_cmpneq_pd(__m128d __a, __m128d __b)
199 {
200 return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
201 }
202
203 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
204 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
205 {
206 return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
207 }
208
209 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
210 _mm_cmpnle_pd(__m128d __a, __m128d __b)
211 {
212 return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
213 }
214
215 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
216 _mm_cmpngt_pd(__m128d __a, __m128d __b)
217 {
218 return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
219 }
220
221 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
222 _mm_cmpnge_pd(__m128d __a, __m128d __b)
223 {
224 return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
225 }
226
227 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
228 _mm_cmpeq_sd(__m128d __a, __m128d __b)
229 {
230 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
231 }
232
233 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
234 _mm_cmplt_sd(__m128d __a, __m128d __b)
235 {
236 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
237 }
238
239 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
240 _mm_cmple_sd(__m128d __a, __m128d __b)
241 {
242 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
243 }
244
245 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
246 _mm_cmpgt_sd(__m128d __a, __m128d __b)
247 {
248 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
249 return (__m128d) { __c[0], __a[1] };
250 }
251
252 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
253 _mm_cmpge_sd(__m128d __a, __m128d __b)
254 {
255 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
256 return (__m128d) { __c[0], __a[1] };
257 }
258
259 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
260 _mm_cmpord_sd(__m128d __a, __m128d __b)
261 {
262 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
263 }
264
265 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
266 _mm_cmpunord_sd(__m128d __a, __m128d __b)
267 {
268 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
269 }
270
271 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
272 _mm_cmpneq_sd(__m128d __a, __m128d __b)
273 {
274 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
275 }
276
277 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
278 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
279 {
280 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
281 }
282
283 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
284 _mm_cmpnle_sd(__m128d __a, __m128d __b)
285 {
286 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
287 }
288
289 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
290 _mm_cmpngt_sd(__m128d __a, __m128d __b)
291 {
292 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
293 return (__m128d) { __c[0], __a[1] };
294 }
295
296 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
297 _mm_cmpnge_sd(__m128d __a, __m128d __b)
298 {
299 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
300 return (__m128d) { __c[0], __a[1] };
301 }
302
303 static __inline__ int __attribute__((__always_inline__, __nodebug__))
304 _mm_comieq_sd(__m128d __a, __m128d __b)
305 {
306 return __builtin_ia32_comisdeq(__a, __b);
307 }
308
309 static __inline__ int __attribute__((__always_inline__, __nodebug__))
310 _mm_comilt_sd(__m128d __a, __m128d __b)
311 {
312 return __builtin_ia32_comisdlt(__a, __b);
313 }
314
315 static __inline__ int __attribute__((__always_inline__, __nodebug__))
316 _mm_comile_sd(__m128d __a, __m128d __b)
317 {
318 return __builtin_ia32_comisdle(__a, __b);
319 }
320
321 static __inline__ int __attribute__((__always_inline__, __nodebug__))
322 _mm_comigt_sd(__m128d __a, __m128d __b)
323 {
324 return __builtin_ia32_comisdgt(__a, __b);
325 }
326
327 static __inline__ int __attribute__((__always_inline__, __nodebug__))
328 _mm_comige_sd(__m128d __a, __m128d __b)
329 {
330 return __builtin_ia32_comisdge(__a, __b);
331 }
332
333 static __inline__ int __attribute__((__always_inline__, __nodebug__))
334 _mm_comineq_sd(__m128d __a, __m128d __b)
335 {
336 return __builtin_ia32_comisdneq(__a, __b);
337 }
338
339 static __inline__ int __attribute__((__always_inline__, __nodebug__))
340 _mm_ucomieq_sd(__m128d __a, __m128d __b)
341 {
342 return __builtin_ia32_ucomisdeq(__a, __b);
343 }
344
345 static __inline__ int __attribute__((__always_inline__, __nodebug__))
346 _mm_ucomilt_sd(__m128d __a, __m128d __b)
347 {
348 return __builtin_ia32_ucomisdlt(__a, __b);
349 }
350
351 static __inline__ int __attribute__((__always_inline__, __nodebug__))
352 _mm_ucomile_sd(__m128d __a, __m128d __b)
353 {
354 return __builtin_ia32_ucomisdle(__a, __b);
355 }
356
357 static __inline__ int __attribute__((__always_inline__, __nodebug__))
358 _mm_ucomigt_sd(__m128d __a, __m128d __b)
359 {
360 return __builtin_ia32_ucomisdgt(__a, __b);
361 }
362
363 static __inline__ int __attribute__((__always_inline__, __nodebug__))
364 _mm_ucomige_sd(__m128d __a, __m128d __b)
365 {
366 return __builtin_ia32_ucomisdge(__a, __b);
367 }
368
369 static __inline__ int __attribute__((__always_inline__, __nodebug__))
370 _mm_ucomineq_sd(__m128d __a, __m128d __b)
371 {
372 return __builtin_ia32_ucomisdneq(__a, __b);
373 }
374
375 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
376 _mm_cvtpd_ps(__m128d __a)
377 {
378 return __builtin_ia32_cvtpd2ps(__a);
379 }
380
381 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
382 _mm_cvtps_pd(__m128 __a)
383 {
384 return __builtin_ia32_cvtps2pd(__a);
385 }
386
387 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
388 _mm_cvtepi32_pd(__m128i __a)
389 {
390 return __builtin_ia32_cvtdq2pd((__v4si)__a);
391 }
392
393 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
394 _mm_cvtpd_epi32(__m128d __a)
395 {
396 return __builtin_ia32_cvtpd2dq(__a);
397 }
398
399 static __inline__ int __attribute__((__always_inline__, __nodebug__))
400 _mm_cvtsd_si32(__m128d __a)
401 {
402 return __builtin_ia32_cvtsd2si(__a);
403 }
404
405 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
406 _mm_cvtsd_ss(__m128 __a, __m128d __b)
407 {
408 __a[0] = __b[0];
409 return __a;
410 }
411
412 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
413 _mm_cvtsi32_sd(__m128d __a, int __b)
414 {
415 __a[0] = __b;
416 return __a;
417 }
418
419 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
420 _mm_cvtss_sd(__m128d __a, __m128 __b)
421 {
422 __a[0] = __b[0];
423 return __a;
424 }
425
426 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
427 _mm_cvttpd_epi32(__m128d __a)
428 {
429 return (__m128i)__builtin_ia32_cvttpd2dq(__a);
430 }
431
432 static __inline__ int __attribute__((__always_inline__, __nodebug__))
433 _mm_cvttsd_si32(__m128d __a)
434 {
435 return __a[0];
436 }
437
438 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
439 _mm_cvtpd_pi32(__m128d __a)
440 {
441 return (__m64)__builtin_ia32_cvtpd2pi(__a);
442 }
443
444 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
445 _mm_cvttpd_pi32(__m128d __a)
446 {
447 return (__m64)__builtin_ia32_cvttpd2pi(__a);
448 }
449
450 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
451 _mm_cvtpi32_pd(__m64 __a)
452 {
453 return __builtin_ia32_cvtpi2pd((__v2si)__a);
454 }
455
456 static __inline__ double __attribute__((__always_inline__, __nodebug__))
457 _mm_cvtsd_f64(__m128d __a)
458 {
459 return __a[0];
460 }
461
462 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
463 _mm_load_pd(double const *__dp)
464 {
465 return *(__m128d*)__dp;
466 }
467
468 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
469 _mm_load1_pd(double const *__dp)
470 {
471 struct __mm_load1_pd_struct {
472 double __u;
473 } __attribute__((__packed__, __may_alias__));
474 double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
475 return (__m128d){ __u, __u };
476 }
477
478 #define _mm_load_pd1(dp) _mm_load1_pd(dp)
479
480 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
481 _mm_loadr_pd(double const *__dp)
482 {
483 __m128d __u = *(__m128d*)__dp;
484 return __builtin_shufflevector(__u, __u, 1, 0);
485 }
486
487 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
488 _mm_loadu_pd(double const *__dp)
489 {
490 struct __loadu_pd {
491 __m128d __v;
492 } __attribute__((packed, may_alias));
493 return ((struct __loadu_pd*)__dp)->__v;
494 }
495
496 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
497 _mm_load_sd(double const *__dp)
498 {
499 struct __mm_load_sd_struct {
500 double __u;
501 } __attribute__((__packed__, __may_alias__));
502 double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
503 return (__m128d){ __u, 0 };
504 }
505
506 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
507 _mm_loadh_pd(__m128d __a, double const *__dp)
508 {
509 struct __mm_loadh_pd_struct {
510 double __u;
511 } __attribute__((__packed__, __may_alias__));
512 double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
513 return (__m128d){ __a[0], __u };
514 }
515
516 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
517 _mm_loadl_pd(__m128d __a, double const *__dp)
518 {
519 struct __mm_loadl_pd_struct {
520 double __u;
521 } __attribute__((__packed__, __may_alias__));
522 double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
523 return (__m128d){ __u, __a[1] };
524 }
525
526 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
527 _mm_set_sd(double __w)
528 {
529 return (__m128d){ __w, 0 };
530 }
531
532 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
533 _mm_set1_pd(double __w)
534 {
535 return (__m128d){ __w, __w };
536 }
537
538 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
539 _mm_set_pd(double __w, double __x)
540 {
541 return (__m128d){ __x, __w };
542 }
543
544 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
545 _mm_setr_pd(double __w, double __x)
546 {
547 return (__m128d){ __w, __x };
548 }
549
550 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
551 _mm_setzero_pd(void)
552 {
553 return (__m128d){ 0, 0 };
554 }
555
556 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
557 _mm_move_sd(__m128d __a, __m128d __b)
558 {
559 return (__m128d){ __b[0], __a[1] };
560 }
561
562 static __inline__ void __attribute__((__always_inline__, __nodebug__))
563 _mm_store_sd(double *__dp, __m128d __a)
564 {
565 struct __mm_store_sd_struct {
566 double __u;
567 } __attribute__((__packed__, __may_alias__));
568 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
569 }
570
571 static __inline__ void __attribute__((__always_inline__, __nodebug__))
572 _mm_store1_pd(double *__dp, __m128d __a)
573 {
574 struct __mm_store1_pd_struct {
575 double __u[2];
576 } __attribute__((__packed__, __may_alias__));
577 ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
578 ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
579 }
580
581 static __inline__ void __attribute__((__always_inline__, __nodebug__))
582 _mm_store_pd(double *__dp, __m128d __a)
583 {
584 *(__m128d *)__dp = __a;
585 }
586
587 static __inline__ void __attribute__((__always_inline__, __nodebug__))
588 _mm_storeu_pd(double *__dp, __m128d __a)
589 {
590 __builtin_ia32_storeupd(__dp, __a);
591 }
592
593 static __inline__ void __attribute__((__always_inline__, __nodebug__))
594 _mm_storer_pd(double *__dp, __m128d __a)
595 {
596 __a = __builtin_shufflevector(__a, __a, 1, 0);
597 *(__m128d *)__dp = __a;
598 }
599
600 static __inline__ void __attribute__((__always_inline__, __nodebug__))
601 _mm_storeh_pd(double *__dp, __m128d __a)
602 {
603 struct __mm_storeh_pd_struct {
604 double __u;
605 } __attribute__((__packed__, __may_alias__));
606 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
607 }
608
609 static __inline__ void __attribute__((__always_inline__, __nodebug__))
610 _mm_storel_pd(double *__dp, __m128d __a)
611 {
612 struct __mm_storeh_pd_struct {
613 double __u;
614 } __attribute__((__packed__, __may_alias__));
615 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
616 }
617
618 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
619 _mm_add_epi8(__m128i __a, __m128i __b)
620 {
621 return (__m128i)((__v16qi)__a + (__v16qi)__b);
622 }
623
624 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
625 _mm_add_epi16(__m128i __a, __m128i __b)
626 {
627 return (__m128i)((__v8hi)__a + (__v8hi)__b);
628 }
629
630 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
631 _mm_add_epi32(__m128i __a, __m128i __b)
632 {
633 return (__m128i)((__v4si)__a + (__v4si)__b);
634 }
635
636 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
637 _mm_add_si64(__m64 __a, __m64 __b)
638 {
639 return __a + __b;
640 }
641
642 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
643 _mm_add_epi64(__m128i __a, __m128i __b)
644 {
645 return __a + __b;
646 }
647
648 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
649 _mm_adds_epi8(__m128i __a, __m128i __b)
650 {
651 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
652 }
653
654 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
655 _mm_adds_epi16(__m128i __a, __m128i __b)
656 {
657 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
658 }
659
660 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
661 _mm_adds_epu8(__m128i __a, __m128i __b)
662 {
663 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
664 }
665
666 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
667 _mm_adds_epu16(__m128i __a, __m128i __b)
668 {
669 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
670 }
671
672 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
673 _mm_avg_epu8(__m128i __a, __m128i __b)
674 {
675 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
676 }
677
678 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
679 _mm_avg_epu16(__m128i __a, __m128i __b)
680 {
681 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
682 }
683
684 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
685 _mm_madd_epi16(__m128i __a, __m128i __b)
686 {
687 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
688 }
689
690 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
691 _mm_max_epi16(__m128i __a, __m128i __b)
692 {
693 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
694 }
695
696 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
697 _mm_max_epu8(__m128i __a, __m128i __b)
698 {
699 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
700 }
701
702 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
703 _mm_min_epi16(__m128i __a, __m128i __b)
704 {
705 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
706 }
707
708 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
709 _mm_min_epu8(__m128i __a, __m128i __b)
710 {
711 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
712 }
713
714 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
715 _mm_mulhi_epi16(__m128i __a, __m128i __b)
716 {
717 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
718 }
719
720 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
721 _mm_mulhi_epu16(__m128i __a, __m128i __b)
722 {
723 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
724 }
725
726 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
727 _mm_mullo_epi16(__m128i __a, __m128i __b)
728 {
729 return (__m128i)((__v8hi)__a * (__v8hi)__b);
730 }
731
732 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
733 _mm_mul_su32(__m64 __a, __m64 __b)
734 {
735 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
736 }
737
738 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
739 _mm_mul_epu32(__m128i __a, __m128i __b)
740 {
741 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
742 }
743
744 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
745 _mm_sad_epu8(__m128i __a, __m128i __b)
746 {
747 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
748 }
749
750 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
751 _mm_sub_epi8(__m128i __a, __m128i __b)
752 {
753 return (__m128i)((__v16qi)__a - (__v16qi)__b);
754 }
755
756 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
757 _mm_sub_epi16(__m128i __a, __m128i __b)
758 {
759 return (__m128i)((__v8hi)__a - (__v8hi)__b);
760 }
761
762 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
763 _mm_sub_epi32(__m128i __a, __m128i __b)
764 {
765 return (__m128i)((__v4si)__a - (__v4si)__b);
766 }
767
768 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
769 _mm_sub_si64(__m64 __a, __m64 __b)
770 {
771 return __a - __b;
772 }
773
774 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
775 _mm_sub_epi64(__m128i __a, __m128i __b)
776 {
777 return __a - __b;
778 }
779
780 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
781 _mm_subs_epi8(__m128i __a, __m128i __b)
782 {
783 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
784 }
785
786 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
787 _mm_subs_epi16(__m128i __a, __m128i __b)
788 {
789 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
790 }
791
792 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
793 _mm_subs_epu8(__m128i __a, __m128i __b)
794 {
795 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
796 }
797
798 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
799 _mm_subs_epu16(__m128i __a, __m128i __b)
800 {
801 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
802 }
803
804 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
805 _mm_and_si128(__m128i __a, __m128i __b)
806 {
807 return __a & __b;
808 }
809
810 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
811 _mm_andnot_si128(__m128i __a, __m128i __b)
812 {
813 return ~__a & __b;
814 }
815
816 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
817 _mm_or_si128(__m128i __a, __m128i __b)
818 {
819 return __a | __b;
820 }
821
822 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
823 _mm_xor_si128(__m128i __a, __m128i __b)
824 {
825 return __a ^ __b;
826 }
827
/* Copies `a` into a local __a and passes it to __builtin_ia32_pslldqi128
 * together with `count` multiplied by 8 (presumably converting a byte count
 * into the bit count the builtin expects).
 *
 * -Wshadow is silenced only around the declaration of the local __a, since
 * the caller's scope may already contain that name. The quoted warning name
 * must be written as \"-Wshadow\" inside the _Pragma string. */
#define _mm_slli_si128(a, count) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128i __a = (a); \
  _Pragma("clang diagnostic pop"); \
  (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
833
834 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
835 _mm_slli_epi16(__m128i __a, int __count)
836 {
837 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
838 }
839
840 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
841 _mm_sll_epi16(__m128i __a, __m128i __count)
842 {
843 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
844 }
845
846 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
847 _mm_slli_epi32(__m128i __a, int __count)
848 {
849 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
850 }
851
852 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
853 _mm_sll_epi32(__m128i __a, __m128i __count)
854 {
855 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
856 }
857
858 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
859 _mm_slli_epi64(__m128i __a, int __count)
860 {
861 return __builtin_ia32_psllqi128(__a, __count);
862 }
863
864 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
865 _mm_sll_epi64(__m128i __a, __m128i __count)
866 {
867 return __builtin_ia32_psllq128(__a, __count);
868 }
869
870 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
871 _mm_srai_epi16(__m128i __a, int __count)
872 {
873 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
874 }
875
876 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
877 _mm_sra_epi16(__m128i __a, __m128i __count)
878 {
879 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
880 }
881
882 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
883 _mm_srai_epi32(__m128i __a, int __count)
884 {
885 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
886 }
887
888 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
889 _mm_sra_epi32(__m128i __a, __m128i __count)
890 {
891 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
892 }
893
894
/* Copies `a` into a local __a and passes it to __builtin_ia32_psrldqi128
 * together with `count` multiplied by 8 (presumably converting a byte count
 * into the bit count the builtin expects).
 *
 * -Wshadow is silenced only around the declaration of the local __a, since
 * the caller's scope may already contain that name. The quoted warning name
 * must be written as \"-Wshadow\" inside the _Pragma string. */
#define _mm_srli_si128(a, count) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128i __a = (a); \
  _Pragma("clang diagnostic pop"); \
  (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
900
901 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
902 _mm_srli_epi16(__m128i __a, int __count)
903 {
904 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
905 }
906
907 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
908 _mm_srl_epi16(__m128i __a, __m128i __count)
909 {
910 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
911 }
912
913 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
914 _mm_srli_epi32(__m128i __a, int __count)
915 {
916 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
917 }
918
919 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
920 _mm_srl_epi32(__m128i __a, __m128i __count)
921 {
922 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
923 }
924
925 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
926 _mm_srli_epi64(__m128i __a, int __count)
927 {
928 return __builtin_ia32_psrlqi128(__a, __count);
929 }
930
931 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
932 _mm_srl_epi64(__m128i __a, __m128i __count)
933 {
934 return __builtin_ia32_psrlq128(__a, __count);
935 }
936
937 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
938 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
939 {
940 return (__m128i)((__v16qi)__a == (__v16qi)__b);
941 }
942
943 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
944 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
945 {
946 return (__m128i)((__v8hi)__a == (__v8hi)__b);
947 }
948
949 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
950 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
951 {
952 return (__m128i)((__v4si)__a == (__v4si)__b);
953 }
954
955 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
956 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
957 {
958 /* This function always performs a signed comparison, but __v16qi is a char
959 which may be signed or unsigned. */
960 typedef signed char __v16qs __attribute__((__vector_size__(16)));
961 return (__m128i)((__v16qs)__a > (__v16qs)__b);
962 }
963
964 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
965 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
966 {
967 return (__m128i)((__v8hi)__a > (__v8hi)__b);
968 }
969
970 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
971 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
972 {
973 return (__m128i)((__v4si)__a > (__v4si)__b);
974 }
975
976 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
977 _mm_cmplt_epi8(__m128i __a, __m128i __b)
978 {
979 return _mm_cmpgt_epi8(__b, __a);
980 }
981
982 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
983 _mm_cmplt_epi16(__m128i __a, __m128i __b)
984 {
985 return _mm_cmpgt_epi16(__b, __a);
986 }
987
988 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
989 _mm_cmplt_epi32(__m128i __a, __m128i __b)
990 {
991 return _mm_cmpgt_epi32(__b, __a);
992 }
993
994 #ifdef __x86_64__
995 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
996 _mm_cvtsi64_sd(__m128d __a, long long __b)
997 {
998 __a[0] = __b;
999 return __a;
1000 }
1001
1002 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1003 _mm_cvtsd_si64(__m128d __a)
1004 {
1005 return __builtin_ia32_cvtsd2si64(__a);
1006 }
1007
1008 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1009 _mm_cvttsd_si64(__m128d __a)
1010 {
1011 return __a[0];
1012 }
1013 #endif
1014
1015 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1016 _mm_cvtepi32_ps(__m128i __a)
1017 {
1018 return __builtin_ia32_cvtdq2ps((__v4si)__a);
1019 }
1020
1021 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1022 _mm_cvtps_epi32(__m128 __a)
1023 {
1024 return (__m128i)__builtin_ia32_cvtps2dq(__a);
1025 }
1026
1027 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1028 _mm_cvttps_epi32(__m128 __a)
1029 {
1030 return (__m128i)__builtin_ia32_cvttps2dq(__a);
1031 }
1032
1033 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1034 _mm_cvtsi32_si128(int __a)
1035 {
1036 return (__m128i)(__v4si){ __a, 0, 0, 0 };
1037 }
1038
1039 #ifdef __x86_64__
1040 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1041 _mm_cvtsi64_si128(long long __a)
1042 {
1043 return (__m128i){ __a, 0 };
1044 }
1045 #endif
1046
1047 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1048 _mm_cvtsi128_si32(__m128i __a)
1049 {
1050 __v4si __b = (__v4si)__a;
1051 return __b[0];
1052 }
1053
1054 #ifdef __x86_64__
1055 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1056 _mm_cvtsi128_si64(__m128i __a)
1057 {
1058 return __a[0];
1059 }
1060 #endif
1061
1062 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1063 _mm_load_si128(__m128i const *__p)
1064 {
1065 return *__p;
1066 }
1067
1068 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1069 _mm_loadu_si128(__m128i const *__p)
1070 {
1071 struct __loadu_si128 {
1072 __m128i __v;
1073 } __attribute__((packed, may_alias));
1074 return ((struct __loadu_si128*)__p)->__v;
1075 }
1076
1077 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1078 _mm_loadl_epi64(__m128i const *__p)
1079 {
1080 struct __mm_loadl_epi64_struct {
1081 long long __u;
1082 } __attribute__((__packed__, __may_alias__));
1083 return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1084 }
1085
1086 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1087 _mm_set_epi64x(long long q1, long long q0)
1088 {
1089 return (__m128i){ q0, q1 };
1090 }
1091
1092 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1093 _mm_set_epi64(__m64 q1, __m64 q0)
1094 {
1095 return (__m128i){ (long long)q0, (long long)q1 };
1096 }
1097
1098 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1099 _mm_set_epi32(int i3, int i2, int i1, int i0)
1100 {
1101 return (__m128i)(__v4si){ i0, i1, i2, i3};
1102 }
1103
1104 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1105 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1106 {
1107 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1108 }
1109
1110 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1111 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9 , char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b 0)
1112 {
1113 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1114 }
1115
1116 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1117 _mm_set1_epi64x(long long __q)
1118 {
1119 return (__m128i){ __q, __q };
1120 }
1121
1122 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1123 _mm_set1_epi64(__m64 __q)
1124 {
1125 return (__m128i){ (long long)__q, (long long)__q };
1126 }
1127
1128 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1129 _mm_set1_epi32(int __i)
1130 {
1131 return (__m128i)(__v4si){ __i, __i, __i, __i };
1132 }
1133
1134 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1135 _mm_set1_epi16(short __w)
1136 {
1137 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1138 }
1139
1140 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1141 _mm_set1_epi8(char __b)
1142 {
1143 return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, _ _b, __b, __b, __b, __b, __b };
1144 }
1145
1146 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1147 _mm_setr_epi64(__m64 q0, __m64 q1)
1148 {
1149 return (__m128i){ (long long)q0, (long long)q1 };
1150 }
1151
1152 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1153 _mm_setr_epi32(int i0, int i1, int i2, int i3)
1154 {
1155 return (__m128i)(__v4si){ i0, i1, i2, i3};
1156 }
1157
1158 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1159 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1160 {
1161 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1162 }
1163
1164 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1165 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, cha r b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b 15)
1166 {
1167 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1168 }
1169
1170 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1171 _mm_setzero_si128(void)
1172 {
1173 return (__m128i){ 0LL, 0LL };
1174 }
1175
1176 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1177 _mm_store_si128(__m128i *__p, __m128i __b)
1178 {
1179 *__p = __b;
1180 }
1181
1182 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1183 _mm_storeu_si128(__m128i *__p, __m128i __b)
1184 {
1185 __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
1186 }
1187
1188 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1189 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
1190 {
1191 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1192 }
1193
1194 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1195 _mm_storel_epi64(__m128i *__p, __m128i __a)
1196 {
1197 struct __mm_storel_epi64_struct {
1198 long long __u;
1199 } __attribute__((__packed__, __may_alias__));
1200 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1201 }
1202
1203 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1204 _mm_stream_pd(double *__p, __m128d __a)
1205 {
1206 __builtin_ia32_movntpd(__p, __a);
1207 }
1208
1209 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1210 _mm_stream_si128(__m128i *__p, __m128i __a)
1211 {
1212 __builtin_ia32_movntdq(__p, __a);
1213 }
1214
/* Stores the 32-bit integer __a at __p by forwarding both to the movnti
   builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  __builtin_ia32_movnti(__dst, __a);
}
1220
1221 #ifdef __x86_64__
/* Stores the 64-bit integer __a at __p by forwarding both to the movnti64
   builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si64(long long *__p, long long __a)
{
  long long *__dst = __p;
  __builtin_ia32_movnti64(__dst, __a);
}
1227 #endif
1228
/* Passes the address __p to the clflush builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
  void const *__line = __p;
  __builtin_ia32_clflush(__line);
}
1234
/* Invokes the lfence builtin; takes no arguments and produces no value. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
  return;
}
1240
/* Invokes the mfence builtin; takes no arguments and produces no value. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
  return;
}
1246
1247 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1248 _mm_packs_epi16(__m128i __a, __m128i __b)
1249 {
1250 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1251 }
1252
1253 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1254 _mm_packs_epi32(__m128i __a, __m128i __b)
1255 {
1256 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1257 }
1258
1259 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1260 _mm_packus_epi16(__m128i __a, __m128i __b)
1261 {
1262 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1263 }
1264
1265 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1266 _mm_extract_epi16(__m128i __a, int __imm)
1267 {
1268 __v8hi __b = (__v8hi)__a;
1269 return (unsigned short)__b[__imm & 7];
1270 }
1271
1272 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1273 _mm_insert_epi16(__m128i __a, int __b, int __imm)
1274 {
1275 __v8hi __c = (__v8hi)__a;
1276 __c[__imm & 7] = __b;
1277 return (__m128i)__c;
1278 }
1279
1280 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1281 _mm_movemask_epi8(__m128i __a)
1282 {
1283 return __builtin_ia32_pmovmskb128((__v16qi)__a);
1284 }
1285
/* Permutes the four 32-bit lanes of `a`: result lane k is the lane of `a`
   selected by bits [2k+1:2k] of `imm`.  `imm` is used as a shufflevector
   index and so must be an integer constant expression.  `a` is evaluated
   once into a local; -Wshadow is silenced around that declaration because
   the local may shadow a caller variable named __a. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128i __a = (a); \
  _Pragma("clang diagnostic pop"); \
  (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
                                   (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                   ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
1293
/* Permutes 16-bit lanes 0..3 of `a`: result lane k (k < 4) is the lane
   selected by bits [2k+1:2k] of `imm`; lanes 4..7 are copied unchanged.
   `imm` is used as a shufflevector index and so must be an integer constant
   expression.  `a` is evaluated once into a local; -Wshadow is silenced
   around that declaration because the local may shadow a caller variable
   named __a. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128i __a = (a); \
  _Pragma("clang diagnostic pop"); \
  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
                                   (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                   ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                   4, 5, 6, 7); })
1302
/* Permutes 16-bit lanes 4..7 of `a`: result lane 4+k is lane 4 + (bits
   [2k+1:2k] of `imm`); lanes 0..3 are copied unchanged.  `imm` is used as a
   shufflevector index and so must be an integer constant expression.  `a`
   is evaluated once into a local; -Wshadow is silenced around that
   declaration because the local may shadow a caller variable named __a. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128i __a = (a); \
  _Pragma("clang diagnostic pop"); \
  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) & 0x03) >> 0), \
                                   4 + (((imm) & 0x0c) >> 2), \
                                   4 + (((imm) & 0x30) >> 4), \
                                   4 + (((imm) & 0xc0) >> 6)); })
1313
1314 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1315 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
1316 {
1317 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9 , 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1318 }
1319
1320 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1321 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
1322 {
1323 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8 +5, 6, 8+6, 7, 8+7);
1324 }
1325
1326 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1327 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
1328 {
1329 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4 +3);
1330 }
1331
1332 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1333 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
1334 {
1335 return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1336 }
1337
1338 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1339 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
1340 {
1341 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1 , 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1342 }
1343
1344 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1345 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
1346 {
1347 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8 +1, 2, 8+2, 3, 8+3);
1348 }
1349
1350 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1351 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
1352 {
1353 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4 +1);
1354 }
1355
1356 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1357 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
1358 {
1359 return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1360 }
1361
1362 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
1363 _mm_movepi64_pi64(__m128i __a)
1364 {
1365 return (__m64)__a[0];
1366 }
1367
1368 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1369 _mm_movpi64_epi64(__m64 __a)
1370 {
1371 return (__m128i){ (long long)__a, 0 };
1372 }
1373
1374 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1375 _mm_move_epi64(__m128i __a)
1376 {
1377 return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1378 }
1379
1380 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1381 _mm_unpackhi_pd(__m128d __a, __m128d __b)
1382 {
1383 return __builtin_shufflevector(__a, __b, 1, 2+1);
1384 }
1385
1386 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1387 _mm_unpacklo_pd(__m128d __a, __m128d __b)
1388 {
1389 return __builtin_shufflevector(__a, __b, 0, 2+0);
1390 }
1391
1392 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1393 _mm_movemask_pd(__m128d __a)
1394 {
1395 return __builtin_ia32_movmskpd(__a);
1396 }
1397
/* Selects one double from each operand: result lane 0 is a[i & 1] and
   result lane 1 is b[(i & 2) >> 1].  `i` is used as a shufflevector index
   and so must be an integer constant expression.  `a` and `b` are each
   evaluated once into locals; -Wshadow is silenced around those
   declarations because the locals may shadow caller variables named
   __a / __b. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
  __m128d __a = (a); \
  __m128d __b = (b); \
  _Pragma("clang diagnostic pop"); \
  __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); })
1404
1405 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1406 _mm_castpd_ps(__m128d __a)
1407 {
1408 return (__m128)__a;
1409 }
1410
1411 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1412 _mm_castpd_si128(__m128d __a)
1413 {
1414 return (__m128i)__a;
1415 }
1416
1417 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1418 _mm_castps_pd(__m128 __a)
1419 {
1420 return (__m128d)__a;
1421 }
1422
1423 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1424 _mm_castps_si128(__m128 __a)
1425 {
1426 return (__m128i)__a;
1427 }
1428
1429 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1430 _mm_castsi128_ps(__m128i __a)
1431 {
1432 return (__m128)__a;
1433 }
1434
1435 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1436 _mm_castsi128_pd(__m128i __a)
1437 {
1438 return (__m128d)__a;
1439 }
1440
/* Emits a single `pause` instruction through volatile inline assembly. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1446
/* Packs two selectors into one immediate: x lands in bit 1, y in bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1448
1449 #endif /* __SSE2__ */
1450
1451 #endif /* __EMMINTRIN_H */
OLDNEW
« no previous file with comments | « Linux_x64/lib/clang/3.5.0/include/cpuid.h ('k') | Linux_x64/lib/clang/3.5.0/include/f16cintrin.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698