Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: third_party/asan/asan_clang_Linux/lib/clang/3.0/include/emmintrin.h

Issue 8404033: New ASan binaries for Linux (r946) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/
Patch Set: Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26
27 #ifndef __SSE2__
28 #error "SSE2 instruction set not enabled"
29 #else
30
31 #include <xmmintrin.h>
32
/* Public SSE2 vector types: 16-byte vectors of double and of long long. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Lane-typed 16-byte views used for casts inside this header. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
41
42 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
43 _mm_add_sd(__m128d a, __m128d b)
44 {
45 a[0] += b[0];
46 return a;
47 }
48
49 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
50 _mm_add_pd(__m128d a, __m128d b)
51 {
52 return a + b;
53 }
54
55 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
56 _mm_sub_sd(__m128d a, __m128d b)
57 {
58 a[0] -= b[0];
59 return a;
60 }
61
62 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
63 _mm_sub_pd(__m128d a, __m128d b)
64 {
65 return a - b;
66 }
67
68 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
69 _mm_mul_sd(__m128d a, __m128d b)
70 {
71 a[0] *= b[0];
72 return a;
73 }
74
75 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
76 _mm_mul_pd(__m128d a, __m128d b)
77 {
78 return a * b;
79 }
80
81 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
82 _mm_div_sd(__m128d a, __m128d b)
83 {
84 a[0] /= b[0];
85 return a;
86 }
87
88 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
89 _mm_div_pd(__m128d a, __m128d b)
90 {
91 return a / b;
92 }
93
94 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
95 _mm_sqrt_sd(__m128d a, __m128d b)
96 {
97 __m128d c = __builtin_ia32_sqrtsd(b);
98 return (__m128d) { c[0], a[1] };
99 }
100
101 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102 _mm_sqrt_pd(__m128d a)
103 {
104 return __builtin_ia32_sqrtpd(a);
105 }
106
107 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
108 _mm_min_sd(__m128d a, __m128d b)
109 {
110 return __builtin_ia32_minsd(a, b);
111 }
112
113 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114 _mm_min_pd(__m128d a, __m128d b)
115 {
116 return __builtin_ia32_minpd(a, b);
117 }
118
119 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
120 _mm_max_sd(__m128d a, __m128d b)
121 {
122 return __builtin_ia32_maxsd(a, b);
123 }
124
125 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126 _mm_max_pd(__m128d a, __m128d b)
127 {
128 return __builtin_ia32_maxpd(a, b);
129 }
130
131 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
132 _mm_and_pd(__m128d a, __m128d b)
133 {
134 return (__m128d)((__v4si)a & (__v4si)b);
135 }
136
137 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138 _mm_andnot_pd(__m128d a, __m128d b)
139 {
140 return (__m128d)(~(__v4si)a & (__v4si)b);
141 }
142
143 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
144 _mm_or_pd(__m128d a, __m128d b)
145 {
146 return (__m128d)((__v4si)a | (__v4si)b);
147 }
148
149 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150 _mm_xor_pd(__m128d a, __m128d b)
151 {
152 return (__m128d)((__v4si)a ^ (__v4si)b);
153 }
154
155 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
156 _mm_cmpeq_pd(__m128d a, __m128d b)
157 {
158 return (__m128d)__builtin_ia32_cmppd(a, b, 0);
159 }
160
161 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
162 _mm_cmplt_pd(__m128d a, __m128d b)
163 {
164 return (__m128d)__builtin_ia32_cmppd(a, b, 1);
165 }
166
167 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
168 _mm_cmple_pd(__m128d a, __m128d b)
169 {
170 return (__m128d)__builtin_ia32_cmppd(a, b, 2);
171 }
172
173 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
174 _mm_cmpgt_pd(__m128d a, __m128d b)
175 {
176 return (__m128d)__builtin_ia32_cmppd(b, a, 1);
177 }
178
179 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
180 _mm_cmpge_pd(__m128d a, __m128d b)
181 {
182 return (__m128d)__builtin_ia32_cmppd(b, a, 2);
183 }
184
185 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
186 _mm_cmpord_pd(__m128d a, __m128d b)
187 {
188 return (__m128d)__builtin_ia32_cmppd(a, b, 7);
189 }
190
191 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
192 _mm_cmpunord_pd(__m128d a, __m128d b)
193 {
194 return (__m128d)__builtin_ia32_cmppd(a, b, 3);
195 }
196
197 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
198 _mm_cmpneq_pd(__m128d a, __m128d b)
199 {
200 return (__m128d)__builtin_ia32_cmppd(a, b, 4);
201 }
202
203 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
204 _mm_cmpnlt_pd(__m128d a, __m128d b)
205 {
206 return (__m128d)__builtin_ia32_cmppd(a, b, 5);
207 }
208
209 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
210 _mm_cmpnle_pd(__m128d a, __m128d b)
211 {
212 return (__m128d)__builtin_ia32_cmppd(a, b, 6);
213 }
214
215 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
216 _mm_cmpngt_pd(__m128d a, __m128d b)
217 {
218 return (__m128d)__builtin_ia32_cmppd(b, a, 5);
219 }
220
221 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
222 _mm_cmpnge_pd(__m128d a, __m128d b)
223 {
224 return (__m128d)__builtin_ia32_cmppd(b, a, 6);
225 }
226
227 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
228 _mm_cmpeq_sd(__m128d a, __m128d b)
229 {
230 return (__m128d)__builtin_ia32_cmpsd(a, b, 0);
231 }
232
233 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
234 _mm_cmplt_sd(__m128d a, __m128d b)
235 {
236 return (__m128d)__builtin_ia32_cmpsd(a, b, 1);
237 }
238
239 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
240 _mm_cmple_sd(__m128d a, __m128d b)
241 {
242 return (__m128d)__builtin_ia32_cmpsd(a, b, 2);
243 }
244
245 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
246 _mm_cmpgt_sd(__m128d a, __m128d b)
247 {
248 return (__m128d)__builtin_ia32_cmpsd(b, a, 1);
249 }
250
251 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
252 _mm_cmpge_sd(__m128d a, __m128d b)
253 {
254 return (__m128d)__builtin_ia32_cmpsd(b, a, 2);
255 }
256
257 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
258 _mm_cmpord_sd(__m128d a, __m128d b)
259 {
260 return (__m128d)__builtin_ia32_cmpsd(a, b, 7);
261 }
262
263 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
264 _mm_cmpunord_sd(__m128d a, __m128d b)
265 {
266 return (__m128d)__builtin_ia32_cmpsd(a, b, 3);
267 }
268
269 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
270 _mm_cmpneq_sd(__m128d a, __m128d b)
271 {
272 return (__m128d)__builtin_ia32_cmpsd(a, b, 4);
273 }
274
275 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
276 _mm_cmpnlt_sd(__m128d a, __m128d b)
277 {
278 return (__m128d)__builtin_ia32_cmpsd(a, b, 5);
279 }
280
281 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
282 _mm_cmpnle_sd(__m128d a, __m128d b)
283 {
284 return (__m128d)__builtin_ia32_cmpsd(a, b, 6);
285 }
286
287 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
288 _mm_cmpngt_sd(__m128d a, __m128d b)
289 {
290 return (__m128d)__builtin_ia32_cmpsd(b, a, 5);
291 }
292
293 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
294 _mm_cmpnge_sd(__m128d a, __m128d b)
295 {
296 return (__m128d)__builtin_ia32_cmpsd(b, a, 6);
297 }
298
299 static __inline__ int __attribute__((__always_inline__, __nodebug__))
300 _mm_comieq_sd(__m128d a, __m128d b)
301 {
302 return __builtin_ia32_comisdeq(a, b);
303 }
304
305 static __inline__ int __attribute__((__always_inline__, __nodebug__))
306 _mm_comilt_sd(__m128d a, __m128d b)
307 {
308 return __builtin_ia32_comisdlt(a, b);
309 }
310
311 static __inline__ int __attribute__((__always_inline__, __nodebug__))
312 _mm_comile_sd(__m128d a, __m128d b)
313 {
314 return __builtin_ia32_comisdle(a, b);
315 }
316
317 static __inline__ int __attribute__((__always_inline__, __nodebug__))
318 _mm_comigt_sd(__m128d a, __m128d b)
319 {
320 return __builtin_ia32_comisdgt(a, b);
321 }
322
323 static __inline__ int __attribute__((__always_inline__, __nodebug__))
324 _mm_comineq_sd(__m128d a, __m128d b)
325 {
326 return __builtin_ia32_comisdneq(a, b);
327 }
328
329 static __inline__ int __attribute__((__always_inline__, __nodebug__))
330 _mm_ucomieq_sd(__m128d a, __m128d b)
331 {
332 return __builtin_ia32_ucomisdeq(a, b);
333 }
334
335 static __inline__ int __attribute__((__always_inline__, __nodebug__))
336 _mm_ucomilt_sd(__m128d a, __m128d b)
337 {
338 return __builtin_ia32_ucomisdlt(a, b);
339 }
340
341 static __inline__ int __attribute__((__always_inline__, __nodebug__))
342 _mm_ucomile_sd(__m128d a, __m128d b)
343 {
344 return __builtin_ia32_ucomisdle(a, b);
345 }
346
347 static __inline__ int __attribute__((__always_inline__, __nodebug__))
348 _mm_ucomigt_sd(__m128d a, __m128d b)
349 {
350 return __builtin_ia32_ucomisdgt(a, b);
351 }
352
353 static __inline__ int __attribute__((__always_inline__, __nodebug__))
354 _mm_ucomige_sd(__m128d a, __m128d b)
355 {
356 return __builtin_ia32_ucomisdge(a, b);
357 }
358
359 static __inline__ int __attribute__((__always_inline__, __nodebug__))
360 _mm_ucomineq_sd(__m128d a, __m128d b)
361 {
362 return __builtin_ia32_ucomisdneq(a, b);
363 }
364
365 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
366 _mm_cvtpd_ps(__m128d a)
367 {
368 return __builtin_ia32_cvtpd2ps(a);
369 }
370
371 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
372 _mm_cvtps_pd(__m128 a)
373 {
374 return __builtin_ia32_cvtps2pd(a);
375 }
376
377 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
378 _mm_cvtepi32_pd(__m128i a)
379 {
380 return __builtin_ia32_cvtdq2pd((__v4si)a);
381 }
382
383 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
384 _mm_cvtpd_epi32(__m128d a)
385 {
386 return __builtin_ia32_cvtpd2dq(a);
387 }
388
389 static __inline__ int __attribute__((__always_inline__, __nodebug__))
390 _mm_cvtsd_si32(__m128d a)
391 {
392 return __builtin_ia32_cvtsd2si(a);
393 }
394
395 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
396 _mm_cvtsd_ss(__m128 a, __m128d b)
397 {
398 a[0] = b[0];
399 return a;
400 }
401
402 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
403 _mm_cvtsi32_sd(__m128d a, int b)
404 {
405 a[0] = b;
406 return a;
407 }
408
409 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
410 _mm_cvtss_sd(__m128d a, __m128 b)
411 {
412 a[0] = b[0];
413 return a;
414 }
415
416 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
417 _mm_cvttpd_epi32(__m128d a)
418 {
419 return (__m128i)__builtin_ia32_cvttpd2dq(a);
420 }
421
422 static __inline__ int __attribute__((__always_inline__, __nodebug__))
423 _mm_cvttsd_si32(__m128d a)
424 {
425 return a[0];
426 }
427
428 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
429 _mm_cvtpd_pi32(__m128d a)
430 {
431 return (__m64)__builtin_ia32_cvtpd2pi(a);
432 }
433
434 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
435 _mm_cvttpd_pi32(__m128d a)
436 {
437 return (__m64)__builtin_ia32_cvttpd2pi(a);
438 }
439
440 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
441 _mm_cvtpi32_pd(__m64 a)
442 {
443 return __builtin_ia32_cvtpi2pd((__v2si)a);
444 }
445
446 static __inline__ double __attribute__((__always_inline__, __nodebug__))
447 _mm_cvtsd_f64(__m128d a)
448 {
449 return a[0];
450 }
451
452 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
453 _mm_load_pd(double const *dp)
454 {
455 return *(__m128d*)dp;
456 }
457
458 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
459 _mm_load1_pd(double const *dp)
460 {
461 struct __mm_load1_pd_struct {
462 double u;
463 } __attribute__((__packed__, __may_alias__));
464 double u = ((struct __mm_load1_pd_struct*)dp)->u;
465 return (__m128d){ u, u };
466 }
467
/* Alternate spelling that forwards to _mm_load1_pd. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
469
470 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
471 _mm_loadr_pd(double const *dp)
472 {
473 __m128d u = *(__m128d*)dp;
474 return __builtin_shufflevector(u, u, 1, 0);
475 }
476
477 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
478 _mm_loadu_pd(double const *dp)
479 {
480 struct __loadu_pd {
481 __m128d v;
482 } __attribute__((packed, may_alias));
483 return ((struct __loadu_pd*)dp)->v;
484 }
485
486 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
487 _mm_load_sd(double const *dp)
488 {
489 struct __mm_load_sd_struct {
490 double u;
491 } __attribute__((__packed__, __may_alias__));
492 double u = ((struct __mm_load_sd_struct*)dp)->u;
493 return (__m128d){ u, 0 };
494 }
495
496 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
497 _mm_loadh_pd(__m128d a, double const *dp)
498 {
499 struct __mm_loadh_pd_struct {
500 double u;
501 } __attribute__((__packed__, __may_alias__));
502 double u = ((struct __mm_loadh_pd_struct*)dp)->u;
503 return (__m128d){ a[0], u };
504 }
505
506 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
507 _mm_loadl_pd(__m128d a, double const *dp)
508 {
509 struct __mm_loadl_pd_struct {
510 double u;
511 } __attribute__((__packed__, __may_alias__));
512 double u = ((struct __mm_loadl_pd_struct*)dp)->u;
513 return (__m128d){ u, a[1] };
514 }
515
516 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
517 _mm_set_sd(double w)
518 {
519 return (__m128d){ w, 0 };
520 }
521
522 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
523 _mm_set1_pd(double w)
524 {
525 return (__m128d){ w, w };
526 }
527
528 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
529 _mm_set_pd(double w, double x)
530 {
531 return (__m128d){ x, w };
532 }
533
534 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
535 _mm_setr_pd(double w, double x)
536 {
537 return (__m128d){ w, x };
538 }
539
540 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
541 _mm_setzero_pd(void)
542 {
543 return (__m128d){ 0, 0 };
544 }
545
546 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
547 _mm_move_sd(__m128d a, __m128d b)
548 {
549 return (__m128d){ b[0], a[1] };
550 }
551
552 static __inline__ void __attribute__((__always_inline__, __nodebug__))
553 _mm_store_sd(double *dp, __m128d a)
554 {
555 struct __mm_store_sd_struct {
556 double u;
557 } __attribute__((__packed__, __may_alias__));
558 ((struct __mm_store_sd_struct*)dp)->u = a[0];
559 }
560
561 static __inline__ void __attribute__((__always_inline__, __nodebug__))
562 _mm_store1_pd(double *dp, __m128d a)
563 {
564 struct __mm_store1_pd_struct {
565 double u[2];
566 } __attribute__((__packed__, __may_alias__));
567 ((struct __mm_store1_pd_struct*)dp)->u[0] = a[0];
568 ((struct __mm_store1_pd_struct*)dp)->u[1] = a[0];
569 }
570
571 static __inline__ void __attribute__((__always_inline__, __nodebug__))
572 _mm_store_pd(double *dp, __m128d a)
573 {
574 *(__m128d *)dp = a;
575 }
576
577 static __inline__ void __attribute__((__always_inline__, __nodebug__))
578 _mm_storeu_pd(double *dp, __m128d a)
579 {
580 __builtin_ia32_storeupd(dp, a);
581 }
582
583 static __inline__ void __attribute__((__always_inline__, __nodebug__))
584 _mm_storer_pd(double *dp, __m128d a)
585 {
586 a = __builtin_shufflevector(a, a, 1, 0);
587 *(__m128d *)dp = a;
588 }
589
590 static __inline__ void __attribute__((__always_inline__, __nodebug__))
591 _mm_storeh_pd(double *dp, __m128d a)
592 {
593 struct __mm_storeh_pd_struct {
594 double u;
595 } __attribute__((__packed__, __may_alias__));
596 ((struct __mm_storeh_pd_struct*)dp)->u = a[1];
597 }
598
599 static __inline__ void __attribute__((__always_inline__, __nodebug__))
600 _mm_storel_pd(double *dp, __m128d a)
601 {
602 struct __mm_storeh_pd_struct {
603 double u;
604 } __attribute__((__packed__, __may_alias__));
605 ((struct __mm_storeh_pd_struct*)dp)->u = a[0];
606 }
607
608 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
609 _mm_add_epi8(__m128i a, __m128i b)
610 {
611 return (__m128i)((__v16qi)a + (__v16qi)b);
612 }
613
614 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
615 _mm_add_epi16(__m128i a, __m128i b)
616 {
617 return (__m128i)((__v8hi)a + (__v8hi)b);
618 }
619
620 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
621 _mm_add_epi32(__m128i a, __m128i b)
622 {
623 return (__m128i)((__v4si)a + (__v4si)b);
624 }
625
626 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
627 _mm_add_si64(__m64 a, __m64 b)
628 {
629 return a + b;
630 }
631
632 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
633 _mm_add_epi64(__m128i a, __m128i b)
634 {
635 return a + b;
636 }
637
638 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
639 _mm_adds_epi8(__m128i a, __m128i b)
640 {
641 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
642 }
643
644 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
645 _mm_adds_epi16(__m128i a, __m128i b)
646 {
647 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
648 }
649
650 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
651 _mm_adds_epu8(__m128i a, __m128i b)
652 {
653 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
654 }
655
656 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
657 _mm_adds_epu16(__m128i a, __m128i b)
658 {
659 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
660 }
661
662 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
663 _mm_avg_epu8(__m128i a, __m128i b)
664 {
665 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
666 }
667
668 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
669 _mm_avg_epu16(__m128i a, __m128i b)
670 {
671 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
672 }
673
674 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
675 _mm_madd_epi16(__m128i a, __m128i b)
676 {
677 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
678 }
679
680 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
681 _mm_max_epi16(__m128i a, __m128i b)
682 {
683 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
684 }
685
686 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
687 _mm_max_epu8(__m128i a, __m128i b)
688 {
689 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
690 }
691
692 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
693 _mm_min_epi16(__m128i a, __m128i b)
694 {
695 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
696 }
697
698 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
699 _mm_min_epu8(__m128i a, __m128i b)
700 {
701 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
702 }
703
704 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
705 _mm_mulhi_epi16(__m128i a, __m128i b)
706 {
707 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
708 }
709
710 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
711 _mm_mulhi_epu16(__m128i a, __m128i b)
712 {
713 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
714 }
715
716 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
717 _mm_mullo_epi16(__m128i a, __m128i b)
718 {
719 return (__m128i)((__v8hi)a * (__v8hi)b);
720 }
721
722 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
723 _mm_mul_su32(__m64 a, __m64 b)
724 {
725 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
726 }
727
728 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
729 _mm_mul_epu32(__m128i a, __m128i b)
730 {
731 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
732 }
733
734 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
735 _mm_sad_epu8(__m128i a, __m128i b)
736 {
737 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
738 }
739
740 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
741 _mm_sub_epi8(__m128i a, __m128i b)
742 {
743 return (__m128i)((__v16qi)a - (__v16qi)b);
744 }
745
746 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
747 _mm_sub_epi16(__m128i a, __m128i b)
748 {
749 return (__m128i)((__v8hi)a - (__v8hi)b);
750 }
751
752 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
753 _mm_sub_epi32(__m128i a, __m128i b)
754 {
755 return (__m128i)((__v4si)a - (__v4si)b);
756 }
757
758 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
759 _mm_sub_si64(__m64 a, __m64 b)
760 {
761 return a - b;
762 }
763
764 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
765 _mm_sub_epi64(__m128i a, __m128i b)
766 {
767 return a - b;
768 }
769
770 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
771 _mm_subs_epi8(__m128i a, __m128i b)
772 {
773 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
774 }
775
776 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
777 _mm_subs_epi16(__m128i a, __m128i b)
778 {
779 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
780 }
781
782 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
783 _mm_subs_epu8(__m128i a, __m128i b)
784 {
785 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
786 }
787
788 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
789 _mm_subs_epu16(__m128i a, __m128i b)
790 {
791 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
792 }
793
794 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
795 _mm_and_si128(__m128i a, __m128i b)
796 {
797 return a & b;
798 }
799
800 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
801 _mm_andnot_si128(__m128i a, __m128i b)
802 {
803 return ~a & b;
804 }
805
806 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
807 _mm_or_si128(__m128i a, __m128i b)
808 {
809 return a | b;
810 }
811
812 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
813 _mm_xor_si128(__m128i a, __m128i b)
814 {
815 return a ^ b;
816 }
817
/* Passes VEC and IMM*8 to the pslldqi128 builtin (presumably: IMM counts bytes, the builtin takes bits). */
#define _mm_slli_si128(VEC, IMM) \
  ((__m128i)__builtin_ia32_pslldqi128((__m128i)(VEC), (IMM) * 8))
820
821 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
822 _mm_slli_epi16(__m128i a, int count)
823 {
824 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
825 }
826
827 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
828 _mm_sll_epi16(__m128i a, __m128i count)
829 {
830 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
831 }
832
833 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
834 _mm_slli_epi32(__m128i a, int count)
835 {
836 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
837 }
838
839 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
840 _mm_sll_epi32(__m128i a, __m128i count)
841 {
842 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
843 }
844
845 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
846 _mm_slli_epi64(__m128i a, int count)
847 {
848 return __builtin_ia32_psllqi128(a, count);
849 }
850
851 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
852 _mm_sll_epi64(__m128i a, __m128i count)
853 {
854 return __builtin_ia32_psllq128(a, count);
855 }
856
857 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
858 _mm_srai_epi16(__m128i a, int count)
859 {
860 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
861 }
862
863 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
864 _mm_sra_epi16(__m128i a, __m128i count)
865 {
866 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
867 }
868
869 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
870 _mm_srai_epi32(__m128i a, int count)
871 {
872 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
873 }
874
875 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
876 _mm_sra_epi32(__m128i a, __m128i count)
877 {
878 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
879 }
880
881
/* Passes VEC and IMM*8 to the psrldqi128 builtin (presumably: IMM counts bytes, the builtin takes bits). */
#define _mm_srli_si128(VEC, IMM) \
  ((__m128i)__builtin_ia32_psrldqi128((__m128i)(VEC), (IMM) * 8))
884
885 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
886 _mm_srli_epi16(__m128i a, int count)
887 {
888 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
889 }
890
891 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
892 _mm_srl_epi16(__m128i a, __m128i count)
893 {
894 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
895 }
896
897 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
898 _mm_srli_epi32(__m128i a, int count)
899 {
900 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
901 }
902
903 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
904 _mm_srl_epi32(__m128i a, __m128i count)
905 {
906 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
907 }
908
909 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
910 _mm_srli_epi64(__m128i a, int count)
911 {
912 return __builtin_ia32_psrlqi128(a, count);
913 }
914
915 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
916 _mm_srl_epi64(__m128i a, __m128i count)
917 {
918 return __builtin_ia32_psrlq128(a, count);
919 }
920
921 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
922 _mm_cmpeq_epi8(__m128i a, __m128i b)
923 {
924 return (__m128i)((__v16qi)a == (__v16qi)b);
925 }
926
927 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
928 _mm_cmpeq_epi16(__m128i a, __m128i b)
929 {
930 return (__m128i)((__v8hi)a == (__v8hi)b);
931 }
932
933 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
934 _mm_cmpeq_epi32(__m128i a, __m128i b)
935 {
936 return (__m128i)((__v4si)a == (__v4si)b);
937 }
938
939 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
940 _mm_cmpgt_epi8(__m128i a, __m128i b)
941 {
942 return (__m128i)((__v16qi)a > (__v16qi)b);
943 }
944
945 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
946 _mm_cmpgt_epi16(__m128i a, __m128i b)
947 {
948 return (__m128i)((__v8hi)a > (__v8hi)b);
949 }
950
951 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
952 _mm_cmpgt_epi32(__m128i a, __m128i b)
953 {
954 return (__m128i)((__v4si)a > (__v4si)b);
955 }
956
957 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
958 _mm_cmplt_epi8(__m128i a, __m128i b)
959 {
960 return _mm_cmpgt_epi8(b,a);
961 }
962
963 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
964 _mm_cmplt_epi16(__m128i a, __m128i b)
965 {
966 return _mm_cmpgt_epi16(b,a);
967 }
968
969 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
970 _mm_cmplt_epi32(__m128i a, __m128i b)
971 {
972 return _mm_cmpgt_epi32(b,a);
973 }
974
975 #ifdef __x86_64__
976 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
977 _mm_cvtsi64_sd(__m128d a, long long b)
978 {
979 a[0] = b;
980 return a;
981 }
982
983 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
984 _mm_cvtsd_si64(__m128d a)
985 {
986 return __builtin_ia32_cvtsd2si64(a);
987 }
988
989 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
990 _mm_cvttsd_si64(__m128d a)
991 {
992 return a[0];
993 }
994 #endif
995
996 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
997 _mm_cvtepi32_ps(__m128i a)
998 {
999 return __builtin_ia32_cvtdq2ps((__v4si)a);
1000 }
1001
1002 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1003 _mm_cvtps_epi32(__m128 a)
1004 {
1005 return (__m128i)__builtin_ia32_cvtps2dq(a);
1006 }
1007
1008 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1009 _mm_cvttps_epi32(__m128 a)
1010 {
1011 return (__m128i)__builtin_ia32_cvttps2dq(a);
1012 }
1013
1014 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1015 _mm_cvtsi32_si128(int a)
1016 {
1017 return (__m128i)(__v4si){ a, 0, 0, 0 };
1018 }
1019
1020 #ifdef __x86_64__
1021 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1022 _mm_cvtsi64_si128(long long a)
1023 {
1024 return (__m128i){ a, 0 };
1025 }
1026 #endif
1027
1028 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1029 _mm_cvtsi128_si32(__m128i a)
1030 {
1031 __v4si b = (__v4si)a;
1032 return b[0];
1033 }
1034
1035 #ifdef __x86_64__
1036 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1037 _mm_cvtsi128_si64(__m128i a)
1038 {
1039 return a[0];
1040 }
1041 #endif
1042
1043 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1044 _mm_load_si128(__m128i const *p)
1045 {
1046 return *p;
1047 }
1048
1049 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1050 _mm_loadu_si128(__m128i const *p)
1051 {
1052 struct __loadu_si128 {
1053 __m128i v;
1054 } __attribute__((packed, may_alias));
1055 return ((struct __loadu_si128*)p)->v;
1056 }
1057
1058 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1059 _mm_loadl_epi64(__m128i const *p)
1060 {
1061 struct __mm_loadl_epi64_struct {
1062 long long u;
1063 } __attribute__((__packed__, __may_alias__));
1064 return (__m128i) { ((struct __mm_loadl_epi64_struct*)p)->u, 0};
1065 }
1066
1067 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1068 _mm_set_epi64x(long long q1, long long q0)
1069 {
1070 return (__m128i){ q0, q1 };
1071 }
1072
1073 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1074 _mm_set_epi64(__m64 q1, __m64 q0)
1075 {
1076 return (__m128i){ (long long)q0, (long long)q1 };
1077 }
1078
1079 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1080 _mm_set_epi32(int i3, int i2, int i1, int i0)
1081 {
1082 return (__m128i)(__v4si){ i0, i1, i2, i3};
1083 }
1084
1085 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1086 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1087 {
1088 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1089 }
1090
1091 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1092 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9 , char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b 0)
1093 {
1094 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1095 }
1096
1097 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1098 _mm_set1_epi64x(long long q)
1099 {
1100 return (__m128i){ q, q };
1101 }
1102
1103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1104 _mm_set1_epi64(__m64 q)
1105 {
1106 return (__m128i){ (long long)q, (long long)q };
1107 }
1108
1109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1110 _mm_set1_epi32(int i)
1111 {
1112 return (__m128i)(__v4si){ i, i, i, i };
1113 }
1114
1115 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1116 _mm_set1_epi16(short w)
1117 {
1118 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1119 }
1120
1121 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1122 _mm_set1_epi8(char b)
1123 {
1124 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1125 }
1126
1127 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1128 _mm_setr_epi64(__m64 q0, __m64 q1)
1129 {
1130 return (__m128i){ (long long)q0, (long long)q1 };
1131 }
1132
1133 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1134 _mm_setr_epi32(int i0, int i1, int i2, int i3)
1135 {
1136 return (__m128i)(__v4si){ i0, i1, i2, i3};
1137 }
1138
1139 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1140 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1141 {
1142 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1143 }
1144
1145 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1146 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, cha r b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b 15)
1147 {
1148 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1149 }
1150
1151 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1152 _mm_setzero_si128(void)
1153 {
1154 return (__m128i){ 0LL, 0LL };
1155 }
1156
1157 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1158 _mm_store_si128(__m128i *p, __m128i b)
1159 {
1160 *p = b;
1161 }
1162
1163 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1164 _mm_storeu_si128(__m128i *p, __m128i b)
1165 {
1166 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1167 }
1168
1169 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1170 _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1171 {
1172 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1173 }
1174
1175 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1176 _mm_storel_epi64(__m128i *p, __m128i a)
1177 {
1178 __builtin_ia32_storelv4si((__v2si *)p, a);
1179 }
1180
1181 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1182 _mm_stream_pd(double *p, __m128d a)
1183 {
1184 __builtin_ia32_movntpd(p, a);
1185 }
1186
1187 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1188 _mm_stream_si128(__m128i *p, __m128i a)
1189 {
1190 __builtin_ia32_movntdq(p, a);
1191 }
1192
/* Writes the 32-bit integer a to p using the movnti builtin
 * (non-temporal store). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  int *__dst = p;
  __builtin_ia32_movnti(__dst, a);
}
1198
/* Issues the clflush builtin for the address p. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  void const *__addr = p;
  __builtin_ia32_clflush(__addr);
}
1204
/* Issues a load fence by calling the lfence builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
  return;
}
1210
/* Issues a full memory fence by calling the mfence builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
  return;
}
1216
1217 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1218 _mm_packs_epi16(__m128i a, __m128i b)
1219 {
1220 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1221 }
1222
1223 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1224 _mm_packs_epi32(__m128i a, __m128i b)
1225 {
1226 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1227 }
1228
1229 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1230 _mm_packus_epi16(__m128i a, __m128i b)
1231 {
1232 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1233 }
1234
1235 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1236 _mm_extract_epi16(__m128i a, int imm)
1237 {
1238 __v8hi b = (__v8hi)a;
1239 return (unsigned short)b[imm];
1240 }
1241
1242 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1243 _mm_insert_epi16(__m128i a, int b, int imm)
1244 {
1245 __v8hi c = (__v8hi)a;
1246 c[imm & 7] = b;
1247 return (__m128i)c;
1248 }
1249
1250 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1251 _mm_movemask_epi8(__m128i a)
1252 {
1253 return __builtin_ia32_pmovmskb128((__v16qi)a);
1254 }
1255
/* Permutes the four 32-bit lanes of a: result lane k is the lane of a
 * selected by bits [2k+1:2k] of imm. Every index is in 0..3, so the
 * all-zero second shuffle operand is never selected. imm must be a
 * compile-time constant because it feeds __builtin_shufflevector. */
#define _mm_shuffle_epi32(a, imm) \
  ((__m128i)__builtin_shufflevector((__v4si)(a), \
                                    (__v4si)_mm_set1_epi32(0), \
                                    ((imm) & 0x03) >> 0, \
                                    ((imm) & 0x0c) >> 2, \
                                    ((imm) & 0x30) >> 4, \
                                    ((imm) & 0xc0) >> 6))
1260
1261
/* Permutes the four low 16-bit lanes of a: result lane k (k = 0..3) is the
 * low lane selected by bits [2k+1:2k] of imm; lanes 4..7 are copied
 * unchanged. The all-zero second shuffle operand is never selected. */
#define _mm_shufflelo_epi16(a, imm) \
  ((__m128i)__builtin_shufflevector((__v8hi)(a), \
                                    (__v8hi)_mm_set1_epi16(0), \
                                    ((imm) & 0x03) >> 0, \
                                    ((imm) & 0x0c) >> 2, \
                                    ((imm) & 0x30) >> 4, \
                                    ((imm) & 0xc0) >> 6, \
                                    4, 5, 6, 7))
/* Permutes the four high 16-bit lanes of a: lanes 0..3 are copied
 * unchanged, and result lane 4+k (k = 0..3) is high lane 4 + (bits
 * [2k+1:2k] of imm). The all-zero second shuffle operand is never
 * selected. */
#define _mm_shufflehi_epi16(a, imm) \
  ((__m128i)__builtin_shufflevector((__v8hi)(a), \
                                    (__v8hi)_mm_set1_epi16(0), \
                                    0, 1, 2, 3, \
                                    (((imm) & 0x03) >> 0) + 4, \
                                    (((imm) & 0x0c) >> 2) + 4, \
                                    (((imm) & 0x30) >> 4) + 4, \
                                    (((imm) & 0xc0) >> 6) + 4))
1273
1274 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1275 _mm_unpackhi_epi8(__m128i a, __m128i b)
1276 {
1277 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16 +9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1278 }
1279
1280 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1281 _mm_unpackhi_epi16(__m128i a, __m128i b)
1282 {
1283 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1284 }
1285
1286 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1287 _mm_unpackhi_epi32(__m128i a, __m128i b)
1288 {
1289 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
1290 }
1291
1292 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1293 _mm_unpackhi_epi64(__m128i a, __m128i b)
1294 {
1295 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
1296 }
1297
1298 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1299 _mm_unpacklo_epi8(__m128i a, __m128i b)
1300 {
1301 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16 +1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1302 }
1303
1304 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1305 _mm_unpacklo_epi16(__m128i a, __m128i b)
1306 {
1307 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1308 }
1309
1310 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1311 _mm_unpacklo_epi32(__m128i a, __m128i b)
1312 {
1313 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
1314 }
1315
1316 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1317 _mm_unpacklo_epi64(__m128i a, __m128i b)
1318 {
1319 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
1320 }
1321
1322 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
1323 _mm_movepi64_pi64(__m128i a)
1324 {
1325 return (__m64)a[0];
1326 }
1327
1328 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1329 _mm_movpi64_pi64(__m64 a)
1330 {
1331 return (__m128i){ (long long)a, 0 };
1332 }
1333
1334 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1335 _mm_move_epi64(__m128i a)
1336 {
1337 return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
1338 }
1339
1340 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1341 _mm_unpackhi_pd(__m128d a, __m128d b)
1342 {
1343 return __builtin_shufflevector(a, b, 1, 2+1);
1344 }
1345
1346 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1347 _mm_unpacklo_pd(__m128d a, __m128d b)
1348 {
1349 return __builtin_shufflevector(a, b, 0, 2+0);
1350 }
1351
1352 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1353 _mm_movemask_pd(__m128d a)
1354 {
1355 return __builtin_ia32_movmskpd(a);
1356 }
1357
/* Builds a two-double vector: element 0 is element (i & 1) of a, and
 * element 1 is element ((i >> 1) & 1) of b (shuffle indices 2 and 3 name
 * the elements of b). i must be a compile-time constant. */
#define _mm_shuffle_pd(a, b, i) \
  (__builtin_shufflevector((__m128d)(a), (__m128d)(b), \
                           (i) & 1, \
                           2 + (((i) & 2) >> 1)))
1361
1362 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1363 _mm_castpd_ps(__m128d in)
1364 {
1365 return (__m128)in;
1366 }
1367
1368 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1369 _mm_castpd_si128(__m128d in)
1370 {
1371 return (__m128i)in;
1372 }
1373
1374 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1375 _mm_castps_pd(__m128 in)
1376 {
1377 return (__m128d)in;
1378 }
1379
1380 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1381 _mm_castps_si128(__m128 in)
1382 {
1383 return (__m128i)in;
1384 }
1385
1386 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1387 _mm_castsi128_ps(__m128i in)
1388 {
1389 return (__m128)in;
1390 }
1391
1392 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1393 _mm_castsi128_pd(__m128i in)
1394 {
1395 return (__m128d)in;
1396 }
1397
/* Emits a single `pause` instruction. The asm statement is volatile so the
 * compiler does not remove it even though it has no outputs. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1403
/* Builds a two-bit shuffle selector: x goes in bit 1 and y in bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1405
1406 #endif /* __SSE2__ */
1407
1408 #endif /* __EMMINTRIN_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698