Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(608)

Side by Side Diff: Linux_ia32/lib/clang/3.2/include/emmintrin.h

Issue 11348245: Add 32-bit version of llvm-allocated-type. Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/llvm-allocated-type/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Linux_ia32/lib/clang/3.2/include/cpuid.h ('k') | Linux_ia32/lib/clang/3.2/include/f16cintrin.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26
27 #ifndef __SSE2__
28 #error "SSE2 instruction set not enabled"
29 #else
30
31 #include <xmmintrin.h>
32
33 typedef double __m128d __attribute__((__vector_size__(16)));
34 typedef long long __m128i __attribute__((__vector_size__(16)));
35
36 /* Type defines. */
37 typedef double __v2df __attribute__ ((__vector_size__ (16)));
38 typedef long long __v2di __attribute__ ((__vector_size__ (16)));
39 typedef short __v8hi __attribute__((__vector_size__(16)));
40 typedef char __v16qi __attribute__((__vector_size__(16)));
41
/* Adds element 0 of b to element 0 of a and returns the updated a;
 * element 1 of a is returned unchanged. */
42 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
43 _mm_add_sd(__m128d a, __m128d b)
44 {
45 a[0] += b[0];
46 return a;
47 }
48
49 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
50 _mm_add_pd(__m128d a, __m128d b)
51 {
52 return a + b;
53 }
54
55 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
56 _mm_sub_sd(__m128d a, __m128d b)
57 {
58 a[0] -= b[0];
59 return a;
60 }
61
62 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
63 _mm_sub_pd(__m128d a, __m128d b)
64 {
65 return a - b;
66 }
67
68 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
69 _mm_mul_sd(__m128d a, __m128d b)
70 {
71 a[0] *= b[0];
72 return a;
73 }
74
75 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
76 _mm_mul_pd(__m128d a, __m128d b)
77 {
78 return a * b;
79 }
80
81 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
82 _mm_div_sd(__m128d a, __m128d b)
83 {
84 a[0] /= b[0];
85 return a;
86 }
87
88 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
89 _mm_div_pd(__m128d a, __m128d b)
90 {
91 return a / b;
92 }
93
/* Builds a result whose element 0 is element 0 of __builtin_ia32_sqrtsd(b)
 * and whose element 1 is copied from a. Note that the value fed to the
 * square-root builtin comes from b, not a. */
94 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
95 _mm_sqrt_sd(__m128d a, __m128d b)
96 {
97 __m128d c = __builtin_ia32_sqrtsd(b);
98 return (__m128d) { c[0], a[1] };
99 }
100
101 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102 _mm_sqrt_pd(__m128d a)
103 {
104 return __builtin_ia32_sqrtpd(a);
105 }
106
107 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
108 _mm_min_sd(__m128d a, __m128d b)
109 {
110 return __builtin_ia32_minsd(a, b);
111 }
112
113 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114 _mm_min_pd(__m128d a, __m128d b)
115 {
116 return __builtin_ia32_minpd(a, b);
117 }
118
119 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
120 _mm_max_sd(__m128d a, __m128d b)
121 {
122 return __builtin_ia32_maxsd(a, b);
123 }
124
125 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126 _mm_max_pd(__m128d a, __m128d b)
127 {
128 return __builtin_ia32_maxpd(a, b);
129 }
130
131 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
132 _mm_and_pd(__m128d a, __m128d b)
133 {
134 return (__m128d)((__v4si)a & (__v4si)b);
135 }
136
/* Returns (~a) & b, computed bitwise on both operands reinterpreted as
 * __v4si; it is the first operand that is complemented. */
137 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138 _mm_andnot_pd(__m128d a, __m128d b)
139 {
140 return (__m128d)(~(__v4si)a & (__v4si)b);
141 }
142
143 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
144 _mm_or_pd(__m128d a, __m128d b)
145 {
146 return (__m128d)((__v4si)a | (__v4si)b);
147 }
148
149 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150 _mm_xor_pd(__m128d a, __m128d b)
151 {
152 return (__m128d)((__v4si)a ^ (__v4si)b);
153 }
154
155 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
156 _mm_cmpeq_pd(__m128d a, __m128d b)
157 {
158 return (__m128d)__builtin_ia32_cmppd(a, b, 0);
159 }
160
161 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
162 _mm_cmplt_pd(__m128d a, __m128d b)
163 {
164 return (__m128d)__builtin_ia32_cmppd(a, b, 1);
165 }
166
167 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
168 _mm_cmple_pd(__m128d a, __m128d b)
169 {
170 return (__m128d)__builtin_ia32_cmppd(a, b, 2);
171 }
172
/* Packed greater-than, expressed through __builtin_ia32_cmppd with the
 * operands swapped (b, a) and predicate immediate 1 -- the same immediate
 * _mm_cmplt_pd passes with the operands in (a, b) order. */
173 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
174 _mm_cmpgt_pd(__m128d a, __m128d b)
175 {
176 return (__m128d)__builtin_ia32_cmppd(b, a, 1);
177 }
178
179 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
180 _mm_cmpge_pd(__m128d a, __m128d b)
181 {
182 return (__m128d)__builtin_ia32_cmppd(b, a, 2);
183 }
184
185 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
186 _mm_cmpord_pd(__m128d a, __m128d b)
187 {
188 return (__m128d)__builtin_ia32_cmppd(a, b, 7);
189 }
190
191 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
192 _mm_cmpunord_pd(__m128d a, __m128d b)
193 {
194 return (__m128d)__builtin_ia32_cmppd(a, b, 3);
195 }
196
197 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
198 _mm_cmpneq_pd(__m128d a, __m128d b)
199 {
200 return (__m128d)__builtin_ia32_cmppd(a, b, 4);
201 }
202
203 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
204 _mm_cmpnlt_pd(__m128d a, __m128d b)
205 {
206 return (__m128d)__builtin_ia32_cmppd(a, b, 5);
207 }
208
209 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
210 _mm_cmpnle_pd(__m128d a, __m128d b)
211 {
212 return (__m128d)__builtin_ia32_cmppd(a, b, 6);
213 }
214
215 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
216 _mm_cmpngt_pd(__m128d a, __m128d b)
217 {
218 return (__m128d)__builtin_ia32_cmppd(b, a, 5);
219 }
220
221 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
222 _mm_cmpnge_pd(__m128d a, __m128d b)
223 {
224 return (__m128d)__builtin_ia32_cmppd(b, a, 6);
225 }
226
227 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
228 _mm_cmpeq_sd(__m128d a, __m128d b)
229 {
230 return (__m128d)__builtin_ia32_cmpsd(a, b, 0);
231 }
232
233 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
234 _mm_cmplt_sd(__m128d a, __m128d b)
235 {
236 return (__m128d)__builtin_ia32_cmpsd(a, b, 1);
237 }
238
239 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
240 _mm_cmple_sd(__m128d a, __m128d b)
241 {
242 return (__m128d)__builtin_ia32_cmpsd(a, b, 2);
243 }
244
/* Scalar greater-than: compares element 0 of a with element 0 of b and
 * passes element 1 of a through to the result.
 * The comparison is issued as "b < a" (operands swapped, predicate 1), so
 * the builtin's own result carries b's upper element. Element 1 is
 * therefore taken from a explicitly, the same way _mm_sqrt_sd rebuilds its
 * result from the builtin's element 0 and a[1]. */
245 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
246 _mm_cmpgt_sd(__m128d a, __m128d b)
247 {
248 __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 1);
return (__m128d) { c[0], a[1] };
249 }
250
/* Scalar greater-or-equal: compares element 0 of a with element 0 of b and
 * passes element 1 of a through to the result.
 * The comparison is issued as "b <= a" (operands swapped, predicate 2), so
 * the builtin's own result carries b's upper element. Element 1 is
 * therefore taken from a explicitly. */
251 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
252 _mm_cmpge_sd(__m128d a, __m128d b)
253 {
254 __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 2);
return (__m128d) { c[0], a[1] };
255 }
256
257 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
258 _mm_cmpord_sd(__m128d a, __m128d b)
259 {
260 return (__m128d)__builtin_ia32_cmpsd(a, b, 7);
261 }
262
263 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
264 _mm_cmpunord_sd(__m128d a, __m128d b)
265 {
266 return (__m128d)__builtin_ia32_cmpsd(a, b, 3);
267 }
268
269 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
270 _mm_cmpneq_sd(__m128d a, __m128d b)
271 {
272 return (__m128d)__builtin_ia32_cmpsd(a, b, 4);
273 }
274
275 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
276 _mm_cmpnlt_sd(__m128d a, __m128d b)
277 {
278 return (__m128d)__builtin_ia32_cmpsd(a, b, 5);
279 }
280
281 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
282 _mm_cmpnle_sd(__m128d a, __m128d b)
283 {
284 return (__m128d)__builtin_ia32_cmpsd(a, b, 6);
285 }
286
/* Scalar not-greater-than: compares element 0 of a with element 0 of b and
 * passes element 1 of a through to the result.
 * The comparison is issued as "b not-less-than a" (operands swapped,
 * predicate 5), so the builtin's own result carries b's upper element.
 * Element 1 is therefore taken from a explicitly. */
287 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
288 _mm_cmpngt_sd(__m128d a, __m128d b)
289 {
290 __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 5);
return (__m128d) { c[0], a[1] };
291 }
292
/* Scalar not-greater-or-equal: compares element 0 of a with element 0 of b
 * and passes element 1 of a through to the result.
 * The comparison is issued as "b not-less-or-equal a" (operands swapped,
 * predicate 6), so the builtin's own result carries b's upper element.
 * Element 1 is therefore taken from a explicitly. */
293 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
294 _mm_cmpnge_sd(__m128d a, __m128d b)
295 {
296 __m128d c = (__m128d)__builtin_ia32_cmpsd(b, a, 6);
return (__m128d) { c[0], a[1] };
297 }
298
299 static __inline__ int __attribute__((__always_inline__, __nodebug__))
300 _mm_comieq_sd(__m128d a, __m128d b)
301 {
302 return __builtin_ia32_comisdeq(a, b);
303 }
304
305 static __inline__ int __attribute__((__always_inline__, __nodebug__))
306 _mm_comilt_sd(__m128d a, __m128d b)
307 {
308 return __builtin_ia32_comisdlt(a, b);
309 }
310
311 static __inline__ int __attribute__((__always_inline__, __nodebug__))
312 _mm_comile_sd(__m128d a, __m128d b)
313 {
314 return __builtin_ia32_comisdle(a, b);
315 }
316
317 static __inline__ int __attribute__((__always_inline__, __nodebug__))
318 _mm_comigt_sd(__m128d a, __m128d b)
319 {
320 return __builtin_ia32_comisdgt(a, b);
321 }
322
323 static __inline__ int __attribute__((__always_inline__, __nodebug__))
324 _mm_comige_sd(__m128d a, __m128d b)
325 {
326 return __builtin_ia32_comisdge(a, b);
327 }
328
329 static __inline__ int __attribute__((__always_inline__, __nodebug__))
330 _mm_comineq_sd(__m128d a, __m128d b)
331 {
332 return __builtin_ia32_comisdneq(a, b);
333 }
334
335 static __inline__ int __attribute__((__always_inline__, __nodebug__))
336 _mm_ucomieq_sd(__m128d a, __m128d b)
337 {
338 return __builtin_ia32_ucomisdeq(a, b);
339 }
340
341 static __inline__ int __attribute__((__always_inline__, __nodebug__))
342 _mm_ucomilt_sd(__m128d a, __m128d b)
343 {
344 return __builtin_ia32_ucomisdlt(a, b);
345 }
346
347 static __inline__ int __attribute__((__always_inline__, __nodebug__))
348 _mm_ucomile_sd(__m128d a, __m128d b)
349 {
350 return __builtin_ia32_ucomisdle(a, b);
351 }
352
353 static __inline__ int __attribute__((__always_inline__, __nodebug__))
354 _mm_ucomigt_sd(__m128d a, __m128d b)
355 {
356 return __builtin_ia32_ucomisdgt(a, b);
357 }
358
359 static __inline__ int __attribute__((__always_inline__, __nodebug__))
360 _mm_ucomige_sd(__m128d a, __m128d b)
361 {
362 return __builtin_ia32_ucomisdge(a, b);
363 }
364
365 static __inline__ int __attribute__((__always_inline__, __nodebug__))
366 _mm_ucomineq_sd(__m128d a, __m128d b)
367 {
368 return __builtin_ia32_ucomisdneq(a, b);
369 }
370
371 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
372 _mm_cvtpd_ps(__m128d a)
373 {
374 return __builtin_ia32_cvtpd2ps(a);
375 }
376
377 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
378 _mm_cvtps_pd(__m128 a)
379 {
380 return __builtin_ia32_cvtps2pd(a);
381 }
382
383 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
384 _mm_cvtepi32_pd(__m128i a)
385 {
386 return __builtin_ia32_cvtdq2pd((__v4si)a);
387 }
388
389 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
390 _mm_cvtpd_epi32(__m128d a)
391 {
392 return __builtin_ia32_cvtpd2dq(a);
393 }
394
395 static __inline__ int __attribute__((__always_inline__, __nodebug__))
396 _mm_cvtsd_si32(__m128d a)
397 {
398 return __builtin_ia32_cvtsd2si(a);
399 }
400
401 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
402 _mm_cvtsd_ss(__m128 a, __m128d b)
403 {
404 a[0] = b[0];
405 return a;
406 }
407
408 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
409 _mm_cvtsi32_sd(__m128d a, int b)
410 {
411 a[0] = b;
412 return a;
413 }
414
415 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
416 _mm_cvtss_sd(__m128d a, __m128 b)
417 {
418 a[0] = b[0];
419 return a;
420 }
421
422 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
423 _mm_cvttpd_epi32(__m128d a)
424 {
425 return (__m128i)__builtin_ia32_cvttpd2dq(a);
426 }
427
/* Converts element 0 of a to a 32-bit int, truncating toward zero.
 * Goes through the CVTTSD2SI builtin instead of a plain C double-to-int
 * conversion: the C conversion is undefined when the value is NaN or does
 * not fit in int, so the optimizer may assume such inputs never occur,
 * whereas the instruction has a defined result for them. */
428 static __inline__ int __attribute__((__always_inline__, __nodebug__))
429 _mm_cvttsd_si32(__m128d a)
430 {
431 return __builtin_ia32_cvttsd2si(a);
432 }
433
434 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
435 _mm_cvtpd_pi32(__m128d a)
436 {
437 return (__m64)__builtin_ia32_cvtpd2pi(a);
438 }
439
440 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
441 _mm_cvttpd_pi32(__m128d a)
442 {
443 return (__m64)__builtin_ia32_cvttpd2pi(a);
444 }
445
446 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
447 _mm_cvtpi32_pd(__m64 a)
448 {
449 return __builtin_ia32_cvtpi2pd((__v2si)a);
450 }
451
452 static __inline__ double __attribute__((__always_inline__, __nodebug__))
453 _mm_cvtsd_f64(__m128d a)
454 {
455 return a[0];
456 }
457
/* Loads a __m128d by dereferencing dp directly as a __m128d pointer.
 * NOTE(review): presumably dp must be 16-byte aligned -- unlike
 * _mm_loadu_pd, no packed wrapper struct is used here; confirm. */
458 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
459 _mm_load_pd(double const *dp)
460 {
461 return *(__m128d*)dp;
462 }
463
/* Reads one double from dp through a packed, may_alias wrapper struct and
 * returns a vector holding that value in both elements. */
464 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
465 _mm_load1_pd(double const *dp)
466 {
467 struct __mm_load1_pd_struct {
468 double u;
469 } __attribute__((__packed__, __may_alias__));
470 double u = ((struct __mm_load1_pd_struct*)dp)->u;
471 return (__m128d){ u, u };
472 }
473
474 #define _mm_load_pd1(dp) _mm_load1_pd(dp)
475
/* Dereferences dp as a __m128d pointer, then returns the loaded vector with
 * its two elements swapped (shuffle indices 1, 0). */
476 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
477 _mm_loadr_pd(double const *dp)
478 {
479 __m128d u = *(__m128d*)dp;
480 return __builtin_shufflevector(u, u, 1, 0);
481 }
482
/* Loads a __m128d from dp through a packed, may_alias wrapper struct rather
 * than by dereferencing dp as a __m128d pointer.
 * The attribute names use the reserved __x__ spelling, like the other
 * wrapper structs in this header, so a user macro named "packed" or
 * "may_alias" defined before inclusion cannot change this declaration. */
483 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
484 _mm_loadu_pd(double const *dp)
485 {
486 struct __loadu_pd {
487 __m128d v;
488 } __attribute__((__packed__, __may_alias__));
489 return ((struct __loadu_pd*)dp)->v;
490 }
491
492 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
493 _mm_load_sd(double const *dp)
494 {
495 struct __mm_load_sd_struct {
496 double u;
497 } __attribute__((__packed__, __may_alias__));
498 double u = ((struct __mm_load_sd_struct*)dp)->u;
499 return (__m128d){ u, 0 };
500 }
501
502 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
503 _mm_loadh_pd(__m128d a, double const *dp)
504 {
505 struct __mm_loadh_pd_struct {
506 double u;
507 } __attribute__((__packed__, __may_alias__));
508 double u = ((struct __mm_loadh_pd_struct*)dp)->u;
509 return (__m128d){ a[0], u };
510 }
511
512 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
513 _mm_loadl_pd(__m128d a, double const *dp)
514 {
515 struct __mm_loadl_pd_struct {
516 double u;
517 } __attribute__((__packed__, __may_alias__));
518 double u = ((struct __mm_loadl_pd_struct*)dp)->u;
519 return (__m128d){ u, a[1] };
520 }
521
522 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
523 _mm_set_sd(double w)
524 {
525 return (__m128d){ w, 0 };
526 }
527
528 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
529 _mm_set1_pd(double w)
530 {
531 return (__m128d){ w, w };
532 }
533
/* Builds a vector from two doubles given upper element first: x becomes
 * element 0 and w becomes element 1. */
534 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
535 _mm_set_pd(double w, double x)
536 {
537 return (__m128d){ x, w };
538 }
539
/* Builds a vector from two doubles in element order: w becomes element 0
 * and x becomes element 1 (the reverse of _mm_set_pd's placement). */
540 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
541 _mm_setr_pd(double w, double x)
542 {
543 return (__m128d){ w, x };
544 }
545
546 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
547 _mm_setzero_pd(void)
548 {
549 return (__m128d){ 0, 0 };
550 }
551
/* Returns a vector whose element 0 comes from b and whose element 1 comes
 * from a. */
552 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
553 _mm_move_sd(__m128d a, __m128d b)
554 {
555 return (__m128d){ b[0], a[1] };
556 }
557
558 static __inline__ void __attribute__((__always_inline__, __nodebug__))
559 _mm_store_sd(double *dp, __m128d a)
560 {
561 struct __mm_store_sd_struct {
562 double u;
563 } __attribute__((__packed__, __may_alias__));
564 ((struct __mm_store_sd_struct*)dp)->u = a[0];
565 }
566
/* Writes element 0 of a to both dp[0] and dp[1]. The stores go through a
 * packed, may_alias struct that wraps a two-element double array. */
567 static __inline__ void __attribute__((__always_inline__, __nodebug__))
568 _mm_store1_pd(double *dp, __m128d a)
569 {
570 struct __mm_store1_pd_struct {
571 double u[2];
572 } __attribute__((__packed__, __may_alias__));
573 ((struct __mm_store1_pd_struct*)dp)->u[0] = a[0];
574 ((struct __mm_store1_pd_struct*)dp)->u[1] = a[0];
575 }
576
577 static __inline__ void __attribute__((__always_inline__, __nodebug__))
578 _mm_store_pd(double *dp, __m128d a)
579 {
580 *(__m128d *)dp = a;
581 }
582
583 static __inline__ void __attribute__((__always_inline__, __nodebug__))
584 _mm_storeu_pd(double *dp, __m128d a)
585 {
586 __builtin_ia32_storeupd(dp, a);
587 }
588
/* Swaps the two elements of a (shuffle indices 1, 0) and stores the result
 * by writing through dp as a __m128d pointer. */
589 static __inline__ void __attribute__((__always_inline__, __nodebug__))
590 _mm_storer_pd(double *dp, __m128d a)
591 {
592 a = __builtin_shufflevector(a, a, 1, 0);
593 *(__m128d *)dp = a;
594 }
595
596 static __inline__ void __attribute__((__always_inline__, __nodebug__))
597 _mm_storeh_pd(double *dp, __m128d a)
598 {
599 struct __mm_storeh_pd_struct {
600 double u;
601 } __attribute__((__packed__, __may_alias__));
602 ((struct __mm_storeh_pd_struct*)dp)->u = a[1];
603 }
604
/* Stores element 0 of a to *dp through a packed, may_alias wrapper struct.
 * The wrapper struct is named after this function, following the
 * __mm_<function>_struct pattern used by the neighbouring load/store
 * helpers (it previously reused _mm_storeh_pd's struct name). */
605 static __inline__ void __attribute__((__always_inline__, __nodebug__))
606 _mm_storel_pd(double *dp, __m128d a)
607 {
608 struct __mm_storel_pd_struct {
609 double u;
610 } __attribute__((__packed__, __may_alias__));
611 ((struct __mm_storel_pd_struct*)dp)->u = a[0];
612 }
613
614 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
615 _mm_add_epi8(__m128i a, __m128i b)
616 {
617 return (__m128i)((__v16qi)a + (__v16qi)b);
618 }
619
620 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
621 _mm_add_epi16(__m128i a, __m128i b)
622 {
623 return (__m128i)((__v8hi)a + (__v8hi)b);
624 }
625
626 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
627 _mm_add_epi32(__m128i a, __m128i b)
628 {
629 return (__m128i)((__v4si)a + (__v4si)b);
630 }
631
632 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
633 _mm_add_si64(__m64 a, __m64 b)
634 {
635 return a + b;
636 }
637
638 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
639 _mm_add_epi64(__m128i a, __m128i b)
640 {
641 return a + b;
642 }
643
644 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
645 _mm_adds_epi8(__m128i a, __m128i b)
646 {
647 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
648 }
649
650 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
651 _mm_adds_epi16(__m128i a, __m128i b)
652 {
653 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
654 }
655
656 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
657 _mm_adds_epu8(__m128i a, __m128i b)
658 {
659 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
660 }
661
662 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
663 _mm_adds_epu16(__m128i a, __m128i b)
664 {
665 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
666 }
667
668 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
669 _mm_avg_epu8(__m128i a, __m128i b)
670 {
671 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
672 }
673
674 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
675 _mm_avg_epu16(__m128i a, __m128i b)
676 {
677 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
678 }
679
680 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
681 _mm_madd_epi16(__m128i a, __m128i b)
682 {
683 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
684 }
685
686 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
687 _mm_max_epi16(__m128i a, __m128i b)
688 {
689 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
690 }
691
692 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
693 _mm_max_epu8(__m128i a, __m128i b)
694 {
695 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
696 }
697
698 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
699 _mm_min_epi16(__m128i a, __m128i b)
700 {
701 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
702 }
703
704 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
705 _mm_min_epu8(__m128i a, __m128i b)
706 {
707 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
708 }
709
710 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
711 _mm_mulhi_epi16(__m128i a, __m128i b)
712 {
713 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
714 }
715
716 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
717 _mm_mulhi_epu16(__m128i a, __m128i b)
718 {
719 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
720 }
721
722 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
723 _mm_mullo_epi16(__m128i a, __m128i b)
724 {
725 return (__m128i)((__v8hi)a * (__v8hi)b);
726 }
727
728 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
729 _mm_mul_su32(__m64 a, __m64 b)
730 {
731 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
732 }
733
734 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
735 _mm_mul_epu32(__m128i a, __m128i b)
736 {
737 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
738 }
739
740 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
741 _mm_sad_epu8(__m128i a, __m128i b)
742 {
743 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
744 }
745
746 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
747 _mm_sub_epi8(__m128i a, __m128i b)
748 {
749 return (__m128i)((__v16qi)a - (__v16qi)b);
750 }
751
752 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
753 _mm_sub_epi16(__m128i a, __m128i b)
754 {
755 return (__m128i)((__v8hi)a - (__v8hi)b);
756 }
757
758 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
759 _mm_sub_epi32(__m128i a, __m128i b)
760 {
761 return (__m128i)((__v4si)a - (__v4si)b);
762 }
763
764 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
765 _mm_sub_si64(__m64 a, __m64 b)
766 {
767 return a - b;
768 }
769
770 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
771 _mm_sub_epi64(__m128i a, __m128i b)
772 {
773 return a - b;
774 }
775
776 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
777 _mm_subs_epi8(__m128i a, __m128i b)
778 {
779 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
780 }
781
782 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
783 _mm_subs_epi16(__m128i a, __m128i b)
784 {
785 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
786 }
787
788 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
789 _mm_subs_epu8(__m128i a, __m128i b)
790 {
791 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
792 }
793
794 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
795 _mm_subs_epu16(__m128i a, __m128i b)
796 {
797 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
798 }
799
800 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
801 _mm_and_si128(__m128i a, __m128i b)
802 {
803 return a & b;
804 }
805
806 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
807 _mm_andnot_si128(__m128i a, __m128i b)
808 {
809 return ~a & b;
810 }
811
812 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
813 _mm_or_si128(__m128i a, __m128i b)
814 {
815 return a | b;
816 }
817
818 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
819 _mm_xor_si128(__m128i a, __m128i b)
820 {
821 return a ^ b;
822 }
823
/* Statement-expression macro: evaluates a once into a local __m128i, then
 * calls __builtin_ia32_pslldqi128 with count multiplied by 8 (presumably
 * turning a byte count into the bit count the builtin expects -- confirm).
 * count is substituted directly into the builtin call rather than copied
 * into a local. */
824 #define _mm_slli_si128(a, count) __extension__ ({ \
825 __m128i __a = (a); \
826 (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
827
828 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
829 _mm_slli_epi16(__m128i a, int count)
830 {
831 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
832 }
833
834 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
835 _mm_sll_epi16(__m128i a, __m128i count)
836 {
837 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
838 }
839
840 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
841 _mm_slli_epi32(__m128i a, int count)
842 {
843 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
844 }
845
846 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
847 _mm_sll_epi32(__m128i a, __m128i count)
848 {
849 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
850 }
851
852 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
853 _mm_slli_epi64(__m128i a, int count)
854 {
855 return __builtin_ia32_psllqi128(a, count);
856 }
857
858 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
859 _mm_sll_epi64(__m128i a, __m128i count)
860 {
861 return __builtin_ia32_psllq128(a, count);
862 }
863
864 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
865 _mm_srai_epi16(__m128i a, int count)
866 {
867 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
868 }
869
870 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
871 _mm_sra_epi16(__m128i a, __m128i count)
872 {
873 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
874 }
875
876 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
877 _mm_srai_epi32(__m128i a, int count)
878 {
879 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
880 }
881
882 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
883 _mm_sra_epi32(__m128i a, __m128i count)
884 {
885 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
886 }
887
888
/* Statement-expression macro: evaluates a once into a local __m128i, then
 * calls __builtin_ia32_psrldqi128 with count multiplied by 8 (presumably
 * turning a byte count into the bit count the builtin expects -- confirm).
 * count is substituted directly into the builtin call rather than copied
 * into a local. */
889 #define _mm_srli_si128(a, count) __extension__ ({ \
890 __m128i __a = (a); \
891 (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
892
893 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
894 _mm_srli_epi16(__m128i a, int count)
895 {
896 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
897 }
898
899 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
900 _mm_srl_epi16(__m128i a, __m128i count)
901 {
902 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
903 }
904
905 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
906 _mm_srli_epi32(__m128i a, int count)
907 {
908 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
909 }
910
911 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
912 _mm_srl_epi32(__m128i a, __m128i count)
913 {
914 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
915 }
916
917 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
918 _mm_srli_epi64(__m128i a, int count)
919 {
920 return __builtin_ia32_psrlqi128(a, count);
921 }
922
923 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
924 _mm_srl_epi64(__m128i a, __m128i count)
925 {
926 return __builtin_ia32_psrlq128(a, count);
927 }
928
929 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
930 _mm_cmpeq_epi8(__m128i a, __m128i b)
931 {
932 return (__m128i)((__v16qi)a == (__v16qi)b);
933 }
934
935 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
936 _mm_cmpeq_epi16(__m128i a, __m128i b)
937 {
938 return (__m128i)((__v8hi)a == (__v8hi)b);
939 }
940
941 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
942 _mm_cmpeq_epi32(__m128i a, __m128i b)
943 {
944 return (__m128i)((__v4si)a == (__v4si)b);
945 }
946
/* Element-wise a > b with both operands viewed as vectors of 16 signed
 * chars. A local signed-char vector typedef is used instead of __v16qi for
 * the reason given in the comment inside the body. */
947 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
948 _mm_cmpgt_epi8(__m128i a, __m128i b)
949 {
950 /* This function always performs a signed comparison, but __v16qi is a char
951 which may be signed or unsigned. */
952 typedef signed char __v16qs __attribute__((__vector_size__(16)));
953 return (__m128i)((__v16qs)a > (__v16qs)b);
954 }
955
956 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
957 _mm_cmpgt_epi16(__m128i a, __m128i b)
958 {
959 return (__m128i)((__v8hi)a > (__v8hi)b);
960 }
961
962 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
963 _mm_cmpgt_epi32(__m128i a, __m128i b)
964 {
965 return (__m128i)((__v4si)a > (__v4si)b);
966 }
967
/* Less-than on 8-bit elements, expressed as _mm_cmpgt_epi8 with the two
 * operands swapped. */
968 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
969 _mm_cmplt_epi8(__m128i a, __m128i b)
970 {
971 return _mm_cmpgt_epi8(b,a);
972 }
973
974 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
975 _mm_cmplt_epi16(__m128i a, __m128i b)
976 {
977 return _mm_cmpgt_epi16(b,a);
978 }
979
980 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
981 _mm_cmplt_epi32(__m128i a, __m128i b)
982 {
983 return _mm_cmpgt_epi32(b,a);
984 }
985
986 #ifdef __x86_64__
987 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
988 _mm_cvtsi64_sd(__m128d a, long long b)
989 {
990 a[0] = b;
991 return a;
992 }
993
994 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
995 _mm_cvtsd_si64(__m128d a)
996 {
997 return __builtin_ia32_cvtsd2si64(a);
998 }
999
/* Converts element 0 of a to a 64-bit integer, truncating toward zero.
 * Goes through the CVTTSD2SI (64-bit) builtin instead of a plain C
 * double-to-long-long conversion: the C conversion is undefined when the
 * value is NaN or does not fit in long long, whereas the instruction has a
 * defined result for such inputs. */
1000 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1001 _mm_cvttsd_si64(__m128d a)
1002 {
1003 return __builtin_ia32_cvttsd2si64(a);
1004 }
1005 #endif
1006
1007 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1008 _mm_cvtepi32_ps(__m128i a)
1009 {
1010 return __builtin_ia32_cvtdq2ps((__v4si)a);
1011 }
1012
1013 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1014 _mm_cvtps_epi32(__m128 a)
1015 {
1016 return (__m128i)__builtin_ia32_cvtps2dq(a);
1017 }
1018
1019 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1020 _mm_cvttps_epi32(__m128 a)
1021 {
1022 return (__m128i)__builtin_ia32_cvttps2dq(a);
1023 }
1024
/* Returns a vector whose __v4si element 0 is a and whose other three
 * 32-bit elements are 0. */
1025 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1026 _mm_cvtsi32_si128(int a)
1027 {
1028 return (__m128i)(__v4si){ a, 0, 0, 0 };
1029 }
1030
1031 #ifdef __x86_64__
1032 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1033 _mm_cvtsi64_si128(long long a)
1034 {
1035 return (__m128i){ a, 0 };
1036 }
1037 #endif
1038
1039 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1040 _mm_cvtsi128_si32(__m128i a)
1041 {
1042 __v4si b = (__v4si)a;
1043 return b[0];
1044 }
1045
1046 #ifdef __x86_64__
1047 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1048 _mm_cvtsi128_si64(__m128i a)
1049 {
1050 return a[0];
1051 }
1052 #endif
1053
1054 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1055 _mm_load_si128(__m128i const *p)
1056 {
1057 return *p;
1058 }
1059
1060 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1061 _mm_loadu_si128(__m128i const *p)
1062 {
1063 struct __loadu_si128 {
1064 __m128i v;
1065 } __attribute__((packed, may_alias));
1066 return ((struct __loadu_si128*)p)->v;
1067 }
1068
1069 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1070 _mm_loadl_epi64(__m128i const *p)
1071 {
1072 struct __mm_loadl_epi64_struct {
1073 long long u;
1074 } __attribute__((__packed__, __may_alias__));
1075 return (__m128i) { ((struct __mm_loadl_epi64_struct*)p)->u, 0};
1076 }
1077
1078 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1079 _mm_set_epi64x(long long q1, long long q0)
1080 {
1081 return (__m128i){ q0, q1 };
1082 }
1083
1084 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1085 _mm_set_epi64(__m64 q1, __m64 q0)
1086 {
1087 return (__m128i){ (long long)q0, (long long)q1 };
1088 }
1089
1090 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1091 _mm_set_epi32(int i3, int i2, int i1, int i0)
1092 {
1093 return (__m128i)(__v4si){ i0, i1, i2, i3};
1094 }
1095
1096 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1097 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1098 {
1099 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1100 }
1101
1102 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1103 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9 , char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b 0)
1104 {
1105 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1106 }
1107
1108 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1109 _mm_set1_epi64x(long long q)
1110 {
1111 return (__m128i){ q, q };
1112 }
1113
1114 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1115 _mm_set1_epi64(__m64 q)
1116 {
1117 return (__m128i){ (long long)q, (long long)q };
1118 }
1119
1120 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1121 _mm_set1_epi32(int i)
1122 {
1123 return (__m128i)(__v4si){ i, i, i, i };
1124 }
1125
1126 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1127 _mm_set1_epi16(short w)
1128 {
1129 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1130 }
1131
1132 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1133 _mm_set1_epi8(char b)
1134 {
1135 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1136 }
1137
1138 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1139 _mm_setr_epi64(__m64 q0, __m64 q1)
1140 {
1141 return (__m128i){ (long long)q0, (long long)q1 };
1142 }
1143
1144 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1145 _mm_setr_epi32(int i0, int i1, int i2, int i3)
1146 {
1147 return (__m128i)(__v4si){ i0, i1, i2, i3};
1148 }
1149
1150 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1151 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1152 {
1153 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1154 }
1155
1156 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1157 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, cha r b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b 15)
1158 {
1159 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1160 }
1161
1162 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1163 _mm_setzero_si128(void)
1164 {
1165 return (__m128i){ 0LL, 0LL };
1166 }
1167
1168 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1169 _mm_store_si128(__m128i *p, __m128i b)
1170 {
1171 *p = b;
1172 }
1173
1174 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1175 _mm_storeu_si128(__m128i *p, __m128i b)
1176 {
1177 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1178 }
1179
1180 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1181 _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1182 {
1183 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1184 }
1185
1186 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1187 _mm_storel_epi64(__m128i *p, __m128i a)
1188 {
1189 struct __mm_storel_epi64_struct {
1190 long long u;
1191 } __attribute__((__packed__, __may_alias__));
1192 ((struct __mm_storel_epi64_struct*)p)->u = a[0];
1193 }
1194
1195 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1196 _mm_stream_pd(double *p, __m128d a)
1197 {
1198 __builtin_ia32_movntpd(p, a);
1199 }
1200
1201 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1202 _mm_stream_si128(__m128i *p, __m128i a)
1203 {
1204 __builtin_ia32_movntdq(p, a);
1205 }
1206
/* Stores the int `a` to `p` by forwarding to __builtin_ia32_movnti. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  int *__dst = p;
  __builtin_ia32_movnti(__dst, a);
}
1212
/* Forwards the address `p` to __builtin_ia32_clflush. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  void const *__addr = p;
  __builtin_ia32_clflush(__addr);
}
1218
/* Emits the fence provided by __builtin_ia32_lfence; takes no arguments
 * and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
  return;
}
1224
/* Emits the fence provided by __builtin_ia32_mfence; takes no arguments
 * and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
  return;
}
1230
1231 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1232 _mm_packs_epi16(__m128i a, __m128i b)
1233 {
1234 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1235 }
1236
1237 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1238 _mm_packs_epi32(__m128i a, __m128i b)
1239 {
1240 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1241 }
1242
1243 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1244 _mm_packus_epi16(__m128i a, __m128i b)
1245 {
1246 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1247 }
1248
1249 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1250 _mm_extract_epi16(__m128i a, int imm)
1251 {
1252 __v8hi b = (__v8hi)a;
1253 return (unsigned short)b[imm];
1254 }
1255
1256 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1257 _mm_insert_epi16(__m128i a, int b, int imm)
1258 {
1259 __v8hi c = (__v8hi)a;
1260 c[imm & 7] = b;
1261 return (__m128i)c;
1262 }
1263
1264 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1265 _mm_movemask_epi8(__m128i a)
1266 {
1267 return __builtin_ia32_pmovmskb128((__v16qi)a);
1268 }
1269
/* Permutes the four __v4si elements of `a`: result element k is the
 * source element named by bits [2k+1:2k] of `imm`. `a` is evaluated
 * once; `imm` must be usable as a constant shuffle index. The second
 * shuffle operand is an all-zero vector that is never selected. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3); })
1275
/* Permutes __v8hi elements 0..3 of `a` according to the four 2-bit
 * fields of `imm`; elements 4..7 are copied through unchanged. `a` is
 * evaluated once. The second shuffle operand is an all-zero vector that
 * is never selected. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4, 5, 6, 7); })
1282
/* Permutes __v8hi elements 4..7 of `a` according to the four 2-bit
 * fields of `imm` (each field is offset by 4); elements 0..3 are copied
 * through unchanged. `a` is evaluated once. The second shuffle operand
 * is an all-zero vector that is never selected. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3)); })
1291
1292 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1293 _mm_unpackhi_epi8(__m128i a, __m128i b)
1294 {
1295 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16 +9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1296 }
1297
1298 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1299 _mm_unpackhi_epi16(__m128i a, __m128i b)
1300 {
1301 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1302 }
1303
1304 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1305 _mm_unpackhi_epi32(__m128i a, __m128i b)
1306 {
1307 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
1308 }
1309
1310 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1311 _mm_unpackhi_epi64(__m128i a, __m128i b)
1312 {
1313 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
1314 }
1315
1316 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1317 _mm_unpacklo_epi8(__m128i a, __m128i b)
1318 {
1319 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16 +1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1320 }
1321
1322 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1323 _mm_unpacklo_epi16(__m128i a, __m128i b)
1324 {
1325 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1326 }
1327
1328 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1329 _mm_unpacklo_epi32(__m128i a, __m128i b)
1330 {
1331 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
1332 }
1333
1334 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1335 _mm_unpacklo_epi64(__m128i a, __m128i b)
1336 {
1337 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
1338 }
1339
1340 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
1341 _mm_movepi64_pi64(__m128i a)
1342 {
1343 return (__m64)a[0];
1344 }
1345
1346 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1347 _mm_movpi64_pi64(__m64 a)
1348 {
1349 return (__m128i){ (long long)a, 0 };
1350 }
1351
1352 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1353 _mm_move_epi64(__m128i a)
1354 {
1355 return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
1356 }
1357
1358 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1359 _mm_unpackhi_pd(__m128d a, __m128d b)
1360 {
1361 return __builtin_shufflevector(a, b, 1, 2+1);
1362 }
1363
1364 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1365 _mm_unpacklo_pd(__m128d a, __m128d b)
1366 {
1367 return __builtin_shufflevector(a, b, 0, 2+0);
1368 }
1369
1370 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1371 _mm_movemask_pd(__m128d a)
1372 {
1373 return __builtin_ia32_movmskpd(a);
1374 }
1375
/* Builds a two-element result: element 0 is a[bit 0 of i] and element 1
 * is b[bit 1 of i] (shuffle indices 2 and 3 refer to `b`). `a` and `b`
 * are each evaluated once; `i` must be usable as a constant shuffle
 * index. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __m128d __a = (a); \
  __m128d __b = (b); \
  __builtin_shufflevector(__a, __b, \
                          (i) & 1, \
                          2 + (((i) >> 1) & 1)); })
1380
1381 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1382 _mm_castpd_ps(__m128d in)
1383 {
1384 return (__m128)in;
1385 }
1386
1387 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1388 _mm_castpd_si128(__m128d in)
1389 {
1390 return (__m128i)in;
1391 }
1392
1393 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1394 _mm_castps_pd(__m128 in)
1395 {
1396 return (__m128d)in;
1397 }
1398
1399 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1400 _mm_castps_si128(__m128 in)
1401 {
1402 return (__m128i)in;
1403 }
1404
1405 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1406 _mm_castsi128_ps(__m128i in)
1407 {
1408 return (__m128)in;
1409 }
1410
1411 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1412 _mm_castsi128_pd(__m128i in)
1413 {
1414 return (__m128d)in;
1415 }
1416
/* Emits a single `pause` instruction through a volatile inline-asm
 * statement with no operands. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1422
/* Packs two selector values into one integer: `x` is shifted into bit 1
 * and OR-ed with `y` in bit 0. */
#define _MM_SHUFFLE2(x, y) \
  (((x) << 1) | (y))
1424
1425 #endif /* __SSE2__ */
1426
1427 #endif /* __EMMINTRIN_H */
OLDNEW
« no previous file with comments | « Linux_ia32/lib/clang/3.2/include/cpuid.h ('k') | Linux_ia32/lib/clang/3.2/include/f16cintrin.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698