Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: Linux_x64/lib/clang/3.4/include/emmintrin.h

Issue 228113004: roll llvm-allocated-type 186332:204777. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/llvm-allocated-type/
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Linux_x64/lib/clang/3.4/include/cpuid.h ('k') | Linux_x64/lib/clang/3.4/include/f16cintrin.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26
27 #ifndef __SSE2__
28 #error "SSE2 instruction set not enabled"
29 #else
30
31 #include <xmmintrin.h>
32
/* Public 16-byte vector types exposed by this header. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Element-typed 16-byte vector views used for casts inside this header. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
41
42 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
43 _mm_add_sd(__m128d __a, __m128d __b)
44 {
45 __a[0] += __b[0];
46 return __a;
47 }
48
49 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
50 _mm_add_pd(__m128d __a, __m128d __b)
51 {
52 return __a + __b;
53 }
54
55 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
56 _mm_sub_sd(__m128d __a, __m128d __b)
57 {
58 __a[0] -= __b[0];
59 return __a;
60 }
61
62 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
63 _mm_sub_pd(__m128d __a, __m128d __b)
64 {
65 return __a - __b;
66 }
67
68 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
69 _mm_mul_sd(__m128d __a, __m128d __b)
70 {
71 __a[0] *= __b[0];
72 return __a;
73 }
74
75 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
76 _mm_mul_pd(__m128d __a, __m128d __b)
77 {
78 return __a * __b;
79 }
80
81 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
82 _mm_div_sd(__m128d __a, __m128d __b)
83 {
84 __a[0] /= __b[0];
85 return __a;
86 }
87
88 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
89 _mm_div_pd(__m128d __a, __m128d __b)
90 {
91 return __a / __b;
92 }
93
94 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
95 _mm_sqrt_sd(__m128d __a, __m128d __b)
96 {
97 __m128d __c = __builtin_ia32_sqrtsd(__b);
98 return (__m128d) { __c[0], __a[1] };
99 }
100
101 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102 _mm_sqrt_pd(__m128d __a)
103 {
104 return __builtin_ia32_sqrtpd(__a);
105 }
106
107 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
108 _mm_min_sd(__m128d __a, __m128d __b)
109 {
110 return __builtin_ia32_minsd(__a, __b);
111 }
112
113 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114 _mm_min_pd(__m128d __a, __m128d __b)
115 {
116 return __builtin_ia32_minpd(__a, __b);
117 }
118
119 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
120 _mm_max_sd(__m128d __a, __m128d __b)
121 {
122 return __builtin_ia32_maxsd(__a, __b);
123 }
124
125 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126 _mm_max_pd(__m128d __a, __m128d __b)
127 {
128 return __builtin_ia32_maxpd(__a, __b);
129 }
130
131 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
132 _mm_and_pd(__m128d __a, __m128d __b)
133 {
134 return (__m128d)((__v4si)__a & (__v4si)__b);
135 }
136
137 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138 _mm_andnot_pd(__m128d __a, __m128d __b)
139 {
140 return (__m128d)(~(__v4si)__a & (__v4si)__b);
141 }
142
143 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
144 _mm_or_pd(__m128d __a, __m128d __b)
145 {
146 return (__m128d)((__v4si)__a | (__v4si)__b);
147 }
148
149 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150 _mm_xor_pd(__m128d __a, __m128d __b)
151 {
152 return (__m128d)((__v4si)__a ^ (__v4si)__b);
153 }
154
155 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
156 _mm_cmpeq_pd(__m128d __a, __m128d __b)
157 {
158 return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
159 }
160
161 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
162 _mm_cmplt_pd(__m128d __a, __m128d __b)
163 {
164 return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
165 }
166
167 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
168 _mm_cmple_pd(__m128d __a, __m128d __b)
169 {
170 return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
171 }
172
173 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
174 _mm_cmpgt_pd(__m128d __a, __m128d __b)
175 {
176 return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
177 }
178
179 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
180 _mm_cmpge_pd(__m128d __a, __m128d __b)
181 {
182 return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
183 }
184
185 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
186 _mm_cmpord_pd(__m128d __a, __m128d __b)
187 {
188 return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
189 }
190
191 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
192 _mm_cmpunord_pd(__m128d __a, __m128d __b)
193 {
194 return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
195 }
196
197 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
198 _mm_cmpneq_pd(__m128d __a, __m128d __b)
199 {
200 return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
201 }
202
203 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
204 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
205 {
206 return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
207 }
208
209 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
210 _mm_cmpnle_pd(__m128d __a, __m128d __b)
211 {
212 return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
213 }
214
215 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
216 _mm_cmpngt_pd(__m128d __a, __m128d __b)
217 {
218 return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
219 }
220
221 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
222 _mm_cmpnge_pd(__m128d __a, __m128d __b)
223 {
224 return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
225 }
226
227 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
228 _mm_cmpeq_sd(__m128d __a, __m128d __b)
229 {
230 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
231 }
232
233 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
234 _mm_cmplt_sd(__m128d __a, __m128d __b)
235 {
236 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
237 }
238
239 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
240 _mm_cmple_sd(__m128d __a, __m128d __b)
241 {
242 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
243 }
244
245 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
246 _mm_cmpgt_sd(__m128d __a, __m128d __b)
247 {
248 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
249 return (__m128d) { __c[0], __a[1] };
250 }
251
252 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
253 _mm_cmpge_sd(__m128d __a, __m128d __b)
254 {
255 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
256 return (__m128d) { __c[0], __a[1] };
257 }
258
259 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
260 _mm_cmpord_sd(__m128d __a, __m128d __b)
261 {
262 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
263 }
264
265 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
266 _mm_cmpunord_sd(__m128d __a, __m128d __b)
267 {
268 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
269 }
270
271 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
272 _mm_cmpneq_sd(__m128d __a, __m128d __b)
273 {
274 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
275 }
276
277 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
278 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
279 {
280 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
281 }
282
283 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
284 _mm_cmpnle_sd(__m128d __a, __m128d __b)
285 {
286 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
287 }
288
289 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
290 _mm_cmpngt_sd(__m128d __a, __m128d __b)
291 {
292 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
293 return (__m128d) { __c[0], __a[1] };
294 }
295
296 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
297 _mm_cmpnge_sd(__m128d __a, __m128d __b)
298 {
299 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
300 return (__m128d) { __c[0], __a[1] };
301 }
302
303 static __inline__ int __attribute__((__always_inline__, __nodebug__))
304 _mm_comieq_sd(__m128d __a, __m128d __b)
305 {
306 return __builtin_ia32_comisdeq(__a, __b);
307 }
308
309 static __inline__ int __attribute__((__always_inline__, __nodebug__))
310 _mm_comilt_sd(__m128d __a, __m128d __b)
311 {
312 return __builtin_ia32_comisdlt(__a, __b);
313 }
314
315 static __inline__ int __attribute__((__always_inline__, __nodebug__))
316 _mm_comile_sd(__m128d __a, __m128d __b)
317 {
318 return __builtin_ia32_comisdle(__a, __b);
319 }
320
321 static __inline__ int __attribute__((__always_inline__, __nodebug__))
322 _mm_comigt_sd(__m128d __a, __m128d __b)
323 {
324 return __builtin_ia32_comisdgt(__a, __b);
325 }
326
327 static __inline__ int __attribute__((__always_inline__, __nodebug__))
328 _mm_comige_sd(__m128d __a, __m128d __b)
329 {
330 return __builtin_ia32_comisdge(__a, __b);
331 }
332
333 static __inline__ int __attribute__((__always_inline__, __nodebug__))
334 _mm_comineq_sd(__m128d __a, __m128d __b)
335 {
336 return __builtin_ia32_comisdneq(__a, __b);
337 }
338
339 static __inline__ int __attribute__((__always_inline__, __nodebug__))
340 _mm_ucomieq_sd(__m128d __a, __m128d __b)
341 {
342 return __builtin_ia32_ucomisdeq(__a, __b);
343 }
344
345 static __inline__ int __attribute__((__always_inline__, __nodebug__))
346 _mm_ucomilt_sd(__m128d __a, __m128d __b)
347 {
348 return __builtin_ia32_ucomisdlt(__a, __b);
349 }
350
351 static __inline__ int __attribute__((__always_inline__, __nodebug__))
352 _mm_ucomile_sd(__m128d __a, __m128d __b)
353 {
354 return __builtin_ia32_ucomisdle(__a, __b);
355 }
356
357 static __inline__ int __attribute__((__always_inline__, __nodebug__))
358 _mm_ucomigt_sd(__m128d __a, __m128d __b)
359 {
360 return __builtin_ia32_ucomisdgt(__a, __b);
361 }
362
363 static __inline__ int __attribute__((__always_inline__, __nodebug__))
364 _mm_ucomige_sd(__m128d __a, __m128d __b)
365 {
366 return __builtin_ia32_ucomisdge(__a, __b);
367 }
368
369 static __inline__ int __attribute__((__always_inline__, __nodebug__))
370 _mm_ucomineq_sd(__m128d __a, __m128d __b)
371 {
372 return __builtin_ia32_ucomisdneq(__a, __b);
373 }
374
375 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
376 _mm_cvtpd_ps(__m128d __a)
377 {
378 return __builtin_ia32_cvtpd2ps(__a);
379 }
380
381 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
382 _mm_cvtps_pd(__m128 __a)
383 {
384 return __builtin_ia32_cvtps2pd(__a);
385 }
386
387 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
388 _mm_cvtepi32_pd(__m128i __a)
389 {
390 return __builtin_ia32_cvtdq2pd((__v4si)__a);
391 }
392
393 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
394 _mm_cvtpd_epi32(__m128d __a)
395 {
396 return __builtin_ia32_cvtpd2dq(__a);
397 }
398
399 static __inline__ int __attribute__((__always_inline__, __nodebug__))
400 _mm_cvtsd_si32(__m128d __a)
401 {
402 return __builtin_ia32_cvtsd2si(__a);
403 }
404
405 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
406 _mm_cvtsd_ss(__m128 __a, __m128d __b)
407 {
408 __a[0] = __b[0];
409 return __a;
410 }
411
412 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
413 _mm_cvtsi32_sd(__m128d __a, int __b)
414 {
415 __a[0] = __b;
416 return __a;
417 }
418
419 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
420 _mm_cvtss_sd(__m128d __a, __m128 __b)
421 {
422 __a[0] = __b[0];
423 return __a;
424 }
425
426 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
427 _mm_cvttpd_epi32(__m128d __a)
428 {
429 return (__m128i)__builtin_ia32_cvttpd2dq(__a);
430 }
431
432 static __inline__ int __attribute__((__always_inline__, __nodebug__))
433 _mm_cvttsd_si32(__m128d __a)
434 {
435 return __a[0];
436 }
437
438 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
439 _mm_cvtpd_pi32(__m128d __a)
440 {
441 return (__m64)__builtin_ia32_cvtpd2pi(__a);
442 }
443
444 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
445 _mm_cvttpd_pi32(__m128d __a)
446 {
447 return (__m64)__builtin_ia32_cvttpd2pi(__a);
448 }
449
450 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
451 _mm_cvtpi32_pd(__m64 __a)
452 {
453 return __builtin_ia32_cvtpi2pd((__v2si)__a);
454 }
455
456 static __inline__ double __attribute__((__always_inline__, __nodebug__))
457 _mm_cvtsd_f64(__m128d __a)
458 {
459 return __a[0];
460 }
461
462 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
463 _mm_load_pd(double const *__dp)
464 {
465 return *(__m128d*)__dp;
466 }
467
468 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
469 _mm_load1_pd(double const *__dp)
470 {
471 struct __mm_load1_pd_struct {
472 double __u;
473 } __attribute__((__packed__, __may_alias__));
474 double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
475 return (__m128d){ __u, __u };
476 }
477
/* Alternate spelling: expands to _mm_load1_pd with the same argument. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
479
480 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
481 _mm_loadr_pd(double const *__dp)
482 {
483 __m128d __u = *(__m128d*)__dp;
484 return __builtin_shufflevector(__u, __u, 1, 0);
485 }
486
487 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
488 _mm_loadu_pd(double const *__dp)
489 {
490 struct __loadu_pd {
491 __m128d __v;
492 } __attribute__((packed, may_alias));
493 return ((struct __loadu_pd*)__dp)->__v;
494 }
495
496 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
497 _mm_load_sd(double const *__dp)
498 {
499 struct __mm_load_sd_struct {
500 double __u;
501 } __attribute__((__packed__, __may_alias__));
502 double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
503 return (__m128d){ __u, 0 };
504 }
505
506 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
507 _mm_loadh_pd(__m128d __a, double const *__dp)
508 {
509 struct __mm_loadh_pd_struct {
510 double __u;
511 } __attribute__((__packed__, __may_alias__));
512 double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
513 return (__m128d){ __a[0], __u };
514 }
515
516 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
517 _mm_loadl_pd(__m128d __a, double const *__dp)
518 {
519 struct __mm_loadl_pd_struct {
520 double __u;
521 } __attribute__((__packed__, __may_alias__));
522 double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
523 return (__m128d){ __u, __a[1] };
524 }
525
526 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
527 _mm_set_sd(double __w)
528 {
529 return (__m128d){ __w, 0 };
530 }
531
532 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
533 _mm_set1_pd(double __w)
534 {
535 return (__m128d){ __w, __w };
536 }
537
538 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
539 _mm_set_pd(double __w, double __x)
540 {
541 return (__m128d){ __x, __w };
542 }
543
544 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
545 _mm_setr_pd(double __w, double __x)
546 {
547 return (__m128d){ __w, __x };
548 }
549
550 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
551 _mm_setzero_pd(void)
552 {
553 return (__m128d){ 0, 0 };
554 }
555
556 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
557 _mm_move_sd(__m128d __a, __m128d __b)
558 {
559 return (__m128d){ __b[0], __a[1] };
560 }
561
562 static __inline__ void __attribute__((__always_inline__, __nodebug__))
563 _mm_store_sd(double *__dp, __m128d __a)
564 {
565 struct __mm_store_sd_struct {
566 double __u;
567 } __attribute__((__packed__, __may_alias__));
568 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
569 }
570
571 static __inline__ void __attribute__((__always_inline__, __nodebug__))
572 _mm_store1_pd(double *__dp, __m128d __a)
573 {
574 struct __mm_store1_pd_struct {
575 double __u[2];
576 } __attribute__((__packed__, __may_alias__));
577 ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
578 ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
579 }
580
581 static __inline__ void __attribute__((__always_inline__, __nodebug__))
582 _mm_store_pd(double *__dp, __m128d __a)
583 {
584 *(__m128d *)__dp = __a;
585 }
586
587 static __inline__ void __attribute__((__always_inline__, __nodebug__))
588 _mm_storeu_pd(double *__dp, __m128d __a)
589 {
590 __builtin_ia32_storeupd(__dp, __a);
591 }
592
593 static __inline__ void __attribute__((__always_inline__, __nodebug__))
594 _mm_storer_pd(double *__dp, __m128d __a)
595 {
596 __a = __builtin_shufflevector(__a, __a, 1, 0);
597 *(__m128d *)__dp = __a;
598 }
599
600 static __inline__ void __attribute__((__always_inline__, __nodebug__))
601 _mm_storeh_pd(double *__dp, __m128d __a)
602 {
603 struct __mm_storeh_pd_struct {
604 double __u;
605 } __attribute__((__packed__, __may_alias__));
606 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
607 }
608
609 static __inline__ void __attribute__((__always_inline__, __nodebug__))
610 _mm_storel_pd(double *__dp, __m128d __a)
611 {
612 struct __mm_storeh_pd_struct {
613 double __u;
614 } __attribute__((__packed__, __may_alias__));
615 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
616 }
617
618 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
619 _mm_add_epi8(__m128i __a, __m128i __b)
620 {
621 return (__m128i)((__v16qi)__a + (__v16qi)__b);
622 }
623
624 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
625 _mm_add_epi16(__m128i __a, __m128i __b)
626 {
627 return (__m128i)((__v8hi)__a + (__v8hi)__b);
628 }
629
630 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
631 _mm_add_epi32(__m128i __a, __m128i __b)
632 {
633 return (__m128i)((__v4si)__a + (__v4si)__b);
634 }
635
636 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
637 _mm_add_si64(__m64 __a, __m64 __b)
638 {
639 return __a + __b;
640 }
641
642 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
643 _mm_add_epi64(__m128i __a, __m128i __b)
644 {
645 return __a + __b;
646 }
647
648 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
649 _mm_adds_epi8(__m128i __a, __m128i __b)
650 {
651 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
652 }
653
654 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
655 _mm_adds_epi16(__m128i __a, __m128i __b)
656 {
657 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
658 }
659
660 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
661 _mm_adds_epu8(__m128i __a, __m128i __b)
662 {
663 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
664 }
665
666 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
667 _mm_adds_epu16(__m128i __a, __m128i __b)
668 {
669 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
670 }
671
672 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
673 _mm_avg_epu8(__m128i __a, __m128i __b)
674 {
675 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
676 }
677
678 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
679 _mm_avg_epu16(__m128i __a, __m128i __b)
680 {
681 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
682 }
683
684 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
685 _mm_madd_epi16(__m128i __a, __m128i __b)
686 {
687 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
688 }
689
690 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
691 _mm_max_epi16(__m128i __a, __m128i __b)
692 {
693 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
694 }
695
696 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
697 _mm_max_epu8(__m128i __a, __m128i __b)
698 {
699 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
700 }
701
702 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
703 _mm_min_epi16(__m128i __a, __m128i __b)
704 {
705 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
706 }
707
708 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
709 _mm_min_epu8(__m128i __a, __m128i __b)
710 {
711 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
712 }
713
714 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
715 _mm_mulhi_epi16(__m128i __a, __m128i __b)
716 {
717 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
718 }
719
720 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
721 _mm_mulhi_epu16(__m128i __a, __m128i __b)
722 {
723 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
724 }
725
726 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
727 _mm_mullo_epi16(__m128i __a, __m128i __b)
728 {
729 return (__m128i)((__v8hi)__a * (__v8hi)__b);
730 }
731
732 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
733 _mm_mul_su32(__m64 __a, __m64 __b)
734 {
735 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
736 }
737
738 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
739 _mm_mul_epu32(__m128i __a, __m128i __b)
740 {
741 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
742 }
743
744 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
745 _mm_sad_epu8(__m128i __a, __m128i __b)
746 {
747 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
748 }
749
750 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
751 _mm_sub_epi8(__m128i __a, __m128i __b)
752 {
753 return (__m128i)((__v16qi)__a - (__v16qi)__b);
754 }
755
756 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
757 _mm_sub_epi16(__m128i __a, __m128i __b)
758 {
759 return (__m128i)((__v8hi)__a - (__v8hi)__b);
760 }
761
762 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
763 _mm_sub_epi32(__m128i __a, __m128i __b)
764 {
765 return (__m128i)((__v4si)__a - (__v4si)__b);
766 }
767
768 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
769 _mm_sub_si64(__m64 __a, __m64 __b)
770 {
771 return __a - __b;
772 }
773
774 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
775 _mm_sub_epi64(__m128i __a, __m128i __b)
776 {
777 return __a - __b;
778 }
779
780 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
781 _mm_subs_epi8(__m128i __a, __m128i __b)
782 {
783 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
784 }
785
786 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
787 _mm_subs_epi16(__m128i __a, __m128i __b)
788 {
789 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
790 }
791
792 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
793 _mm_subs_epu8(__m128i __a, __m128i __b)
794 {
795 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
796 }
797
798 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
799 _mm_subs_epu16(__m128i __a, __m128i __b)
800 {
801 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
802 }
803
804 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
805 _mm_and_si128(__m128i __a, __m128i __b)
806 {
807 return __a & __b;
808 }
809
810 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
811 _mm_andnot_si128(__m128i __a, __m128i __b)
812 {
813 return ~__a & __b;
814 }
815
816 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
817 _mm_or_si128(__m128i __a, __m128i __b)
818 {
819 return __a | __b;
820 }
821
822 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
823 _mm_xor_si128(__m128i __a, __m128i __b)
824 {
825 return __a ^ __b;
826 }
827
/* Calls the pslldqi128 builtin on (a) with a count of (count)*8.
   Written as a plain expression with no macro-local variable: a local named
   __a would capture an argument expression that itself mentions __a and
   initialise the local from itself. */
#define _mm_slli_si128(a, count) \
  ((__m128i)__builtin_ia32_pslldqi128((__m128i)(a), (count)*8))
831
832 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
833 _mm_slli_epi16(__m128i __a, int __count)
834 {
835 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
836 }
837
838 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
839 _mm_sll_epi16(__m128i __a, __m128i __count)
840 {
841 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
842 }
843
844 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
845 _mm_slli_epi32(__m128i __a, int __count)
846 {
847 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
848 }
849
850 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
851 _mm_sll_epi32(__m128i __a, __m128i __count)
852 {
853 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
854 }
855
856 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
857 _mm_slli_epi64(__m128i __a, int __count)
858 {
859 return __builtin_ia32_psllqi128(__a, __count);
860 }
861
862 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
863 _mm_sll_epi64(__m128i __a, __m128i __count)
864 {
865 return __builtin_ia32_psllq128(__a, __count);
866 }
867
868 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
869 _mm_srai_epi16(__m128i __a, int __count)
870 {
871 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
872 }
873
874 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
875 _mm_sra_epi16(__m128i __a, __m128i __count)
876 {
877 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
878 }
879
880 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
881 _mm_srai_epi32(__m128i __a, int __count)
882 {
883 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
884 }
885
886 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
887 _mm_sra_epi32(__m128i __a, __m128i __count)
888 {
889 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
890 }
891
892
/* Calls the psrldqi128 builtin on (a) with a count of (count)*8.
   Written as a plain expression with no macro-local variable: a local named
   __a would capture an argument expression that itself mentions __a and
   initialise the local from itself. */
#define _mm_srli_si128(a, count) \
  ((__m128i)__builtin_ia32_psrldqi128((__m128i)(a), (count)*8))
896
897 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
898 _mm_srli_epi16(__m128i __a, int __count)
899 {
900 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
901 }
902
903 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
904 _mm_srl_epi16(__m128i __a, __m128i __count)
905 {
906 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
907 }
908
909 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
910 _mm_srli_epi32(__m128i __a, int __count)
911 {
912 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
913 }
914
915 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
916 _mm_srl_epi32(__m128i __a, __m128i __count)
917 {
918 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
919 }
920
921 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
922 _mm_srli_epi64(__m128i __a, int __count)
923 {
924 return __builtin_ia32_psrlqi128(__a, __count);
925 }
926
927 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
928 _mm_srl_epi64(__m128i __a, __m128i __count)
929 {
930 return __builtin_ia32_psrlq128(__a, __count);
931 }
932
933 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
934 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
935 {
936 return (__m128i)((__v16qi)__a == (__v16qi)__b);
937 }
938
939 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
940 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
941 {
942 return (__m128i)((__v8hi)__a == (__v8hi)__b);
943 }
944
945 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
946 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
947 {
948 return (__m128i)((__v4si)__a == (__v4si)__b);
949 }
950
951 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
952 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
953 {
954 /* This function always performs a signed comparison, but __v16qi is a char
955 which may be signed or unsigned. */
956 typedef signed char __v16qs __attribute__((__vector_size__(16)));
957 return (__m128i)((__v16qs)__a > (__v16qs)__b);
958 }
959
960 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
961 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
962 {
963 return (__m128i)((__v8hi)__a > (__v8hi)__b);
964 }
965
966 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
967 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
968 {
969 return (__m128i)((__v4si)__a > (__v4si)__b);
970 }
971
972 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
973 _mm_cmplt_epi8(__m128i __a, __m128i __b)
974 {
975 return _mm_cmpgt_epi8(__b, __a);
976 }
977
978 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
979 _mm_cmplt_epi16(__m128i __a, __m128i __b)
980 {
981 return _mm_cmpgt_epi16(__b, __a);
982 }
983
984 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
985 _mm_cmplt_epi32(__m128i __a, __m128i __b)
986 {
987 return _mm_cmpgt_epi32(__b, __a);
988 }
989
990 #ifdef __x86_64__
991 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
992 _mm_cvtsi64_sd(__m128d __a, long long __b)
993 {
994 __a[0] = __b;
995 return __a;
996 }
997
998 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
999 _mm_cvtsd_si64(__m128d __a)
1000 {
1001 return __builtin_ia32_cvtsd2si64(__a);
1002 }
1003
1004 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1005 _mm_cvttsd_si64(__m128d __a)
1006 {
1007 return __a[0];
1008 }
1009 #endif
1010
1011 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1012 _mm_cvtepi32_ps(__m128i __a)
1013 {
1014 return __builtin_ia32_cvtdq2ps((__v4si)__a);
1015 }
1016
1017 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1018 _mm_cvtps_epi32(__m128 __a)
1019 {
1020 return (__m128i)__builtin_ia32_cvtps2dq(__a);
1021 }
1022
1023 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1024 _mm_cvttps_epi32(__m128 __a)
1025 {
1026 return (__m128i)__builtin_ia32_cvttps2dq(__a);
1027 }
1028
1029 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1030 _mm_cvtsi32_si128(int __a)
1031 {
1032 return (__m128i)(__v4si){ __a, 0, 0, 0 };
1033 }
1034
1035 #ifdef __x86_64__
1036 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1037 _mm_cvtsi64_si128(long long __a)
1038 {
1039 return (__m128i){ __a, 0 };
1040 }
1041 #endif
1042
1043 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1044 _mm_cvtsi128_si32(__m128i __a)
1045 {
1046 __v4si __b = (__v4si)__a;
1047 return __b[0];
1048 }
1049
1050 #ifdef __x86_64__
1051 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
1052 _mm_cvtsi128_si64(__m128i __a)
1053 {
1054 return __a[0];
1055 }
1056 #endif
1057
1058 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1059 _mm_load_si128(__m128i const *__p)
1060 {
1061 return *__p;
1062 }
1063
1064 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1065 _mm_loadu_si128(__m128i const *__p)
1066 {
1067 struct __loadu_si128 {
1068 __m128i __v;
1069 } __attribute__((packed, may_alias));
1070 return ((struct __loadu_si128*)__p)->__v;
1071 }
1072
1073 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1074 _mm_loadl_epi64(__m128i const *__p)
1075 {
1076 struct __mm_loadl_epi64_struct {
1077 long long __u;
1078 } __attribute__((__packed__, __may_alias__));
1079 return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1080 }
1081
1082 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1083 _mm_set_epi64x(long long q1, long long q0)
1084 {
1085 return (__m128i){ q0, q1 };
1086 }
1087
1088 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1089 _mm_set_epi64(__m64 q1, __m64 q0)
1090 {
1091 return (__m128i){ (long long)q0, (long long)q1 };
1092 }
1093
1094 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1095 _mm_set_epi32(int i3, int i2, int i1, int i0)
1096 {
1097 return (__m128i)(__v4si){ i0, i1, i2, i3};
1098 }
1099
1100 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1101 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1102 {
1103 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1104 }
1105
1106 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1107 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9 , char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b 0)
1108 {
1109 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1110 }
1111
1112 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1113 _mm_set1_epi64x(long long __q)
1114 {
1115 return (__m128i){ __q, __q };
1116 }
1117
1118 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1119 _mm_set1_epi64(__m64 __q)
1120 {
1121 return (__m128i){ (long long)__q, (long long)__q };
1122 }
1123
1124 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1125 _mm_set1_epi32(int __i)
1126 {
1127 return (__m128i)(__v4si){ __i, __i, __i, __i };
1128 }
1129
1130 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1131 _mm_set1_epi16(short __w)
1132 {
1133 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1134 }
1135
1136 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1137 _mm_set1_epi8(char __b)
1138 {
1139 return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, _ _b, __b, __b, __b, __b, __b };
1140 }
1141
1142 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1143 _mm_setr_epi64(__m64 q0, __m64 q1)
1144 {
1145 return (__m128i){ (long long)q0, (long long)q1 };
1146 }
1147
1148 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1149 _mm_setr_epi32(int i0, int i1, int i2, int i3)
1150 {
1151 return (__m128i)(__v4si){ i0, i1, i2, i3};
1152 }
1153
1154 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1155 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1156 {
1157 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1158 }
1159
1160 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1161 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, cha r b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b 15)
1162 {
1163 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b 12, b13, b14, b15 };
1164 }
1165
1166 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1167 _mm_setzero_si128(void)
1168 {
1169 return (__m128i){ 0LL, 0LL };
1170 }
1171
1172 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1173 _mm_store_si128(__m128i *__p, __m128i __b)
1174 {
1175 *__p = __b;
1176 }
1177
1178 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1179 _mm_storeu_si128(__m128i *__p, __m128i __b)
1180 {
1181 __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
1182 }
1183
1184 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1185 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
1186 {
1187 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1188 }
1189
1190 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1191 _mm_storel_epi64(__m128i *__p, __m128i __a)
1192 {
1193 struct __mm_storel_epi64_struct {
1194 long long __u;
1195 } __attribute__((__packed__, __may_alias__));
1196 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1197 }
1198
1199 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1200 _mm_stream_pd(double *__p, __m128d __a)
1201 {
1202 __builtin_ia32_movntpd(__p, __a);
1203 }
1204
1205 static __inline__ void __attribute__((__always_inline__, __nodebug__))
1206 _mm_stream_si128(__m128i *__p, __m128i __a)
1207 {
1208 __builtin_ia32_movntdq(__p, __a);
1209 }
1210
/* Stores the 32-bit integer __a to __p via the non-temporal store (MOVNTI)
   builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  __builtin_ia32_movnti(__dst, __a);
}
1216
/* Issues a cache-line flush (CLFLUSH builtin) for the line containing the
   address __p.  The pointed-to data is not modified. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
  void const *__line = __p;
  __builtin_ia32_clflush(__line);
}
1222
/* Emits a load fence by invoking the LFENCE builtin; takes no arguments and
   returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  (void)0; /* no operands: the fence is the whole effect */
  __builtin_ia32_lfence();
}
1228
/* Emits a full memory fence by invoking the MFENCE builtin; takes no
   arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  (void)0; /* no operands: the fence is the whole effect */
  __builtin_ia32_mfence();
}
1234
1235 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1236 _mm_packs_epi16(__m128i __a, __m128i __b)
1237 {
1238 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1239 }
1240
1241 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1242 _mm_packs_epi32(__m128i __a, __m128i __b)
1243 {
1244 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1245 }
1246
1247 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1248 _mm_packus_epi16(__m128i __a, __m128i __b)
1249 {
1250 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1251 }
1252
1253 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1254 _mm_extract_epi16(__m128i __a, int __imm)
1255 {
1256 __v8hi __b = (__v8hi)__a;
1257 return (unsigned short)__b[__imm];
1258 }
1259
1260 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1261 _mm_insert_epi16(__m128i __a, int __b, int __imm)
1262 {
1263 __v8hi __c = (__v8hi)__a;
1264 __c[__imm & 7] = __b;
1265 return (__m128i)__c;
1266 }
1267
1268 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1269 _mm_movemask_epi8(__m128i __a)
1270 {
1271 return __builtin_ia32_pmovmskb128((__v16qi)__a);
1272 }
1273
/* Permutes the four 32-bit elements of `a`.  Each 2-bit field of `imm`
   (bits 1:0, 3:2, 5:4, 7:6) selects the source element for result elements
   0..3 respectively.  A macro, because the shuffle indices must be
   compile-time constants. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v4si)__a, (__v4si) _mm_set1_epi32(0), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6); })
1279
/* Permutes the low four 16-bit elements of `a` according to the four 2-bit
   fields of `imm`; elements 4..7 are copied through unchanged.  A macro,
   because the shuffle indices must be compile-time constants. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6, \
      4, 5, 6, 7); })
1286
/* Permutes the high four 16-bit elements of `a` according to the four 2-bit
   fields of `imm` (each field is offset by 4 to address elements 4..7);
   elements 0..3 are copied through unchanged.  A macro, because the shuffle
   indices must be compile-time constants. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
      0, 1, 2, 3, \
      4 + (((imm) & 0x03) >> 0), \
      4 + (((imm) & 0x0c) >> 2), \
      4 + (((imm) & 0x30) >> 4), \
      4 + (((imm) & 0xc0) >> 6)); })
1295
1296 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1297 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
1298 {
1299 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9 , 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1300 }
1301
1302 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1303 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
1304 {
1305 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8 +5, 6, 8+6, 7, 8+7);
1306 }
1307
1308 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1309 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
1310 {
1311 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4 +3);
1312 }
1313
1314 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1315 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
1316 {
1317 return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1318 }
1319
1320 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1321 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
1322 {
1323 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1 , 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1324 }
1325
1326 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1327 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
1328 {
1329 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8 +1, 2, 8+2, 3, 8+3);
1330 }
1331
1332 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1333 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
1334 {
1335 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4 +1);
1336 }
1337
1338 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1339 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
1340 {
1341 return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1342 }
1343
1344 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
1345 _mm_movepi64_pi64(__m128i __a)
1346 {
1347 return (__m64)__a[0];
1348 }
1349
1350 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1351 _mm_movpi64_pi64(__m64 __a)
1352 {
1353 return (__m128i){ (long long)__a, 0 };
1354 }
1355
1356 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1357 _mm_move_epi64(__m128i __a)
1358 {
1359 return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1360 }
1361
1362 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1363 _mm_unpackhi_pd(__m128d __a, __m128d __b)
1364 {
1365 return __builtin_shufflevector(__a, __b, 1, 2+1);
1366 }
1367
1368 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1369 _mm_unpacklo_pd(__m128d __a, __m128d __b)
1370 {
1371 return __builtin_shufflevector(__a, __b, 0, 2+0);
1372 }
1373
1374 static __inline__ int __attribute__((__always_inline__, __nodebug__))
1375 _mm_movemask_pd(__m128d __a)
1376 {
1377 return __builtin_ia32_movmskpd(__a);
1378 }
1379
/* Builds a two-double vector: bit 0 of `i` selects which element of `a`
   becomes result element 0, and bit 1 of `i` selects which element of `b`
   becomes result element 1 (hence the +2 offset into the second operand).
   A macro, because the shuffle indices must be compile-time constants. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __m128d __a = (a); \
  __m128d __b = (b); \
  __builtin_shufflevector(__a, __b, \
                          (i) & 1, \
                          (((i) & 2) >> 1) + 2); })
1384
1385 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1386 _mm_castpd_ps(__m128d __a)
1387 {
1388 return (__m128)__a;
1389 }
1390
1391 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1392 _mm_castpd_si128(__m128d __a)
1393 {
1394 return (__m128i)__a;
1395 }
1396
1397 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1398 _mm_castps_pd(__m128 __a)
1399 {
1400 return (__m128d)__a;
1401 }
1402
1403 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1404 _mm_castps_si128(__m128 __a)
1405 {
1406 return (__m128i)__a;
1407 }
1408
1409 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1410 _mm_castsi128_ps(__m128i __a)
1411 {
1412 return (__m128)__a;
1413 }
1414
1415 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
1416 _mm_castsi128_pd(__m128i __a)
1417 {
1418 return (__m128d)__a;
1419 }
1420
/* Emits a single PAUSE instruction through inline assembly.  The asm is
   volatile-qualified so the compiler does not remove it. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1426
/* Builds a two-bit shuffle selector: x goes to bit 1, y to bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1428
1429 #endif /* __SSE2__ */
1430
1431 #endif /* __EMMINTRIN_H */
OLDNEW
« no previous file with comments | « Linux_x64/lib/clang/3.4/include/cpuid.h ('k') | Linux_x64/lib/clang/3.4/include/f16cintrin.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698