asan/asan_clang_Darwin/lib/clang/3.1/include/avxintrin.h - Issue 9034019: Roll the new ASan binaries for Mac OS

Side by Side Diff: asan/asan_clang_Darwin/lib/clang/3.1/include/avxintrin.h

Issue 9034019: Roll the new ASan binaries for Mac OS (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/

Patch Set: Created 8 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*===---- avxintrin.h - AVX intrinsics -------------------------------------===	1 /*===---- avxintrin.h - AVX intrinsics -------------------------------------===

2 *	2 *

3 * Permission is hereby granted, free of charge, to any person obtaining a copy	3 * Permission is hereby granted, free of charge, to any person obtaining a copy

4 * of this software and associated documentation files (the "Software"), to deal	4 * of this software and associated documentation files (the "Software"), to deal

5 * in the Software without restriction, including without limitation the rights	5 * in the Software without restriction, including without limitation the rights

6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell	6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

7 * copies of the Software, and to permit persons to whom the Software is	7 * copies of the Software, and to permit persons to whom the Software is

8 * furnished to do so, subject to the following conditions:	8 * furnished to do so, subject to the following conditions:

9 *	9 *

10 * The above copyright notice and this permission notice shall be included in	10 * The above copyright notice and this permission notice shall be included in

(...skipping 127 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
138 {	138 {

139 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)a);	139 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)a);

140 }	140 }

141	141

142 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	142 static __inline __m256 __attribute__((__always_inline__, __nodebug__))

143 _mm256_rcp_ps(__m256 a)	143 _mm256_rcp_ps(__m256 a)

144 {	144 {

145 return (__m256)__builtin_ia32_rcpps256((__v8sf)a);	145 return (__m256)__builtin_ia32_rcpps256((__v8sf)a);

146 }	146 }

147	147

148 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	148 #define _mm256_round_pd(V, M) __extension__ ({ \

149 _mm256_round_pd(__m256d v, const int m)	149 __m256d __V = (V); \

150 {	150 (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })

151 return (__m256d)__builtin_ia32_roundpd256((__v4df)v, m);

152 }

153	151

154 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	152 #define _mm256_round_ps(V, M) __extension__ ({ \

155 _mm256_round_ps(__m256 v, const int m)	153 __m256 __V = (V); \

156 {	154 (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })

157 return (__m256)__builtin_ia32_roundps256((__v8sf)v, m);

158 }

159	155

160 #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)	156 #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)

161 #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)	157 #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)

162 #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)	158 #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)

163 #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)	159 #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)

164	160

165 /* Logical */	161 /* Logical */

166 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	162 static __inline __m256d __attribute__((__always_inline__, __nodebug__))

167 _mm256_and_pd(__m256d a, __m256d b)	163 _mm256_and_pd(__m256d a, __m256d b)

168 {	164 {

(...skipping 86 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
255 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)a, (__v4si)c);	251 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)a, (__v4si)c);

256 }	252 }

257	253

258 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	254 static __inline __m256 __attribute__((__always_inline__, __nodebug__))

259 _mm256_permutevar_ps(__m256 a, __m256i c)	255 _mm256_permutevar_ps(__m256 a, __m256i c)

260 {	256 {

261 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)a,	257 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)a,

262 (__v8si)c);	258 (__v8si)c);

263 }	259 }

264	260

265 static __inline __m128d __attribute__((__always_inline__, __nodebug__))	261 #define _mm_permute_pd(A, C) __extension__ ({ \

266 _mm_permute_pd(__m128d a, const int c)	262 __m128d __A = (A); \

267 {	263 (__m128d)__builtin_ia32_vpermilpd((__v2df)__A, (C)); })

268 return (__m128d)__builtin_ia32_vpermilpd((__v2df)a, c);

269 }

270	264

271 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	265 #define _mm256_permute_pd(A, C) __extension__ ({ \

272 _mm256_permute_pd(__m256d a, const int c)	266 __m256d __A = (A); \

273 {	267 (__m256d)__builtin_ia32_vpermilpd256((__v4df)__A, (C)); })

274 return (__m256d)__builtin_ia32_vpermilpd256((__v4df)a, c);

275 }

276	268

277 static __inline __m128 __attribute__((__always_inline__, __nodebug__))	269 #define _mm_permute_ps(A, C) __extension__ ({ \

278 _mm_permute_ps(__m128 a, const int c)	270 __m128 __A = (A); \

279 {	271 (__m128)__builtin_ia32_vpermilps((__v4sf)__A, (C)); })

280 return (__m128)__builtin_ia32_vpermilps((__v4sf)a, c);

281 }

282	272

283 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	273 #define _mm256_permute_ps(A, C) __extension__ ({ \

284 _mm256_permute_ps(__m256 a, const int c)	274 __m256 __A = (A); \

285 {	275 (__m256)__builtin_ia32_vpermilps256((__v8sf)__A, (C)); })

286 return (__m256)__builtin_ia32_vpermilps256((__v8sf)a, c);

287 }

288	276

289 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	277 #define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \

290 _mm256_permute2f128_pd(__m256d a, __m256d b, const int c)	278 __m256d __V1 = (V1); \

291 {	279 __m256d __V2 = (V2); \

292 return (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)a, (__v4df)b, c);	280 (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })

293 }

294	281

295 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	282 #define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \

296 _mm256_permute2f128_ps(__m256 a, __m256 b, const int c)	283 __m256 __V1 = (V1); \

297 {	284 __m256 __V2 = (V2); \

298 return (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)a, (__v8sf)b, c);	285 (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })

299 }

300	286

301 static __inline __m256i __attribute__((__always_inline__, __nodebug__))	287 #define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \

302 _mm256_permute2f128_si256(__m256i a, __m256i b, const int c)	288 __m256i __V1 = (V1); \

303 {	289 __m256i __V2 = (V2); \

304 return (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)a, (__v8si)b, c);	290 (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })

305 }

306	291

307 /* Vector Blend */	292 /* Vector Blend */

308 #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \	293 #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \

309 __m256d __V1 = (V1); \	294 __m256d __V1 = (V1); \

310 __m256d __V2 = (V2); \	295 __m256d __V2 = (V2); \

311 (__m256d)__builtin_ia32_blendpd256((__v4df)__V1, (__v4df)__V2, M); })	296 (__m256d)__builtin_ia32_blendpd256((__v4df)__V1, (__v4df)__V2, (M)); })

312	297

313 #define _mm256_blend_ps(V1, V2, M) __extension__ ({ \	298 #define _mm256_blend_ps(V1, V2, M) __extension__ ({ \

314 __m256 __V1 = (V1); \	299 __m256 __V1 = (V1); \

315 __m256 __V2 = (V2); \	300 __m256 __V2 = (V2); \

316 (__m256)__builtin_ia32_blendps256((__v8sf)__V1, (__v8sf)__V2, M); })	301 (__m256)__builtin_ia32_blendps256((__v8sf)__V1, (__v8sf)__V2, (M)); })

317	302

318 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	303 static __inline __m256d __attribute__((__always_inline__, __nodebug__))

319 _mm256_blendv_pd(__m256d a, __m256d b, __m256d c)	304 _mm256_blendv_pd(__m256d a, __m256d b, __m256d c)

320 {	305 {

321 return (__m256d)__builtin_ia32_blendvpd256((__v4df)a, (__v4df)b, (__v4df)c);	306 return (__m256d)__builtin_ia32_blendvpd256((__v4df)a, (__v4df)b, (__v4df)c);

322 }	307 }

323	308

324 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	309 static __inline __m256 __attribute__((__always_inline__, __nodebug__))

325 _mm256_blendv_ps(__m256 a, __m256 b, __m256 c)	310 _mm256_blendv_ps(__m256 a, __m256 b, __m256 c)

326 {	311 {

327 return (__m256)__builtin_ia32_blendvps256((__v8sf)a, (__v8sf)b, (__v8sf)c);	312 return (__m256)__builtin_ia32_blendvps256((__v8sf)a, (__v8sf)b, (__v8sf)c);

328 }	313 }

329	314

330 /* Vector Dot Product */	315 /* Vector Dot Product */

331 #define _mm256_dp_ps(V1, V2, M) __extension__ ({ \	316 #define _mm256_dp_ps(V1, V2, M) __extension__ ({ \

332 __m256 __V1 = (V1); \	317 __m256 __V1 = (V1); \

333 __m256 __V2 = (V2); \	318 __m256 __V2 = (V2); \

334 (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, M); })	319 (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })

335	320

336 /* Vector shuffle */	321 /* Vector shuffle */

337 #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \	322 #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \

338 __m256 __a = (a); \	323 __m256 __a = (a); \

339 __m256 __b = (b); \	324 __m256 __b = (b); \

340 (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \	325 (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \

341 (mask) & 0x3, ((mask) & 0xc) >> 2, \	326 (mask) & 0x3, ((mask) & 0xc) >> 2, \

342 (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \	327 (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \

343 ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \	328 ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \

344 (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })	329 (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
410 __m128d __a = (a); \	395 __m128d __a = (a); \

411 __m128d __b = (b); \	396 __m128d __b = (b); \

412 (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })	397 (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })

413	398

414 #define _mm_cmp_ss(a, b, c) __extension__ ({ \	399 #define _mm_cmp_ss(a, b, c) __extension__ ({ \

415 __m128 __a = (a); \	400 __m128 __a = (a); \

416 __m128 __b = (b); \	401 __m128 __b = (b); \

417 (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })	402 (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })

418	403

419 /* Vector extract */	404 /* Vector extract */

420 static __inline __m128d __attribute__((__always_inline__, __nodebug__))	405 #define _mm256_extractf128_pd(A, O) __extension__ ({ \

421 _mm256_extractf128_pd(__m256d a, const int o)	406 __m256d __A = (A); \

422 {	407 (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)__A, (O)); })

423 return (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)a, o);

424 }

425	408

426 static __inline __m128 __attribute__((__always_inline__, __nodebug__))	409 #define _mm256_extractf128_ps(A, O) __extension__ ({ \

427 _mm256_extractf128_ps(__m256 a, const int o)	410 __m256 __A = (A); \

428 {	411 (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)__A, (O)); })

429 return (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)a, o);

430 }

431	412

432 static __inline __m128i __attribute__((__always_inline__, __nodebug__))	413 #define _mm256_extractf128_si256(A, O) __extension__ ({ \

433 _mm256_extractf128_si256(__m256i a, const int o)	414 __m256i __A = (A); \

434 {	415 (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); })

435 return (__m128i)__builtin_ia32_vextractf128_si256((__v8si)a, o);

436 }

437	416

438 static __inline int __attribute__((__always_inline__, __nodebug__))	417 static __inline int __attribute__((__always_inline__, __nodebug__))

439 _mm256_extract_epi32(__m256i a, int const imm)	418 _mm256_extract_epi32(__m256i a, int const imm)

440 {	419 {

441 __v8si b = (__v8si)a;	420 __v8si b = (__v8si)a;

442 return b[imm];	421 return b[imm];

443 }	422 }

444	423

445 static __inline int __attribute__((__always_inline__, __nodebug__))	424 static __inline int __attribute__((__always_inline__, __nodebug__))

446 _mm256_extract_epi16(__m256i a, int const imm)	425 _mm256_extract_epi16(__m256i a, int const imm)

(...skipping 12 matching lines...) Expand all Loading...
459 #ifdef __x86_64__	438 #ifdef __x86_64__

460 static __inline long long __attribute__((__always_inline__, __nodebug__))	439 static __inline long long __attribute__((__always_inline__, __nodebug__))

461 _mm256_extract_epi64(__m256i a, const int imm)	440 _mm256_extract_epi64(__m256i a, const int imm)

462 {	441 {

463 __v4di b = (__v4di)a;	442 __v4di b = (__v4di)a;

464 return b[imm];	443 return b[imm];

465 }	444 }

466 #endif	445 #endif

467	446

468 /* Vector insert */	447 /* Vector insert */

469 static __inline __m256d __attribute__((__always_inline__, __nodebug__))	448 #define _mm256_insertf128_pd(V1, V2, O) __extension__ ({ \

470 _mm256_insertf128_pd(__m256d a, __m128d b, const int o)	449 __m256d __V1 = (V1); \

471 {	450 __m128d __V2 = (V2); \

472 return (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)a, (__v2df)b, o);	451 (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)__V1, (__v2df)__V2, (O)); })

473 }

474	452

475 static __inline __m256 __attribute__((__always_inline__, __nodebug__))	453 #define _mm256_insertf128_ps(V1, V2, O) __extension__ ({ \

476 _mm256_insertf128_ps(__m256 a, __m128 b, const int o)	454 __m256 __V1 = (V1); \

477 {	455 __m128 __V2 = (V2); \

478 return (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)a, (__v4sf)b, o);	456 (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)__V1, (__v4sf)__V2, (O)); })

479 }

480	457

481 static __inline __m256i __attribute__((__always_inline__, __nodebug__))	458 #define _mm256_insertf128_si256(V1, V2, O) __extension__ ({ \

482 _mm256_insertf128_si256(__m256i a, __m128i b, const int o)	459 __m256i __V1 = (V1); \

483 {	460 __m128i __V2 = (V2); \

484 return (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)a, (__v4si)b, o);	461 (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)__V1, (__v4si)__V2, (O)); })

485 }

486	462

487 static __inline __m256i __attribute__((__always_inline__, __nodebug__))	463 static __inline __m256i __attribute__((__always_inline__, __nodebug__))

488 _mm256_insert_epi32(__m256i a, int b, int const imm)	464 _mm256_insert_epi32(__m256i a, int b, int const imm)

489 {	465 {

490 __v8si c = (__v8si)a;	466 __v8si c = (__v8si)a;

491 c[imm & 7] = b;	467 c[imm & 7] = b;

492 return (__m256i)c;	468 return (__m256i)c;

493 }	469 }

494	470

495 static __inline __m256i __attribute__((__always_inline__, __nodebug__))	471 static __inline __m256i __attribute__((__always_inline__, __nodebug__))

(...skipping 646 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1142 __m128 zero = _mm_setzero_ps();	1118 __m128 zero = _mm_setzero_ps();

1143 return __builtin_shufflevector(in, zero, 0, 1, 2, 3, 4, 4, 4, 4);	1119 return __builtin_shufflevector(in, zero, 0, 1, 2, 3, 4, 4, 4, 4);

1144 }	1120 }

1145	1121

1146 static __inline __m256i __attribute__((__always_inline__, __nodebug__))	1122 static __inline __m256i __attribute__((__always_inline__, __nodebug__))

1147 _mm256_castsi128_si256(__m128i in)	1123 _mm256_castsi128_si256(__m128i in)

1148 {	1124 {

1149 __m128i zero = _mm_setzero_si128();	1125 __m128i zero = _mm_setzero_si128();

1150 return __builtin_shufflevector(in, zero, 0, 1, 2, 2);	1126 return __builtin_shufflevector(in, zero, 0, 1, 2, 2);

1151 }	1127 }

OLD	NEW