third_party/asan/asan_clang_Linux/lib/clang/3.0/include/smmintrin.h - Issue 8404033: New ASan binaries for Linux (r946)

Side by Side Diff: third_party/asan/asan_clang_Linux/lib/clang/3.0/include/smmintrin.h

Issue 8404033: New ASan binaries for Linux (r946) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/

Patch Set: Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « third_party/asan/asan_clang_Linux/lib/clang/3.0/include/pmmintrin.h ('k') | third_party/asan/asan_clang_Linux/lib/clang/3.0/include/stdarg.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===

2 *

3 * Permission is hereby granted, free of charge, to any person obtaining a copy

4 * of this software and associated documentation files (the "Software"), to deal

5 * in the Software without restriction, including without limitation the rights

6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

7 * copies of the Software, and to permit persons to whom the Software is

8 * furnished to do so, subject to the following conditions:

9 *

10 * The above copyright notice and this permission notice shall be included in

11 * all copies or substantial portions of the Software.

12 *

13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

19 * THE SOFTWARE.

20 *

21 *===-----------------------------------------------------------------------===

22 */

23

24 #ifndef _SMMINTRIN_H

25 #define _SMMINTRIN_H

26

27 #ifndef __SSE4_1__

28 #error "SSE4.1 instruction set not enabled"

29 #else

30

31 #include <tmmintrin.h>

32

/* SSE4 rounding-control constants.  The combined values below are what the
 * _mm_ceil_* / _mm_floor_* macros hand to the _mm_round_* macros. */

/* Rounding-direction selectors. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF     0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_ZERO        0x03
#define _MM_FROUND_CUR_DIRECTION  0x04

/* Exception-control flag, OR-ed with one of the selectors above. */
#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC    0x08

/* Ready-made selector/flag combinations.  The operator must be a plain `|`;
 * a backslash-escaped pipe is not valid C and breaks every expansion. */
#define _MM_FROUND_NINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
#define _MM_FROUND_FLOOR     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
#define _MM_FROUND_CEIL      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
#define _MM_FROUND_TRUNC     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
#define _MM_FROUND_RINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)

49

/* Generic rounding entry points: each one forwards its vector operand(s) and
 * the rounding-control immediate M unchanged to the matching builtin. */
#define _mm_round_ps(X, M)    __builtin_ia32_roundps((X), (M))
#define _mm_round_pd(X, M)    __builtin_ia32_roundpd((X), (M))
#define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M))
#define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M))

/* Ceiling variants: the _mm_round_* macros with _MM_FROUND_CEIL. */
#define _mm_ceil_ps(X)    _mm_round_ps((X), _MM_FROUND_CEIL)
#define _mm_ceil_pd(X)    _mm_round_pd((X), _MM_FROUND_CEIL)
#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)

/* Floor variants: the _mm_round_* macros with _MM_FROUND_FLOOR. */
#define _mm_floor_ps(X)    _mm_round_ps((X), _MM_FROUND_FLOOR)
#define _mm_floor_pd(X)    _mm_round_pd((X), _MM_FROUND_FLOOR)
#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)

64

65 /* SSE4 Packed Blending Intrinsics. */

66 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))

67 _mm_blend_pd (__m128d __V1, __m128d __V2, const int __M)

68 {

69 return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M);

70 }

71

72 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))

73 _mm_blend_ps (__m128 __V1, __m128 __V2, const int __M)

74 {

75 return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M);

76 }

77

78 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))

79 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)

80 {

81 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,

82 (__v2df)__M);

83 }

84

85 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))

86 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)

87 {

88 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,

89 (__v4sf)__M);

90 }

91

92 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

93 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)

94 {

95 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,

96 (__v16qi)__M);

97 }

98

99 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

100 _mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M)

101 {

102 return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M);

103 }

104

105 /* SSE4 Dword Multiply Instructions. */

106 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

107 _mm_mullo_epi32 (__m128i __V1, __m128i __V2)

108 {

109 return (__m128i) ((__v4si)__V1 * (__v4si)__V2);

110 }

111

112 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

113 _mm_mul_epi32 (__m128i __V1, __m128i __V2)

114 {

115 return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);

116 }

117

/* SSE4 floating-point dot product.  Both macros forward the two vector
 * operands and the immediate M unchanged to the corresponding builtin. */
#define _mm_dp_ps(X, Y, M) __builtin_ia32_dpps((X), (Y), (M))
#define _mm_dp_pd(X, Y, M) __builtin_ia32_dppd((X), (Y), (M))

121

122 /* SSE4 Streaming Load Hint Instruction. */

123 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

124 _mm_stream_load_si128 (__m128i *__V)

125 {

126 return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V);

127 }

128

129 /* SSE4 Packed Integer Min/Max Instructions. */

130 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

131 _mm_min_epi8 (__m128i __V1, __m128i __V2)

132 {

133 return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);

134 }

135

136 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

137 _mm_max_epi8 (__m128i __V1, __m128i __V2)

138 {

139 return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);

140 }

141

142 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

143 _mm_min_epu16 (__m128i __V1, __m128i __V2)

144 {

145 return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);

146 }

147

148 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

149 _mm_max_epu16 (__m128i __V1, __m128i __V2)

150 {

151 return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);

152 }

153

154 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

155 _mm_min_epi32 (__m128i __V1, __m128i __V2)

156 {

157 return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);

158 }

159

160 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

161 _mm_max_epi32 (__m128i __V1, __m128i __V2)

162 {

163 return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);

164 }

165

166 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

167 _mm_min_epu32 (__m128i __V1, __m128i __V2)

168 {

169 return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);

170 }

171

172 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

173 _mm_max_epu32 (__m128i __V1, __m128i __V2)

174 {

175 return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);

176 }

177

178 /* SSE4 Insertion and Extraction from XMM Register Instructions. */

/* _mm_insert_ps: forwards X, Y and the control immediate N unchanged to
 * __builtin_ia32_insertps128. */
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))

/* _mm_extract_ps: reads element N of X (viewed as __v4sf) and yields it as an
 * int by storing to the float member of a union and reading the int member. */
#define _mm_extract_ps(X, N) (__extension__        \
  ({ __v4sf __src = (__v4sf)(X);                   \
     union { int i; float f; } __bits;             \
     __bits.f = __src[N];                          \
     __bits.i; }))

185

186 /* Miscellaneous insert and extract macros. */

/* Assign element N of X (viewed as __v4sf) to the lvalue D.  The value of the
 * whole expression is that assignment. */
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__  \
  ({ __v4sf __vec = (__v4sf)(X);                   \
     (D) = __vec[N]; }))

190

/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
   an index suitable for _mm_insert_ps: X is shifted left by 6, Y by 4, and Z
   is used as-is.  The operator must be a plain `|`; a backslash-escaped pipe
   is not valid C. */
#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))

194

/* Insert from X into a zeroed vector (_mm_setzero_ps()) using the control
 * index _MM_MK_INSERTPS_NDX(N, 0, 0x0e), so the float at index N of X ends up
 * in the first index of the return. */
#define _MM_PICK_OUT_PS(X, N) \
  _mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e))

198

/* Insert int into packed integer array at index.  Each macro copies X into a
 * local vector of the matching element type, overwrites element N with I, and
 * yields the modified copy (typed as that local vector, not as __m128i). */
#define _mm_insert_epi8(X, I, N) (__extension__    \
  ({ __v16qi __vec = (__v16qi)(X);                 \
     __vec[N] = (I);                               \
     __vec; }))

#define _mm_insert_epi32(X, I, N) (__extension__   \
  ({ __v4si __vec = (__v4si)(X);                   \
     __vec[N] = (I);                               \
     __vec; }))

/* The 64-bit element form is only provided on x86-64. */
#ifdef __x86_64__
#define _mm_insert_epi64(X, I, N) (__extension__   \
  ({ __v2di __vec = (__v2di)(X);                   \
     __vec[N] = (I);                               \
     __vec; }))
#endif /* __x86_64__ */

211

/* Extract int from packed integer array at index.  The 8-bit and 32-bit forms
 * cast the element to an unsigned type (unsigned char / unsigned), so the
 * value is zero extended; the 64-bit form yields the element without a cast.
 */
#define _mm_extract_epi8(X, N) (__extension__      \
  ({ __v16qi __vec = (__v16qi)(X);                 \
     (unsigned char)__vec[N]; }))

#define _mm_extract_epi32(X, N) (__extension__     \
  ({ __v4si __vec = (__v4si)(X);                   \
     (unsigned)__vec[N]; }))

/* The 64-bit element form is only provided on x86-64. */
#ifdef __x86_64__
#define _mm_extract_epi64(X, N) (__extension__     \
  ({ __v2di __vec = (__v2di)(X);                   \
     __vec[N]; }))
#endif /* __x86_64__ */

223

224 /* SSE4 128-bit Packed Integer Comparisons. */

225 static __inline__ int __attribute__((__always_inline__, __nodebug__))

226 _mm_testz_si128(__m128i __M, __m128i __V)

227 {

228 return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);

229 }

230

231 static __inline__ int __attribute__((__always_inline__, __nodebug__))

232 _mm_testc_si128(__m128i __M, __m128i __V)

233 {

234 return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);

235 }

236

237 static __inline__ int __attribute__((__always_inline__, __nodebug__))

238 _mm_testnzc_si128(__m128i __M, __m128i __V)

239 {

240 return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);

241 }

242

/* Convenience wrappers over the _mm_test*_si128 functions. */

/* _mm_testc_si128 of V against V compared with itself via _mm_cmpeq_epi32
 * (intended to produce an all-ones vector). */
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

/* _mm_testnzc_si128 of mask M against V. */
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))

/* _mm_testz_si128 of mask M against V.  The mask must be forwarded: testing V
 * against itself would silently ignore M. */
#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

246

247 /* SSE4 64-bit Packed Integer Comparisons. */

248 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

249 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)

250 {

251 return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2);

252 }

253

254 /* SSE4 Packed Integer Sign-Extension. */

255 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

256 _mm_cvtepi8_epi16(__m128i __V)

257 {

258 return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);

259 }

260

261 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

262 _mm_cvtepi8_epi32(__m128i __V)

263 {

264 return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);

265 }

266

267 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

268 _mm_cvtepi8_epi64(__m128i __V)

269 {

270 return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);

271 }

272

273 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

274 _mm_cvtepi16_epi32(__m128i __V)

275 {

276 return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V);

277 }

278

279 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

280 _mm_cvtepi16_epi64(__m128i __V)

281 {

282 return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);

283 }

284

285 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

286 _mm_cvtepi32_epi64(__m128i __V)

287 {

288 return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);

289 }

290

291 /* SSE4 Packed Integer Zero-Extension. */

292 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

293 _mm_cvtepu8_epi16(__m128i __V)

294 {

295 return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);

296 }

297

298 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

299 _mm_cvtepu8_epi32(__m128i __V)

300 {

301 return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);

302 }

303

304 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

305 _mm_cvtepu8_epi64(__m128i __V)

306 {

307 return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);

308 }

309

310 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

311 _mm_cvtepu16_epi32(__m128i __V)

312 {

313 return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);

314 }

315

316 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

317 _mm_cvtepu16_epi64(__m128i __V)

318 {

319 return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);

320 }

321

322 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

323 _mm_cvtepu32_epi64(__m128i __V)

324 {

325 return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);

326 }

327

328 /* SSE4 Pack with Unsigned Saturation. */

329 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

330 _mm_packus_epi32(__m128i __V1, __m128i __V2)

331 {

332 return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);

333 }

334

/* SSE4 multiple packed sums of absolute difference: forwards both vector
 * operands and the immediate M unchanged to __builtin_ia32_mpsadbw128. */
#define _mm_mpsadbw_epu8(X, Y, M) \
  __builtin_ia32_mpsadbw128((X), (Y), (M))

337

338 /* These definitions are normally in nmmintrin.h, but gcc puts them in here

339 so we'll do the same. */

340 #ifdef __SSE4_2__

341

/* Control-immediate fields for the _mm_cmpXstr* string-comparison macros.
 * One value from each group is OR-ed together to build the immediate. */

/* These specify the type of data that we're comparing. */
#define _SIDD_UBYTE_OPS 0x00
#define _SIDD_UWORD_OPS 0x01
#define _SIDD_SBYTE_OPS 0x02
#define _SIDD_SWORD_OPS 0x03

/* These specify the type of comparison operation. */
#define _SIDD_CMP_EQUAL_ANY     0x00
#define _SIDD_CMP_RANGES        0x04
#define _SIDD_CMP_EQUAL_EACH    0x08
#define _SIDD_CMP_EQUAL_ORDERED 0x0c

/* These macros specify the polarity of the operation. */
#define _SIDD_POSITIVE_POLARITY        0x00
#define _SIDD_NEGATIVE_POLARITY        0x10
#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30

/* These macros are used in _mm_cmpXstri() to specify the return. */
#define _SIDD_LEAST_SIGNIFICANT 0x00
#define _SIDD_MOST_SIGNIFICANT  0x40

/* These macros are used in _mm_cmpXstrm() to specify the return.  They share
 * bit 0x40 with the index selectors above. */
#define _SIDD_BIT_MASK  0x00
#define _SIDD_UNIT_MASK 0x40

367

368 /* SSE4.2 Packed Comparison Intrinsics. */

/* Implicit-length string compares: both macros forward the two vector
 * operands and the control immediate M unchanged to their builtin. */
#define _mm_cmpistrm(A, B, M) \
  __builtin_ia32_pcmpistrm128((A), (B), (M))
#define _mm_cmpistri(A, B, M) \
  __builtin_ia32_pcmpistri128((A), (B), (M))

371

/* Explicit-length string compares: each macro forwards vector A with its
 * length LA, vector B with its length LB, and the control immediate M to its
 * builtin.  The parameter list of each macro must use the same names as its
 * expansion, otherwise the expansion refers to undefined identifiers. */
#define _mm_cmpestrm(A, LA, B, LB, M) \
  __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M))
#define _mm_cmpestri(A, LA, B, LB, M) \
  __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M))

376

377 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */

/* Implicit-length string compares that return one result flag each.  Like
 * _mm_cmpistrm/_mm_cmpistri they take two vectors and the control immediate
 * only: (A, B, M).  There are no length operands in the implicit-length
 * forms, so the five-parameter (A, LA, B, LB, M) shape belongs to the
 * _mm_cmpestr* family alone.
 * NOTE(review): assumes the pcmpistri?128 builtins take three arguments like
 * __builtin_ia32_pcmpistri128 -- confirm against the compiler's builtin list. */
#define _mm_cmpistra(A, B, M) \
  __builtin_ia32_pcmpistria128((A), (B), (M))
#define _mm_cmpistrc(A, B, M) \
  __builtin_ia32_pcmpistric128((A), (B), (M))
#define _mm_cmpistro(A, B, M) \
  __builtin_ia32_pcmpistrio128((A), (B), (M))
#define _mm_cmpistrs(A, B, M) \
  __builtin_ia32_pcmpistris128((A), (B), (M))
#define _mm_cmpistrz(A, B, M) \
  __builtin_ia32_pcmpistriz128((A), (B), (M))

388

/* Explicit-length string compares that return one result flag each.  Every
 * macro forwards vector A with length LA, vector B with length LB, and the
 * control immediate M unchanged to the builtin of the same suffix. */
#define _mm_cmpestra(A, LA, B, LB, M)                      \
  __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M))

#define _mm_cmpestrc(A, LA, B, LB, M)                      \
  __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M))

#define _mm_cmpestro(A, LA, B, LB, M)                      \
  __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M))

#define _mm_cmpestrs(A, LA, B, LB, M)                      \
  __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M))

#define _mm_cmpestrz(A, LA, B, LB, M)                      \
  __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))

399

400 /* SSE4.2 Compare Packed Data -- Greater Than. */

401 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))

402 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)

403 {

404 return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2);

405 }

406

407 /* SSE4.2 Accumulate CRC32. */

/* _mm_crc32_u8: accumulates the unsigned char __D into the running value __C
 * via __builtin_ia32_crc32qi and returns the updated value. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u8(unsigned int __C, unsigned char __D)
{
  unsigned int __crc = __builtin_ia32_crc32qi(__C, __D);

  return __crc;
}

413

/* _mm_crc32_u16: accumulates the unsigned short __D into the running value
 * __C via __builtin_ia32_crc32hi and returns the updated value. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u16(unsigned int __C, unsigned short __D)
{
  unsigned int __crc = __builtin_ia32_crc32hi(__C, __D);

  return __crc;
}

419

/* _mm_crc32_u32: accumulates the unsigned int __D into the running value __C
 * via __builtin_ia32_crc32si and returns the updated value. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u32(unsigned int __C, unsigned int __D)
{
  unsigned int __crc = __builtin_ia32_crc32si(__C, __D);

  return __crc;
}

425

/* _mm_crc32_u64: accumulates the unsigned long long __D into the running
 * value __C via __builtin_ia32_crc32di and returns the updated value.  Only
 * provided on x86-64.  The attribute name must be the single token
 * __nodebug__; splitting it is a syntax error. */
#ifdef __x86_64__
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
{
  return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */

433

434 /* SSE4.2 Population Count. */

/* _mm_popcnt_u32: population count of __A, computed with the generic
 * __builtin_popcount and returned as an int. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_popcnt_u32(unsigned int __A)
{
  int __bits = __builtin_popcount(__A);

  return __bits;
}

440

/* _mm_popcnt_u64: population count of __A, computed with the generic
 * __builtin_popcountll and widened to long long.  Only provided on x86-64. */
#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_popcnt_u64(unsigned long long __A)
{
  long long __bits = __builtin_popcountll(__A);

  return __bits;
}
#endif /* __x86_64__ */

448

449 #endif /* __SSE4_2__ */

450 #endif /* __SSE4_1__ */

451

452 #endif /* _SMMINTRIN_H */

OLD	NEW