OLD | NEW |
1 ; This tests each of the supported NaCl atomic instructions for every | 1 ; This tests each of the supported NaCl atomic instructions for every |
2 ; size allowed. | 2 ; size allowed. |
3 | 3 |
4 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ | 4 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ |
5 ; RUN: | FileCheck %s | 5 ; RUN: | FileCheck %s |
6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ |
7 ; RUN: | FileCheck --check-prefix=O2 %s | 7 ; RUN: | FileCheck --check-prefix=O2 %s |
8 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \ | 8 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \ |
9 ; RUN: | FileCheck %s | 9 ; RUN: | FileCheck %s |
10 | 10 |
(...skipping 10 matching lines...)
21 declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) | 21 declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) |
22 declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) | 22 declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) |
23 declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32) | 23 declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32) |
24 declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32) | 24 declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32) |
25 declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) | 25 declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) |
26 declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) | 26 declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) |
27 declare void @llvm.nacl.atomic.fence(i32) | 27 declare void @llvm.nacl.atomic.fence(i32) |
28 declare void @llvm.nacl.atomic.fence.all() | 28 declare void @llvm.nacl.atomic.fence.all() |
29 declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) | 29 declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) |
30 | 30 |
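The i32 immediates in these intrinsic calls are ABI enum values, not data: the first operand of the rmw intrinsics selects the operation, and the trailing operand selects the memory order. Below is a minimal sketch of a seq_cst fetch-and-add, assuming the encoding the tests in this file rely on (1 = add, 3 = or, 6 = seq_cst):

define i32 @example_fetch_add_32(i32 %iptr, i32 %v) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  ; i32 1 selects the add operation; i32 6 requests seq_cst ordering
  ; (assumed PNaCl encoding, consistent with the add/or tests below).
  %old = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
  ret i32 %old
}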
| 31 @Global8 = internal global [1 x i8] zeroinitializer, align 1 |
| 32 @Global16 = internal global [2 x i8] zeroinitializer, align 2 |
| 33 @Global32 = internal global [4 x i8] zeroinitializer, align 4 |
| 34 @Global64 = internal global [8 x i8] zeroinitializer, align 8 |
| 35 |
31 ; NOTE: The LLC equivalents of the 16-bit atomic operations are expanded | 36 ; NOTE: The LLC equivalents of the 16-bit atomic operations are expanded |
32 ; to 32-bit operations. For Subzero, assume that real 16-bit operations | 37 ; to 32-bit operations. For Subzero, assume that real 16-bit operations |
33 ; will be usable (the validator will be fixed): | 38 ; will be usable (the validator will be fixed): |
34 ; https://code.google.com/p/nativeclient/issues/detail?id=2981 | 39 ; https://code.google.com/p/nativeclient/issues/detail?id=2981 |
35 | 40 |
36 ;;; Load | 41 ;;; Load |
37 | 42 |
38 ; x86 guarantees that naturally aligned loads and stores are atomic. | 43 ; x86 guarantees that naturally aligned loads and stores are atomic. |
39 ; The PNaCl IR requires all atomic accesses to be naturally aligned. | 44 ; The PNaCl IR requires all atomic accesses to be naturally aligned. |
40 | 45 |
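To make that concrete, here is a minimal sketch of a naturally aligned seq_cst load; it assumes the @llvm.nacl.atomic.load.i32(i32*, i32) declaration from the elided declarations above:

define i32 @example_atomic_load_32(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i32*
  ; A naturally aligned 32-bit seq_cst load can lower to a plain mov on x86.
  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
  ret i32 %r
}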
(...skipping 195 matching lines...)
236 ; clobbered in the loop, and the RHS needs to remain live. | 241 ; clobbered in the loop, and the RHS needs to remain live. |
237 ; CHECK: add ebx,{{.*e.[^x]}} | 242 ; CHECK: add ebx,{{.*e.[^x]}} |
238 ; CHECK: mov ecx,edx | 243 ; CHECK: mov ecx,edx |
239 ; CHECK: adc ecx,{{.*e.[^x]}} | 244 ; CHECK: adc ecx,{{.*e.[^x]}} |
240 ; Ptr cannot be eax, ebx, ecx, or edx (those are taken for the expected | 245 ; Ptr cannot be eax, ebx, ecx, or edx (those are taken for the expected |
241 ; and desired values). It can be esi, edi, or ebp, though, so we have to | 246 ; and desired values). It can be esi, edi, or ebp, though, so we have to |
242 ; be careful not to reject eb* and ed* wholesale. | 247 ; be careful not to reject eb* and ed* wholesale. |
243 ; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}} | 248 ; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}} |
244 ; CHECK: jne [[LABEL]] | 249 ; CHECK: jne [[LABEL]] |
245 | 250 |
| 251 ; Same test as above, but with a global address to test FakeUse issues. |
| 252 define i64 @test_atomic_rmw_add_64_global(i64 %v) { |
| 253 entry: |
| 254 %ptr = bitcast [8 x i8]* @Global64 to i64* |
| 255 %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6) |
| 256 ret i64 %a |
| 257 } |
| 258 ; CHECK-LABEL: test_atomic_rmw_add_64_global |
| 259 |
246 ; Test with some more register pressure. When we have an alloca, ebp is | 260 ; Test with some more register pressure. When we have an alloca, ebp is |
247 ; used to manage the stack frame, so it cannot be used as a register either. | 261 ; used to manage the stack frame, so it cannot be used as a register either. |
248 declare void @use_ptr(i32 %iptr) | 262 declare void @use_ptr(i32 %iptr) |
249 | 263 |
250 define i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) { | 264 define i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) { |
251 entry: | 265 entry: |
252 %alloca_ptr = alloca i8, i32 16, align 16 | 266 %alloca_ptr = alloca i8, i32 16, align 16 |
253 %ptr = inttoptr i32 %iptr to i64* | 267 %ptr = inttoptr i32 %iptr to i64* |
254 %old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6) | 268 %old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6) |
255 store i8 0, i8* %alloca_ptr, align 1 | 269 store i8 0, i8* %alloca_ptr, align 1 |
(...skipping 139 matching lines...)
395 ret i32 %a_ext | 409 ret i32 %a_ext |
396 } | 410 } |
397 ; CHECK-LABEL: test_atomic_rmw_or_8 | 411 ; CHECK-LABEL: test_atomic_rmw_or_8 |
398 ; CHECK: mov al,BYTE PTR | 412 ; CHECK: mov al,BYTE PTR |
399 ; Dest cannot be eax here, because eax is used for the old value. We also | 413 ; Dest cannot be eax here, because eax is used for the old value. We also |
400 ; want to make sure that cmpxchg's source is the same register. | 414 ; want to make sure that cmpxchg's source is the same register. |
401 ; CHECK: or [[REG:[^a].]] | 415 ; CHECK: or [[REG:[^a].]] |
402 ; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]] | 416 ; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]] |
403 ; CHECK: jne | 417 ; CHECK: jne |
404 | 418 |
| 419 ; Same test as above, but with a global address to test FakeUse issues. |
| 420 define i32 @test_atomic_rmw_or_8_global(i32 %v) { |
| 421 entry: |
| 422 %trunc = trunc i32 %v to i8 |
| 423 %ptr = bitcast [1 x i8]* @Global8 to i8* |
| 424 %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6) |
| 425 %a_ext = zext i8 %a to i32 |
| 426 ret i32 %a_ext |
| 427 } |
| 428 ; CHECK-LABEL: test_atomic_rmw_or_8_global |
| 429 |
405 define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) { | 430 define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) { |
406 entry: | 431 entry: |
407 %trunc = trunc i32 %v to i16 | 432 %trunc = trunc i32 %v to i16 |
408 %ptr = inttoptr i32 %iptr to i16* | 433 %ptr = inttoptr i32 %iptr to i16* |
409 %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6) | 434 %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6) |
410 %a_ext = zext i16 %a to i32 | 435 %a_ext = zext i16 %a to i32 |
411 ret i32 %a_ext | 436 ret i32 %a_ext |
412 } | 437 } |
413 ; CHECK-LABEL: test_atomic_rmw_or_16 | 438 ; CHECK-LABEL: test_atomic_rmw_or_16 |
414 ; CHECK: mov ax,WORD PTR | 439 ; CHECK: mov ax,WORD PTR |
415 ; CHECK: or [[REG:[^a].]] | 440 ; CHECK: or [[REG:[^a].]] |
416 ; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],[[REG]] | 441 ; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],[[REG]] |
417 ; CHECK: jne | 442 ; CHECK: jne |
418 | 443 |
| 444 ; Same test as above, but with a global address to test FakeUse issues. |
| 445 define i32 @test_atomic_rmw_or_16_global(i32 %v) { |
| 446 entry: |
| 447 %trunc = trunc i32 %v to i16 |
| 448 %ptr = bitcast [2 x i8]* @Global16 to i16* |
| 449 %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6) |
| 450 %a_ext = zext i16 %a to i32 |
| 451 ret i32 %a_ext |
| 452 } |
| 453 ; CHECK-LABEL: test_atomic_rmw_or_16_global |
| 454 |
419 define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) { | 455 define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) { |
420 entry: | 456 entry: |
421 %ptr = inttoptr i32 %iptr to i32* | 457 %ptr = inttoptr i32 %iptr to i32* |
422 %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6) | 458 %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6) |
423 ret i32 %a | 459 ret i32 %a |
424 } | 460 } |
425 ; CHECK-LABEL: test_atomic_rmw_or_32 | 461 ; CHECK-LABEL: test_atomic_rmw_or_32 |
426 ; CHECK: mov eax,DWORD PTR | 462 ; CHECK: mov eax,DWORD PTR |
427 ; CHECK: or [[REG:e[^a].]] | 463 ; CHECK: or [[REG:e[^a].]] |
428 ; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]] | 464 ; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]] |
429 ; CHECK: jne | 465 ; CHECK: jne |
430 | 466 |
| 467 ; Same test as above, but with a global address to test FakeUse issues. |
| 468 define i32 @test_atomic_rmw_or_32_global(i32 %v) { |
| 469 entry: |
| 470 %ptr = bitcast [4 x i8]* @Global32 to i32* |
| 471 %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6) |
| 472 ret i32 %a |
| 473 } |
| 474 ; CHECK-LABEL: test_atomic_rmw_or_32_global |
| 475 |
431 define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) { | 476 define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) { |
432 entry: | 477 entry: |
433 %ptr = inttoptr i32 %iptr to i64* | 478 %ptr = inttoptr i32 %iptr to i64* |
434 %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6) | 479 %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6) |
435 ret i64 %a | 480 ret i64 %a |
436 } | 481 } |
437 ; CHECK-LABEL: test_atomic_rmw_or_64 | 482 ; CHECK-LABEL: test_atomic_rmw_or_64 |
438 ; CHECK: push ebx | 483 ; CHECK: push ebx |
439 ; CHECK: mov eax,DWORD PTR [{{.*}}] | 484 ; CHECK: mov eax,DWORD PTR [{{.*}}] |
440 ; CHECK: mov edx,DWORD PTR [{{.*}}+0x4] | 485 ; CHECK: mov edx,DWORD PTR [{{.*}}+0x4] |
(...skipping 426 matching lines...)
867 | 912 |
868 not_lock_free: | 913 not_lock_free: |
869 %z = add i32 %x, %y | 914 %z = add i32 %x, %y |
870 ret i32 %z | 915 ret i32 %z |
871 } | 916 } |
872 ; CHECK-LABEL: test_atomic_is_lock_free_can_dce | 917 ; CHECK-LABEL: test_atomic_is_lock_free_can_dce |
873 ; CHECK: mov {{.*}},0x1 | 918 ; CHECK: mov {{.*}},0x1 |
874 ; CHECK: ret | 919 ; CHECK: ret |
875 ; CHECK: add | 920 ; CHECK: add |
876 ; CHECK: ret | 921 ; CHECK: ret |
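For reference, a minimal sketch of the call shape this test exercises, using the is.lock.free declaration from line 29 above; the assumption (consistent with the mov {{.*}},0x1 CHECK) is that a supported 4-byte size folds to a constant true:

define i32 @example_is_lock_free(i32 %iptr) {
entry:
  %ptr = inttoptr i32 %iptr to i8*
  ; For a supported size the result is a compile-time constant, which is
  ; what allows the compare and branch in the test above to be DCE'd.
  %f = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
  %r = zext i1 %f to i32
  ret i32 %r
}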