| Index: tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
|
| diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
|
| index 8dfcc6120e77c04b896bec125432063705574c69..9885b88b8dfe242322511fd7eb50900439852c61 100644
|
| --- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
|
| +++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
|
| @@ -2,6 +2,7 @@
|
| ; size allowed.
|
|
|
| ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
|
| +; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM
|
| ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
|
| ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
|
| ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
|
| @@ -28,6 +29,11 @@ declare void @llvm.nacl.atomic.fence(i32)
|
| declare void @llvm.nacl.atomic.fence.all()
|
| declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)
|
|
|
| +; NOTE: The LLC equivalents of 16-bit atomic operations are expanded
|
| +; into 32-bit operations. For Subzero, assume that real 16-bit operations
|
| +; will be usable (the validator will be fixed):
|
| +; https://code.google.com/p/nativeclient/issues/detail?id=2981
|
| +
|
| ;;; Load
|
|
|
| ; x86 guarantees load/store to be atomic if naturally aligned.
|
| @@ -107,7 +113,6 @@ entry:
|
| ; CHECK: movq x{{.*}}, qword
|
| ; CHECK: movq qword {{.*}}, x{{.*}}
|
|
|
| -
|
| ;;; Store
|
|
|
| define void @test_atomic_store_8(i32 %iptr, i32 %v) {
|
| @@ -169,6 +174,8 @@ entry:
|
|
|
| ;;; RMW
|
|
|
| +;; add
|
| +
|
| define i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
|
| entry:
|
| %trunc = trunc i32 %v to i8
|
| @@ -180,7 +187,7 @@ entry:
|
| }
|
| ; CHECK-LABEL: test_atomic_rmw_add_8
|
| ; CHECK: lock xadd byte {{.*}}, [[REG:.*]]
|
| -; CHECK: mov {{.*}}, {{.*}}[[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
|
|
| define i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
|
| entry:
|
| @@ -192,7 +199,7 @@ entry:
|
| }
|
| ; CHECK-LABEL: test_atomic_rmw_add_16
|
| ; CHECK: lock xadd word {{.*}}, [[REG:.*]]
|
| -; CHECK: mov {{.*}}, {{.*}}[[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
|
|
| define i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
|
| entry:
|
| @@ -202,16 +209,61 @@ entry:
|
| }
|
| ; CHECK-LABEL: test_atomic_rmw_add_32
|
| ; CHECK: lock xadd dword {{.*}}, [[REG:.*]]
|
| -; CHECK: mov {{.*}}, {{.*}}[[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
|
|
| -;define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i64*
|
| -; %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
|
| -; ret i64 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_add_64
|
| -; CHECKLATER: uh need a... cmpxchg8b loop.
|
| +define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_add_64
|
| +; CHECK: push ebx
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; RHS of add cannot be any of the e[abcd]x regs because they are
|
| +; clobbered in the loop, and the RHS needs to remain live.
|
| +; CHECK: add ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: adc ecx, {{.*e.[^x]}}
|
| +; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
|
| +; It can be esi, edi, or ebp though, for example (so we need to be careful
|
| +; about rejecting eb* and ed*).
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
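|
| +; For reference, a sketch of the whole expansion, with illustrative
|
| +; register choices (the allocator only has to honor the constraints
|
| +; noted above):
|
| +;   mov eax, dword ptr [esi]        ; old value, low half
|
| +;   mov edx, dword ptr [esi+4]      ; old value, high half
|
| +; .Lretry:
|
| +;   mov ebx, eax                    ; desired low = old low
|
| +;   add ebx, edi                    ; desired low += v low
|
| +;   mov ecx, edx                    ; desired high = old high
|
| +;   adc ecx, ebp                    ; desired high += v high + carry
|
| +;   lock cmpxchg8b qword ptr [esi]  ; if [esi] == edx:eax, store ecx:ebx;
|
| +;                                   ; otherwise reload edx:eax, clear ZF
|
| +;   jne .Lretry
|
| +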
|
| +; Test with some more register pressure. When we have an alloca, ebp is
|
| +; used to manage the stack frame, so it cannot be used as a register either.
|
| +declare void @use_ptr(i32 %iptr)
|
| +
|
| +define i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %alloca_ptr = alloca i8, i32 16, align 16
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
|
| + store i8 0, i8* %alloca_ptr, align 1
|
| + store i8 1, i8* %alloca_ptr, align 1
|
| + store i8 2, i8* %alloca_ptr, align 1
|
| + store i8 3, i8* %alloca_ptr, align 1
|
| + %__5 = ptrtoint i8* %alloca_ptr to i32
|
| + call void @use_ptr(i32 %__5)
|
| + ret i64 %old
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_add_64_alloca
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
|
| +; It also cannot be ebp since we use that for alloca. Also make sure it's
|
| +; not esp, since that's the stack pointer and mucking with it will break
|
| +; the later use_ptr function call.
|
| +; That pretty much leaves esi or edi as the only viable registers.
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{[ds]}}i]
|
| +; CHECK: call use_ptr
|
|
|
| define i32 @test_atomic_rmw_add_32_ignored(i32 %iptr, i32 %v) {
|
| entry:
|
| @@ -219,129 +271,562 @@ entry:
|
| %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
|
| ret i32 %v
|
| }
|
| +; Technically this could use "lock add" instead of "lock xadd", if liveness
|
| +; tells us that the destination variable is dead.
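|
| +; In that case the whole sequence could plausibly shrink to a single
|
| +; instruction, e.g. (registers illustrative):
|
| +;   lock add dword ptr [eax], ecx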
|
| ; CHECK-LABEL: test_atomic_rmw_add_32_ignored
|
| ; CHECK: lock xadd dword {{.*}}, [[REG:.*]]
|
|
|
| -;define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
|
| -; ret i32 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_sub_32
|
| -; CHECKLATER: neg
|
| -; CHECKLATER: lock
|
| -; CHECKLATER: xadd
|
| -
|
| -;define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
|
| -; ret i32 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_or_32
|
| -; Need a cmpxchg loop.
|
| -
|
| -;define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
|
| -; ret i32 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_and_32
|
| -; Also a cmpxchg loop.
|
| -
|
| -;define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
|
| -; ret i32 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_xor_32
|
| -; Also a cmpxchg loop.
|
| -
|
| -;define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
|
| -; ret i32 %a
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_rmw_xchg_32
|
| +; Atomic RMW 64 needs to be expanded into its own loop.
|
| +; Make sure that works w/ non-trivial function bodies.
|
| +define i64 @test_atomic_rmw_add_64_loop(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %x = icmp ult i64 %v, 100
|
| + br i1 %x, label %err, label %loop
|
| +
|
| +loop:
|
| + %v_next = phi i64 [ %v, %entry ], [ %next, %loop ]
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %next = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v_next, i32 6)
|
| + %success = icmp eq i64 %next, 100
|
| + br i1 %success, label %done, label %loop
|
| +
|
| +done:
|
| + ret i64 %next
|
| +
|
| +err:
|
| + ret i64 0
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_add_64_loop
|
| +; CHECK: push ebx
|
| +; CHECK-LABEL: .Ltest_atomic_rmw_add_64_loop{{.*}}loop
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; CHECK: add ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: adc ecx, {{.*e.[^x]}}
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +; CHECK-LABEL: .Ltest_atomic_rmw_add_64_loop{{.*}}done
|
| +
|
| +;; sub
|
| +
|
| +define i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %trunc, i32 6)
|
| + %a_ext = zext i8 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_sub_8
|
| +; CHECK: neg [[REG:.*]]
|
| +; CHECK: lock xadd byte {{.*}}, [[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
| +
|
| +define i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %trunc, i32 6)
|
| + %a_ext = zext i16 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_sub_16
|
| +; CHECK: neg [[REG:.*]]
|
| +; CHECK: lock xadd word {{.*}}, [[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
| +
|
| +define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_sub_32
|
| +; CHECK: neg [[REG:.*]]
|
| +; CHECK: lock xadd dword {{.*}}, [[REG]]
|
| +; CHECK: mov {{.*}}, [[REG]]
|
| +
|
| +define i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_sub_64
|
| +; CHECK: push ebx
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; CHECK: sub ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: sbb ecx, {{.*e.[^x]}}
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +
|
| +define i32 @test_atomic_rmw_sub_32_ignored(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %v
|
| +}
|
| +; Could use "lock sub" instead of "neg; lock xadd"
|
| +; CHECK-LABEL: test_atomic_rmw_sub_32_ignored
|
| +; CHECK: neg [[REG:.*]]
|
| +; CHECK: lock xadd dword {{.*}}, [[REG]]
|
| +
|
| +;; or
|
| +
|
| +define i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
|
| + %a_ext = zext i8 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_or_8
|
| +; CHECK: mov al, byte ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; Dest cannot be eax here, because eax is used for the old value. We also
|
| +; want to make sure that cmpxchg's source is the same register.
|
| +; CHECK: or [[REG:[^a].]]
|
| +; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
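|
| +; Shape of the cmpxchg loop, with illustrative registers:
|
| +;   mov al, byte ptr [esi]           ; old value
|
| +; .Lretry:
|
| +;   mov bl, al                       ; copy old
|
| +;   or bl, dl                        ; desired = old | v
|
| +;   lock cmpxchg byte ptr [esi], bl  ; if [esi] == al, store bl;
|
| +;                                    ; otherwise al = [esi], ZF cleared
|
| +;   jne .Lretry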
|
| +
|
| +define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
|
| + %a_ext = zext i16 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_or_16
|
| +; CHECK: mov ax, word ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: or [[REG:[^a].]]
|
| +; CHECK: lock cmpxchg word ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_or_32
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: or [[REG:e[^a].]]
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_or_64
|
| +; CHECK: push ebx
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; CHECK: or ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: or ecx, {{.*e.[^x]}}
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_or_32_ignored(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %v
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_or_32_ignored
|
| +; Could just "lock or", if we inspect the liveness information first.
|
| +; Would also need a way to introduce "lock"'edness to binary
|
| +; operators without introducing overhead on the more common binary ops.
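|
| +; e.g., plausibly just (registers illustrative):
|
| +;   lock or dword ptr [eax], ecx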
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: or [[REG:e[^a].]]
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +;; and
|
| +
|
| +define i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %trunc, i32 6)
|
| + %a_ext = zext i8 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_and_8
|
| +; CHECK: mov al, byte ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: and [[REG:[^a].]]
|
| +; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %trunc, i32 6)
|
| + %a_ext = zext i16 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_and_16
|
| +; CHECK: mov ax, word ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: and
|
| +; CHECK: lock cmpxchg word ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_and_32
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: and
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_and_64
|
| +; CHECK: push ebx
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; CHECK: and ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: and ecx, {{.*e.[^x]}}
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_and_32_ignored(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %v
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_and_32_ignored
|
| +; Could just "lock and"
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: and
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +;; xor
|
| +
|
| +define i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %trunc, i32 6)
|
| + %a_ext = zext i8 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xor_8
|
| +; CHECK: mov al, byte ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: xor [[REG:[^a].]]
|
| +; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %trunc, i32 6)
|
| + %a_ext = zext i16 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xor_16
|
| +; CHECK: mov ax, word ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: xor
|
| +; CHECK: lock cmpxchg word ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +
|
| +define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xor_32
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: xor
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xor_64
|
| +; CHECK: push ebx
|
| +; CHECK: mov eax, dword ptr [{{.*}}]
|
| +; CHECK: mov edx, dword ptr [{{.*}}+4]
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: mov ebx, eax
|
| +; CHECK: xor ebx, {{.*e.[^x]}}
|
| +; CHECK: mov ecx, edx
|
| +; CHECK: xor ecx, {{.*e.[^x]}}
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_xor_32_ignored(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %v
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xor_32_ignored
|
| +; CHECK: mov eax, dword ptr
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: xor
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +;; exchange
|
| +
|
| +define i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %trunc, i32 6)
|
| + %a_ext = zext i8 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xchg_8
|
| +; CHECK: xchg byte ptr {{.*}}, [[REG:.*]]
|
| +
|
| +define i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %trunc = trunc i32 %v to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %trunc, i32 6)
|
| + %a_ext = zext i16 %a to i32
|
| + ret i32 %a_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xchg_16
|
| +; CHECK: xchg word ptr {{.*}}, [[REG:.*]]
|
| +
|
| +define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xchg_32
|
| +; CHECK: xchg dword ptr {{.*}}, [[REG:.*]]
|
| +
|
| +define i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %v, i32 6)
|
| + ret i64 %a
|
| +}
|
| +; CHECK-LABEL: test_atomic_rmw_xchg_64
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; CHECK: .L[[LABEL:.*]]:
|
| +; CHECK: lock cmpxchg8b qword ptr [{{e.[^x]}}]
|
| +; CHECK: jne .L[[LABEL]]
|
| +
|
| +define i32 @test_atomic_rmw_xchg_32_ignored(i32 %iptr, i32 %v) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
|
| + ret i32 %v
|
| +}
|
| +; In this case, ignoring the return value doesn't help. The xchg is
|
| +; used to do an atomic store.
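|
| +; (xchg with a memory operand is implicitly locked, so no "lock" prefix
|
| +; is needed in the checks below.)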
|
| +; CHECK-LABEL: test_atomic_rmw_xchg_32_ignored
|
| +; CHECK: xchg dword ptr {{.*}}, [[REG:.*]]
|
|
|
| ;;;; Cmpxchg
|
|
|
| -;define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i8*
|
| -; %trunc_exp = trunc i32 %expected to i8
|
| -; %trunc_des = trunc i32 %desired to i8
|
| -; %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
|
| -; i8 %trunc_des, i32 6, i32 6)
|
| -; %old_ext = zext i8 %old to i32
|
| -; ret i32 %old_ext
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_cmpxchg_8
|
| -; CHECKLATER: lock cmpxchg byte
|
| -
|
| -;define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i16*
|
| -; %trunc_exp = trunc i32 %expected to i16
|
| -; %trunc_des = trunc i32 %desired to i16
|
| -; %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
|
| -; i16 %trunc_des, i32 6, i32 6)
|
| -; %old_ext = zext i16 %old to i32
|
| -; ret i32 %old_ext
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_cmpxchg_16
|
| -; This one is a bit gross for NaCl right now.
|
| -; https://code.google.com/p/nativeclient/issues/detail?id=2981
|
| -; But we'll assume that NaCl will have it fixed...
|
| -; CHECKLATER: lock cmpxchg word
|
| -
|
| -;define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
|
| -; i32 %desired, i32 6, i32 6)
|
| -; ret i32 %old
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_cmpxchg_32
|
| -; CHECKLATER: mov eax
|
| -; CHECKLATER: mov ecx
|
| -; CHECKLATER: lock cmpxchg dword
|
| -
|
| -;define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
|
| -;entry:
|
| -; %ptr = inttoptr i32 %iptr to i64*
|
| -; %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
|
| -; i64 %desired, i32 6, i32 6)
|
| -; ret i64 %old
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_cmpxchg_64
|
| -; CHECKLATER: mov eax
|
| -; CHECKLATER: mov edx
|
| -; CHECKLATER: mov ebx
|
| -; CHECKLATER: mov ecx
|
| -; CHECKLATER: lock cmpxchg8b qword
|
| -
|
| -;define i32 @test_atomic_cmpxchg_32_loop(i32 %iptr,
|
| -; i32 %expected, i32 %desired) {
|
| -;entry:
|
| -; br label %loop
|
| -;
|
| -;loop:
|
| -; %cmp = phi i32 [ %expected, %entry], [%old, %loop]
|
| -; %ptr = inttoptr i32 %iptr to i32*
|
| -; %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %cmp,
|
| -; i32 %desired, i32 6, i32 6)
|
| -; %success = icmp eq i32 %cmp, %old
|
| -; br i1 %success, label %done, label %loop
|
| -;
|
| -;done:
|
| -; ret i32 %old
|
| -;}
|
| -; CHECKLATER-LABEL: test_atomic_cmpxchg_32_loop
|
| +define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
|
| +entry:
|
| + %trunc_exp = trunc i32 %expected to i8
|
| + %trunc_des = trunc i32 %desired to i8
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
|
| + i8 %trunc_des, i32 6, i32 6)
|
| + %old_ext = zext i8 %old to i32
|
| + ret i32 %old_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_8
|
| +; CHECK: mov al, {{.*}}
|
| +; Need to check that eax isn't used as the address register or the desired
|
| +; value, since it is already used as the *expected* register.
|
| +; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], {{[^a]}}
|
| +
|
| +define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
|
| +entry:
|
| + %trunc_exp = trunc i32 %expected to i16
|
| + %trunc_des = trunc i32 %desired to i16
|
| + %ptr = inttoptr i32 %iptr to i16*
|
| + %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
|
| + i16 %trunc_des, i32 6, i32 6)
|
| + %old_ext = zext i16 %old to i32
|
| + ret i32 %old_ext
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_16
|
| +; CHECK: mov ax, {{.*}}
|
| +; CHECK: lock cmpxchg word ptr [e{{[^a].}}], {{[^a]}}
|
| +
|
| +define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
|
| + i32 %desired, i32 6, i32 6)
|
| + ret i32 %old
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_32
|
| +; CHECK: mov eax, {{.*}}
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], e{{[^a]}}
|
| +
|
| +define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
|
| + i64 %desired, i32 6, i32 6)
|
| + ret i64 %old
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_64
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; edx and eax are already the return registers, so they don't actually
|
| +; need to be reshuffled via movs. The next test stores the result
|
| +; somewhere, so in that case they do need to be mov'ed.
|
| +
|
| +; Test a case where %old really does need to be copied out of edx:eax.
|
| +define void @test_atomic_cmpxchg_64_store(i32 %ret_iptr, i32 %iptr, i64 %expected, i64 %desired) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
|
| + i64 %desired, i32 6, i32 6)
|
| + %__6 = inttoptr i32 %ret_iptr to i64*
|
| + store i64 %old, i64* %__6, align 1
|
| + ret void
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_64_store
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +; CHECK: mov {{.*}}, edx
|
| +; CHECK: mov {{.*}}, eax
|
| +
|
| +; Test with some more register pressure. When we have an alloca, ebp is
|
| +; used to manage the stack frame, so it cannot be used as a register either.
|
| +define i64 @test_atomic_cmpxchg_64_alloca(i32 %iptr, i64 %expected, i64 %desired) {
|
| +entry:
|
| + %alloca_ptr = alloca i8, i32 16, align 16
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
|
| + i64 %desired, i32 6, i32 6)
|
| + store i8 0, i8* %alloca_ptr, align 1
|
| + store i8 1, i8* %alloca_ptr, align 1
|
| + store i8 2, i8* %alloca_ptr, align 1
|
| + store i8 3, i8* %alloca_ptr, align 1
|
| + %__6 = ptrtoint i8* %alloca_ptr to i32
|
| + call void @use_ptr(i32 %__6)
|
| + ret i64 %old
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_64_alloca
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
|
| +; It also cannot be ebp since we use that for alloca. Also make sure it's
|
| +; not esp, since that's the stack pointer and mucking with it will break
|
| +; the later use_ptr function call.
|
| +; That pretty much leaves esi or edi as the only viable registers.
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{[ds]}}i]
|
| +; CHECK: call use_ptr
|
| +
|
| +define i32 @test_atomic_cmpxchg_32_ignored(i32 %iptr, i32 %expected, i32 %desired) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %ignored = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
|
| + i32 %desired, i32 6, i32 6)
|
| + ret i32 0
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_32_ignored
|
| +; CHECK: mov eax, {{.*}}
|
| +; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
|
| +
|
| +define i64 @test_atomic_cmpxchg_64_ignored(i32 %iptr, i64 %expected, i64 %desired) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i64*
|
| + %ignored = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
|
| + i64 %desired, i32 6, i32 6)
|
| + ret i64 0
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_64_ignored
|
| +; CHECK: push ebx
|
| +; CHECK-DAG: mov edx
|
| +; CHECK-DAG: mov eax
|
| +; CHECK-DAG: mov ecx
|
| +; CHECK-DAG: mov ebx
|
| +; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
|
| +
|
| +define i32 @test_atomic_cmpxchg_32_loop(i32 %iptr, i32 %expected, i32 %desired) {
|
| +entry:
|
| + br label %loop
|
| +
|
| +loop:
|
| + %cmp = phi i32 [ %expected, %entry ], [ %old, %loop ]
|
| + %ptr = inttoptr i32 %iptr to i32*
|
| + %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %cmp,
|
| + i32 %desired, i32 6, i32 6)
|
| + %success = icmp eq i32 %cmp, %old
|
| + br i1 %success, label %done, label %loop
|
| +
|
| +done:
|
| + ret i32 %old
|
| +}
|
| +; CHECK-LABEL: test_atomic_cmpxchg_32_loop
|
|
|
| ;;;; Fence and is-lock-free.
|
|
|
| @@ -381,6 +866,19 @@ entry:
|
| ; CHECK-LABEL: test_not_lock_free
|
| ; CHECK: mov {{.*}}, 0
|
|
|
| +define i32 @test_atomic_is_lock_free_ignored(i32 %iptr) {
|
| +entry:
|
| + %ptr = inttoptr i32 %iptr to i8*
|
| + %ignored = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
|
| + ret i32 0
|
| +}
|
| +; CHECK-LABEL: test_atomic_is_lock_free_ignored
|
| +; CHECK: mov {{.*}}, 0
|
| +; This can get optimized out, because it's side-effect-free.
|
| +; CHECKO2REM-LABEL: test_atomic_is_lock_free_ignored
|
| +; CHECKO2REM-NOT: mov {{.*}}, 1
|
| +; CHECKO2REM: mov {{.*}}, 0
|
| +
|
| ; TODO(jvoung): at some point we can take advantage of the
|
| ; fact that nacl.atomic.is.lock.free will resolve to a constant
|
| ; (which adds DCE opportunities). Once we optimize, the test expectations
|
|
|