Index: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
index d29acf3c12201e6d8f3f7c1e7931c2d8d2db8885..b0da81751a4af91005b639d68956fe54edf1cb7f 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
@@ -3,14 +3,12 @@
; (unlike the non-"all" variety of nacl.atomic.fence, which only
; applies to atomic load/stores).
;
-; TODO(kschimpf) Find out why lc2i is needed.
-; RUN: %lc2i -i %s --args -O2 --verbose none \
+; RUN: %p2i -i %s --args -O2 --verbose none \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d -r -symbolize -x86-asm-syntax=intel - | FileCheck %s

; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we
-; have 0 == g32_a, 4 == g32_b, 8 == g32_c.
-; g32_d is also 0 because it's in the .data section instead of .bss.
+; have 0 == g32_a, 4 == g32_b, 8 == g32_c, 12 == g32_d

declare void @llvm.nacl.atomic.fence.all()
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
@@ -19,7 +17,7 @@ declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
@g32_a = internal global [4 x i8] zeroinitializer, align 4
@g32_b = internal global [4 x i8] zeroinitializer, align 4
@g32_c = internal global [4 x i8] zeroinitializer, align 4
-@g32_d = internal global [4 x i8] c"\02\00\00\00", align 4
+@g32_d = internal global [4 x i8] zeroinitializer, align 4

define i32 @test_fused_load_add_a() {
entry:
@@ -33,12 +31,12 @@ entry:
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

%p_b = bitcast [4 x i8]* @g32_b to i32*
- %l_b = load i32* %p_b
+ %l_b = load i32* %p_b, align 1
%l_b2 = add i32 %l_b, 1
store i32 %l_b2, i32* %p_b, align 1

%p_c = bitcast [4 x i8]* @g32_c to i32*
- %l_c = load i32* %p_c
+ %l_c = load i32* %p_c, align 1
%l_c2 = add i32 %l_c, 1
call void @llvm.nacl.atomic.fence.all()
store i32 %l_c2, i32* %p_c, align 1
@@ -50,19 +48,16 @@ entry:
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
-; CHECK-NEXT: R_386_32
; The load + add are optimized into one everywhere.
-; CHECK: add {{.*}}, dword ptr
+; CHECK: add {{.*}}, dword ptr [0]
+; CHECK-NEXT: R_386_32
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
+; CHECK: add {{.*}}, dword ptr [4]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
+; CHECK: add {{.*}}, dword ptr [8]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mfence
; CHECK: mov dword ptr

@@ -79,13 +74,13 @@ entry:
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

%p_b = bitcast [4 x i8]* @g32_b to i32*
- %l_b = load i32* %p_b
+ %l_b = load i32* %p_b, align 1
%l_b2 = add i32 %l_b, 1
store i32 %l_b2, i32* %p_b, align 1

%p_c = bitcast [4 x i8]* @g32_c to i32*
call void @llvm.nacl.atomic.fence.all()
- %l_c = load i32* %p_c
+ %l_c = load i32* %p_c, align 1
%l_c2 = add i32 %l_c, 1
store i32 %l_c2, i32* %p_c, align 1

@@ -96,21 +91,18 @@ entry:
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
+; CHECK: add {{.*}}, dword ptr [0]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
+; CHECK: add {{.*}}, dword ptr [4]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
-; CHECK-NEXT: R_386_32
; CHECK: mfence
; Load + add can still be optimized into one instruction
; because it is not separated by a fence.
-; CHECK: add {{.*}}, dword ptr
+; CHECK: add {{.*}}, dword ptr [8]
+; CHECK-NEXT: R_386_32
; CHECK: mov dword ptr

; Test with the fence splitting a load/add.
@@ -126,13 +118,13 @@ entry:
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

%p_b = bitcast [4 x i8]* @g32_b to i32*
- %l_b = load i32* %p_b
+ %l_b = load i32* %p_b, align 1
call void @llvm.nacl.atomic.fence.all()
%l_b2 = add i32 %l_b, 1
store i32 %l_b2, i32* %p_b, align 1

%p_c = bitcast [4 x i8]* @g32_c to i32*
- %l_c = load i32* %p_c
+ %l_c = load i32* %p_c, align 1
%l_c2 = add i32 %l_c, 1
store i32 %l_c2, i32* %p_c, align 1

@@ -143,23 +135,20 @@ entry:
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
+; CHECK: add {{.*}}, dword ptr [0]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
-; CHECK-NEXT: R_386_32
; This load + add are no longer optimized into one,
; though perhaps it should be legal as long as
; the load stays on the same side of the fence.
-; CHECK: mov {{.*}}, dword ptr
+; CHECK: mov {{.*}}, dword ptr [4]
+; CHECK-NEXT: R_386_32
; CHECK: mfence
; CHECK: add {{.*}}, 1
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
+; CHECK: add {{.*}}, dword ptr [8]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr


@@ -168,22 +157,22 @@ entry:
define i32 @could_have_fused_loads() {
entry:
%ptr1 = bitcast [4 x i8]* @g32_d to i8*
- %b1 = load i8* %ptr1
+ %b1 = load i8* %ptr1, align 1

%int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32
%int_ptr_bump2 = add i32 %int_ptr2, 1
%ptr2 = inttoptr i32 %int_ptr_bump2 to i8*
- %b2 = load i8* %ptr2
+ %b2 = load i8* %ptr2, align 1

%int_ptr_bump3 = add i32 %int_ptr2, 2
%ptr3 = inttoptr i32 %int_ptr_bump3 to i8*
- %b3 = load i8* %ptr3
+ %b3 = load i8* %ptr3, align 1

call void @llvm.nacl.atomic.fence.all()

%int_ptr_bump4 = add i32 %int_ptr2, 3
%ptr4 = inttoptr i32 %int_ptr_bump4 to i8*
- %b4 = load i8* %ptr4
+ %b4 = load i8* %ptr4, align 1

%b1.ext = zext i8 %b1 to i32
%b2.ext = zext i8 %b2 to i32
@@ -198,11 +187,10 @@ entry:
ret i32 %b1234
}
; CHECK-LABEL: could_have_fused_loads
-; CHECK: mov {{.*}}, 0
+; CHECK: mov {{.*}}, byte ptr [12]
; CHECK-NEXT: R_386_32
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
-; CHECK: mov {{.*}}, byte ptr
; CHECK: mfence
; CHECK: mov {{.*}}, byte ptr

@@ -215,19 +203,19 @@ entry:
%cmp = icmp eq i32 %x, 1
br i1 %cmp, label %branch1, label %branch2
branch1:
- %y = load i32* %ptr
+ %y = load i32* %ptr, align 1
ret i32 %y
branch2:
call void @llvm.nacl.atomic.fence.all()
- %z = load i32* %ptr
+ %z = load i32* %ptr, align 1
ret i32 %z
}
; CHECK-LABEL: could_have_hoisted_loads
-; CHECK: mov {{.*}}, 0
-; CHECK-NEXT: R_386_32
; CHECK: jne {{.*}}
-; CHECK: mov {{.*}}, dword ptr
+; CHECK: mov {{.*}}, dword ptr [12]
+; CHECK-NEXT: R_386_32
; CHECK: ret
; CHECK: mfence
-; CHECK: mov {{.*}}, dword ptr
+; CHECK: mov {{.*}}, dword ptr [12]
+; CHECK-NEXT: R_386_32
; CHECK: ret