Chromium Code Reviews

Unified Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 1169493002: Subzero: Improve/refactor folding loads into the next instruction. (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes (created 5 years, 7 months ago)
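For context, "folding a load into the next instruction" means Subzero's x86 lowering emits the arithmetic with a memory source operand instead of a separate load followed by a register-register op. A minimal sketch of the pattern these tests exercise (the global g32 and the registers are illustrative, not taken from the CL):

  %v  = load i32, i32* %p, align 1
  %v2 = sub i32 1, %v
  ; Unfused lowering, roughly:
  ;   mov eax,DWORD PTR [g32]
  ;   mov ecx,0x1
  ;   sub ecx,eax
  ; With the load folded into the next instruction:
  ;   mov ecx,0x1
  ;   sub ecx,DWORD PTR [g32]

Note that the patch switches the tests from add to sub: unlike add, sub is not commutative, so the folded form has to keep the loaded value in the memory-operand position (sub reg,mem computes reg - mem). Presumably that operand-order behavior is part of what the refactored folding is being checked against.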
Index: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
index 1b335465576c9f2082f37250f76e9095337657f7..01e2048d42166997b1bf8c866bbd6fb7dd7ed29f 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
@@ -14,7 +14,7 @@ declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
@g32_c = internal global [4 x i8] zeroinitializer, align 4
@g32_d = internal global [4 x i8] zeroinitializer, align 4
-define i32 @test_fused_load_add_a() {
+define i32 @test_fused_load_sub_a() {
entry:
%p_alloca = alloca i8, i32 4, align 4
%p_alloca_bc = bitcast i8* %p_alloca to i32*
@@ -22,39 +22,39 @@ entry:
%p_a = bitcast [4 x i8]* @g32_a to i32*
%l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
- %l_a2 = add i32 %l_a, 1
+ %l_a2 = sub i32 1, %l_a
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)
%p_b = bitcast [4 x i8]* @g32_b to i32*
%l_b = load i32, i32* %p_b, align 1
- %l_b2 = add i32 %l_b, 1
+ %l_b2 = sub i32 1, %l_b
store i32 %l_b2, i32* %p_b, align 1
%p_c = bitcast [4 x i8]* @g32_c to i32*
%l_c = load i32, i32* %p_c, align 1
- %l_c2 = add i32 %l_c, 1
+ %l_c2 = sub i32 1, %l_c
call void @llvm.nacl.atomic.fence.all()
store i32 %l_c2, i32* %p_c, align 1
ret i32 %l_c2
}
-; CHECK-LABEL: test_fused_load_add_a
+; CHECK-LABEL: test_fused_load_sub_a
; alloca store
; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
-; The load + add are optimized into one everywhere.
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; The load + sub are optimized into one everywhere.
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
; CHECK: mov DWORD PTR
; CHECK: mfence
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
; CHECK: mov DWORD PTR
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
; CHECK: mfence
; CHECK: mov DWORD PTR
; Test with the fence moved up a bit.
-define i32 @test_fused_load_add_b() {
+define i32 @test_fused_load_sub_b() {
entry:
%p_alloca = alloca i8, i32 4, align 4
%p_alloca_bc = bitcast i8* %p_alloca to i32*
@@ -62,40 +62,40 @@ entry:
%p_a = bitcast [4 x i8]* @g32_a to i32*
%l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
- %l_a2 = add i32 %l_a, 1
+ %l_a2 = sub i32 1, %l_a
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)
%p_b = bitcast [4 x i8]* @g32_b to i32*
%l_b = load i32, i32* %p_b, align 1
- %l_b2 = add i32 %l_b, 1
+ %l_b2 = sub i32 1, %l_b
store i32 %l_b2, i32* %p_b, align 1
%p_c = bitcast [4 x i8]* @g32_c to i32*
call void @llvm.nacl.atomic.fence.all()
%l_c = load i32, i32* %p_c, align 1
- %l_c2 = add i32 %l_c, 1
+ %l_c2 = sub i32 1, %l_c
store i32 %l_c2, i32* %p_c, align 1
ret i32 %l_c2
}
-; CHECK-LABEL: test_fused_load_add_b
+; CHECK-LABEL: test_fused_load_sub_b
; alloca store
; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
; CHECK: mov DWORD PTR
; CHECK: mfence
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
; CHECK: mov DWORD PTR
; CHECK: mfence
-; Load + add can still be optimized into one instruction
+; Load + sub can still be optimized into one instruction
; because it is not separated by a fence.
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
; CHECK: mov DWORD PTR
-; Test with the fence splitting a load/add.
-define i32 @test_fused_load_add_c() {
+; Test with the fence splitting a load/sub.
+define i32 @test_fused_load_sub_c() {
entry:
%p_alloca = alloca i8, i32 4, align 4
%p_alloca_bc = bitcast i8* %p_alloca to i32*
@@ -103,38 +103,39 @@ entry:
%p_a = bitcast [4 x i8]* @g32_a to i32*
%l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
- %l_a2 = add i32 %l_a, 1
+ %l_a2 = sub i32 1, %l_a
call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)
%p_b = bitcast [4 x i8]* @g32_b to i32*
%l_b = load i32, i32* %p_b, align 1
call void @llvm.nacl.atomic.fence.all()
- %l_b2 = add i32 %l_b, 1
+ %l_b2 = sub i32 1, %l_b
store i32 %l_b2, i32* %p_b, align 1
%p_c = bitcast [4 x i8]* @g32_c to i32*
%l_c = load i32, i32* %p_c, align 1
- %l_c2 = add i32 %l_c, 1
+ %l_c2 = sub i32 1, %l_c
store i32 %l_c2, i32* %p_c, align 1
ret i32 %l_c2
}
-; CHECK-LABEL: test_fused_load_add_c
+; CHECK-LABEL: test_fused_load_sub_c
; alloca store
; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
; CHECK: mov DWORD PTR
; CHECK: mfence
-; This load + add are no longer optimized into one,
+; The load + sub are no longer optimized into one,
; though perhaps it should be legal as long as
; the load stays on the same side of the fence.
; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b
; CHECK: mfence
-; CHECK: add {{.*}},0x1
+; CHECK: mov {{.*}},0x1
+; CHECK: sub
; CHECK: mov DWORD PTR
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
; CHECK: mov DWORD PTR
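
The fence-splitting case (test_fused_load_sub_c) is the interesting one: when the mfence lands between the load and the sub that consumes it, the folder must leave them as two instructions so the load completes on its own side of the fence. A sketch of the expected lowering for the g32_b sequence, matching the CHECK lines above (registers are illustrative):

  %l_b = load i32, i32* %p_b, align 1
  call void @llvm.nacl.atomic.fence.all()
  %l_b2 = sub i32 1, %l_b
  ; Expected lowering keeps the load and the sub separate:
  ;   mov eax,DWORD PTR [g32_b]   ; plain load, before the fence
  ;   mfence
  ;   mov ecx,0x1
  ;   sub ecx,eax                 ; register-only sub, after the fence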
