tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll - Issue 509233002: Convert lit tests to check disassembled assembly.

Side by Side Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 509233002: Convert lit tests to check disassembled assembly. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: add comment Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« tests_lit/llvm2ice_tests/fp.pnacl.ll ('K') | « tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll ('k') | tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll » ('j') | tests_lit/llvm2ice_tests/shift.ll » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all.	1 ; Test that loads/stores don't move across a nacl.atomic.fence.all.

2 ; This should apply to both atomic and non-atomic loads/stores	2 ; This should apply to both atomic and non-atomic loads/stores

3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only	3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only

4 ; applies to atomic load/stores).	4 ; applies to atomic load/stores).

5 ;	5 ;

6 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s

7 ; RUN: %llvm2ice -O2 --verbose none %s \	6 ; RUN: %llvm2ice -O2 --verbose none %s \

8 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj	7 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \

	8 ; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s

	9

	10 ; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we

	11 ; have [0] == g32_a, [4] == g32_b, [8] == g32_c, etc.

9	12

10 declare void @llvm.nacl.atomic.fence.all()	13 declare void @llvm.nacl.atomic.fence.all()

11 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)	14 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)

12 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)	15 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)

13	16

14 @g32_a = internal global [4 x i8] zeroinitializer, align 4	17 @g32_a = internal global [4 x i8] zeroinitializer, align 4

15 @g32_b = internal global [4 x i8] zeroinitializer, align 4	18 @g32_b = internal global [4 x i8] zeroinitializer, align 4

16 @g32_c = internal global [4 x i8] zeroinitializer, align 4	19 @g32_c = internal global [4 x i8] zeroinitializer, align 4

17 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4	20 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4

18	21

(...skipping 19 matching lines...) Expand all Loading...
38 call void @llvm.nacl.atomic.fence.all()	41 call void @llvm.nacl.atomic.fence.all()

39 store i32 %l_c2, i32* %p_c, align 1	42 store i32 %l_c2, i32* %p_c, align 1

40	43

41 ret i32 %l_c2	44 ret i32 %l_c2

42 }	45 }

43 ; CHECK-LABEL: test_fused_load_add_a	46 ; CHECK-LABEL: test_fused_load_add_a

44 ; alloca store	47 ; alloca store

45 ; CHECK: mov {{.*}}, esp	48 ; CHECK: mov {{.*}}, esp

46 ; CHECK: mov dword ptr {{.*}}, 999	49 ; CHECK: mov dword ptr {{.*}}, 999

47 ; atomic store (w/ its own mfence)	50 ; atomic store (w/ its own mfence)

48 ; CHECK: lea {{.*}}, g32_a	51 ; CHECK: lea {{.*}}, dword ptr [0]

49 ; The load + add are optimized into one everywhere.	52 ; The load + add are optimized into one everywhere.

50 ; CHECK: add {{.*}}, dword ptr	53 ; CHECK: add {{.*}}, dword ptr

51 ; CHECK: mov dword ptr	54 ; CHECK: mov dword ptr

52 ; CHECK: mfence	55 ; CHECK: mfence

53 ; CHECK: lea {{.*}}, g32_b	56 ; CHECK: lea {{.*}}, dword ptr [4]

54 ; CHECK: add {{.*}}, dword ptr	57 ; CHECK: add {{.*}}, dword ptr

55 ; CHECK: mov dword ptr	58 ; CHECK: mov dword ptr

56 ; CHECK: lea {{.*}}, g32_c	59 ; CHECK: lea {{.*}}, dword ptr [8]

57 ; CHECK: add {{.*}}, dword ptr	60 ; CHECK: add {{.*}}, dword ptr

58 ; CHECK: mfence	61 ; CHECK: mfence

59 ; CHECK: mov dword ptr	62 ; CHECK: mov dword ptr

60	63

61 ; Test with the fence moved up a bit.	64 ; Test with the fence moved up a bit.

62 define i32 @test_fused_load_add_b() {	65 define i32 @test_fused_load_add_b() {

63 entry:	66 entry:

64 %p_alloca = alloca i8, i32 4, align 4	67 %p_alloca = alloca i8, i32 4, align 4

65 %p_alloca_bc = bitcast i8* %p_alloca to i32*	68 %p_alloca_bc = bitcast i8* %p_alloca to i32*

66 store i32 999, i32* %p_alloca_bc, align 1	69 store i32 999, i32* %p_alloca_bc, align 1

(...skipping 14 matching lines...) Expand all Loading...
81 %l_c2 = add i32 %l_c, 1	84 %l_c2 = add i32 %l_c, 1

82 store i32 %l_c2, i32* %p_c, align 1	85 store i32 %l_c2, i32* %p_c, align 1

83	86

84 ret i32 %l_c2	87 ret i32 %l_c2

85 }	88 }

86 ; CHECK-LABEL: test_fused_load_add_b	89 ; CHECK-LABEL: test_fused_load_add_b

87 ; alloca store	90 ; alloca store

88 ; CHECK: mov {{.*}}, esp	91 ; CHECK: mov {{.*}}, esp

89 ; CHECK: mov dword ptr {{.*}}, 999	92 ; CHECK: mov dword ptr {{.*}}, 999

90 ; atomic store (w/ its own mfence)	93 ; atomic store (w/ its own mfence)

91 ; CHECK: lea {{.*}}, g32_a	94 ; CHECK: lea {{.*}}, dword ptr [0]

92 ; CHECK: add {{.*}}, dword ptr	95 ; CHECK: add {{.*}}, dword ptr

93 ; CHECK: mov dword ptr	96 ; CHECK: mov dword ptr

94 ; CHECK: mfence	97 ; CHECK: mfence

95 ; CHECK: lea {{.*}}, g32_b	98 ; CHECK: lea {{.*}}, dword ptr [4]

96 ; CHECK: add {{.*}}, dword ptr	99 ; CHECK: add {{.*}}, dword ptr

97 ; CHECK: mov dword ptr	100 ; CHECK: mov dword ptr

98 ; CHECK: lea {{.*}}, g32_c	101 ; CHECK: lea {{.*}}, dword ptr [8]

99 ; CHECK: mfence	102 ; CHECK: mfence

100 ; Load + add can still be optimized into one instruction	103 ; Load + add can still be optimized into one instruction

101 ; because it is not separated by a fence.	104 ; because it is not separated by a fence.

102 ; CHECK: add {{.*}}, dword ptr	105 ; CHECK: add {{.*}}, dword ptr

103 ; CHECK: mov dword ptr	106 ; CHECK: mov dword ptr

104	107

105 ; Test with the fence splitting a load/add.	108 ; Test with the fence splitting a load/add.

106 define i32 @test_fused_load_add_c() {	109 define i32 @test_fused_load_add_c() {

107 entry:	110 entry:

108 %p_alloca = alloca i8, i32 4, align 4	111 %p_alloca = alloca i8, i32 4, align 4

(...skipping 16 matching lines...) Expand all Loading...
125 %l_c2 = add i32 %l_c, 1	128 %l_c2 = add i32 %l_c, 1

126 store i32 %l_c2, i32* %p_c, align 1	129 store i32 %l_c2, i32* %p_c, align 1

127	130

128 ret i32 %l_c2	131 ret i32 %l_c2

129 }	132 }

130 ; CHECK-LABEL: test_fused_load_add_c	133 ; CHECK-LABEL: test_fused_load_add_c

131 ; alloca store	134 ; alloca store

132 ; CHECK: mov {{.*}}, esp	135 ; CHECK: mov {{.*}}, esp

133 ; CHECK: mov dword ptr {{.*}}, 999	136 ; CHECK: mov dword ptr {{.*}}, 999

134 ; atomic store (w/ its own mfence)	137 ; atomic store (w/ its own mfence)

135 ; CHECK: lea {{.*}}, g32_a	138 ; CHECK: lea {{.*}}, dword ptr [0]

136 ; CHECK: add {{.*}}, dword ptr	139 ; CHECK: add {{.*}}, dword ptr

137 ; CHECK: mov dword ptr	140 ; CHECK: mov dword ptr

138 ; CHECK: mfence	141 ; CHECK: mfence

139 ; CHECK: lea {{.*}}, g32_b	142 ; CHECK: lea {{.*}}, dword ptr [4]

140 ; This load + add are no longer optimized into one,	143 ; This load + add are no longer optimized into one,

141 ; though perhaps it should be legal as long as	144 ; though perhaps it should be legal as long as

142 ; the load stays on the same side of the fence.	145 ; the load stays on the same side of the fence.

143 ; CHECK: mov {{.*}}, dword ptr	146 ; CHECK: mov {{.*}}, dword ptr

144 ; CHECK: mfence	147 ; CHECK: mfence

145 ; CHECK: add {{.*}}, 1	148 ; CHECK: add {{.*}}, 1

146 ; CHECK: mov dword ptr	149 ; CHECK: mov dword ptr

147 ; CHECK: lea {{.*}}, g32_c	150 ; CHECK: lea {{.*}}, dword ptr [8]

148 ; CHECK: add {{.*}}, dword ptr	151 ; CHECK: add {{.*}}, dword ptr

149 ; CHECK: mov dword ptr	152 ; CHECK: mov dword ptr

150	153

151	154

152 ; Test where a bunch of i8 loads could have been fused into one	155 ; Test where a bunch of i8 loads could have been fused into one

153 ; i32 load, but a fence blocks that.	156 ; i32 load, but a fence blocks that.

154 define i32 @could_have_fused_loads() {	157 define i32 @could_have_fused_loads() {

155 entry:	158 entry:

156 %ptr1 = bitcast [4 x i8]* @g32_d to i8*	159 %ptr1 = bitcast [4 x i8]* @g32_d to i8*

157 %b1 = load i8* %ptr1	160 %b1 = load i8* %ptr1

(...skipping 19 matching lines...) Expand all Loading...
177 %b12 = or i32 %b1.ext, %b2.shift	180 %b12 = or i32 %b1.ext, %b2.shift

178 %b3.ext = zext i8 %b3 to i32	181 %b3.ext = zext i8 %b3 to i32

179 %b3.shift = shl i32 %b3.ext, 16	182 %b3.shift = shl i32 %b3.ext, 16

180 %b123 = or i32 %b12, %b3.shift	183 %b123 = or i32 %b12, %b3.shift

181 %b4.ext = zext i8 %b4 to i32	184 %b4.ext = zext i8 %b4 to i32

182 %b4.shift = shl i32 %b4.ext, 24	185 %b4.shift = shl i32 %b4.ext, 24

183 %b1234 = or i32 %b123, %b4.shift	186 %b1234 = or i32 %b123, %b4.shift

184 ret i32 %b1234	187 ret i32 %b1234

185 }	188 }

186 ; CHECK-LABEL: could_have_fused_loads	189 ; CHECK-LABEL: could_have_fused_loads

187 ; CHECK: lea {{.*}}, g32_d	190 ; CHECK: lea {{.*}},
	Jim Stichnoth 2014/08/28 20:19:23: Should g32_d turn into "dword ptr [12]"?
	jvoung (off chromium) 2014/08/29 00:51:20: This is actually going to be "dword ptr [0]", because it's in .data instead of .bss, I think.
188 ; CHECK: mov {{.*}}, byte ptr	191 ; CHECK: mov {{.*}}, byte ptr

189 ; CHECK: mov {{.*}}, byte ptr	192 ; CHECK: mov {{.*}}, byte ptr

190 ; CHECK: mov {{.*}}, byte ptr	193 ; CHECK: mov {{.*}}, byte ptr

191 ; CHECK: mfence	194 ; CHECK: mfence

192 ; CHECK: mov {{.*}}, byte ptr	195 ; CHECK: mov {{.*}}, byte ptr

193	196

194	197

195 ; Test where an identical load from two branches could have been hoisted	198 ; Test where an identical load from two branches could have been hoisted

196 ; up, and then the code merged, but a fence prevents it.	199 ; up, and then the code merged, but a fence prevents it.

197 define i32 @could_have_hoisted_loads(i32 %x) {	200 define i32 @could_have_hoisted_loads(i32 %x) {

198 entry:	201 entry:

199 %ptr = bitcast [4 x i8]* @g32_d to i32*	202 %ptr = bitcast [4 x i8]* @g32_d to i32*

200 %cmp = icmp eq i32 %x, 1	203 %cmp = icmp eq i32 %x, 1

201 br i1 %cmp, label %branch1, label %branch2	204 br i1 %cmp, label %branch1, label %branch2

202 branch1:	205 branch1:

203 %y = load i32* %ptr	206 %y = load i32* %ptr

204 ret i32 %y	207 ret i32 %y

205 branch2:	208 branch2:

206 call void @llvm.nacl.atomic.fence.all()	209 call void @llvm.nacl.atomic.fence.all()

207 %z = load i32* %ptr	210 %z = load i32* %ptr

208 ret i32 %z	211 ret i32 %z

209 }	212 }

210 ; CHECK-LABEL: could_have_hoisted_loads	213 ; CHECK-LABEL: could_have_hoisted_loads

211 ; CHECK: lea {{.*}}, g32_d	214 ; CHECK: lea {{.*}},

212 ; CHECK: je {{.*}}	215 ; CHECK: je {{.*}}

213 ; CHECK: jmp {{.*}}	216 ; CHECK: jmp {{.*}}

214 ; CHECK: mov {{.*}}, dword ptr	217 ; CHECK: mov {{.*}}, dword ptr

215 ; CHECK: ret	218 ; CHECK: ret

216 ; CHECK: mfence	219 ; CHECK: mfence

217 ; CHECK: mov {{.*}}, dword ptr	220 ; CHECK: mov {{.*}}, dword ptr

218 ; CHECK: ret	221 ; CHECK: ret

OLD	NEW