OLD | NEW |
1 ; This test checks that when SSE instructions access memory and require full | 1 ; This test checks that when SSE instructions access memory and require full |
2 ; alignment, memory operands are limited to properly aligned stack operands. | 2 ; alignment, memory operands are limited to properly aligned stack operands. |
3 ; This would only happen when we fuse a load instruction with another | 3 ; This would only happen when we fuse a load instruction with another |
4 ; instruction, which currently only happens with non-scalarized Arithmetic | 4 ; instruction, which currently only happens with non-scalarized Arithmetic |
5 ; instructions. | 5 ; instructions. |
6 | 6 |
7 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 7 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
8 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s | 8 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s |
9 | 9 |
10 define <4 x i32> @test_add(i32 %addr_i, <4 x i32> %addend) { | 10 define <4 x i32> @test_add(i32 %addr_i, <4 x i32> %addend) { |
11 entry: | 11 entry: |
12 %addr = inttoptr i32 %addr_i to <4 x i32>* | 12 %addr = inttoptr i32 %addr_i to <4 x i32>* |
13 %loaded = load <4 x i32>* %addr, align 4 | 13 %loaded = load <4 x i32>, <4 x i32>* %addr, align 4 |
14 %result = add <4 x i32> %addend, %loaded | 14 %result = add <4 x i32> %addend, %loaded |
15 ret <4 x i32> %result | 15 ret <4 x i32> %result |
16 } | 16 } |
17 ; CHECK-LABEL: test_add | 17 ; CHECK-LABEL: test_add |
18 ; CHECK-NOT: paddd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 18 ; CHECK-NOT: paddd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
19 ; CHECK: paddd xmm{{.}}, | 19 ; CHECK: paddd xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
20 | 20 |
21 define <4 x i32> @test_and(i32 %addr_i, <4 x i32> %addend) { | 21 define <4 x i32> @test_and(i32 %addr_i, <4 x i32> %addend) { |
22 entry: | 22 entry: |
23 %addr = inttoptr i32 %addr_i to <4 x i32>* | 23 %addr = inttoptr i32 %addr_i to <4 x i32>* |
24 %loaded = load <4 x i32>* %addr, align 4 | 24 %loaded = load <4 x i32>, <4 x i32>* %addr, align 4 |
25 %result = and <4 x i32> %addend, %loaded | 25 %result = and <4 x i32> %addend, %loaded |
26 ret <4 x i32> %result | 26 ret <4 x i32> %result |
27 } | 27 } |
28 ; CHECK-LABEL: test_and | 28 ; CHECK-LABEL: test_and |
29 ; CHECK-NOT: pand xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 29 ; CHECK-NOT: pand xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
30 ; CHECK: pand xmm{{.}}, | 30 ; CHECK: pand xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
31 | 31 |
32 define <4 x i32> @test_or(i32 %addr_i, <4 x i32> %addend) { | 32 define <4 x i32> @test_or(i32 %addr_i, <4 x i32> %addend) { |
33 entry: | 33 entry: |
34 %addr = inttoptr i32 %addr_i to <4 x i32>* | 34 %addr = inttoptr i32 %addr_i to <4 x i32>* |
35 %loaded = load <4 x i32>* %addr, align 4 | 35 %loaded = load <4 x i32>, <4 x i32>* %addr, align 4 |
36 %result = or <4 x i32> %addend, %loaded | 36 %result = or <4 x i32> %addend, %loaded |
37 ret <4 x i32> %result | 37 ret <4 x i32> %result |
38 } | 38 } |
39 ; CHECK-LABEL: test_or | 39 ; CHECK-LABEL: test_or |
40 ; CHECK-NOT: por xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 40 ; CHECK-NOT: por xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
41 ; CHECK: por xmm{{.}}, | 41 ; CHECK: por xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
42 | 42 |
43 define <4 x i32> @test_xor(i32 %addr_i, <4 x i32> %addend) { | 43 define <4 x i32> @test_xor(i32 %addr_i, <4 x i32> %addend) { |
44 entry: | 44 entry: |
45 %addr = inttoptr i32 %addr_i to <4 x i32>* | 45 %addr = inttoptr i32 %addr_i to <4 x i32>* |
46 %loaded = load <4 x i32>* %addr, align 4 | 46 %loaded = load <4 x i32>, <4 x i32>* %addr, align 4 |
47 %result = xor <4 x i32> %addend, %loaded | 47 %result = xor <4 x i32> %addend, %loaded |
48 ret <4 x i32> %result | 48 ret <4 x i32> %result |
49 } | 49 } |
50 ; CHECK-LABEL: test_xor | 50 ; CHECK-LABEL: test_xor |
51 ; CHECK-NOT: pxor xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 51 ; CHECK-NOT: pxor xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
52 ; CHECK: pxor xmm{{.}}, | 52 ; CHECK: pxor xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
53 | 53 |
54 define <4 x i32> @test_sub(i32 %addr_i, <4 x i32> %addend) { | 54 define <4 x i32> @test_sub(i32 %addr_i, <4 x i32> %addend) { |
55 entry: | 55 entry: |
56 %addr = inttoptr i32 %addr_i to <4 x i32>* | 56 %addr = inttoptr i32 %addr_i to <4 x i32>* |
57 %loaded = load <4 x i32>* %addr, align 4 | 57 %loaded = load <4 x i32>, <4 x i32>* %addr, align 4 |
58 %result = sub <4 x i32> %addend, %loaded | 58 %result = sub <4 x i32> %addend, %loaded |
59 ret <4 x i32> %result | 59 ret <4 x i32> %result |
60 } | 60 } |
61 ; CHECK-LABEL: test_sub | 61 ; CHECK-LABEL: test_sub |
62 ; CHECK-NOT: psubd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 62 ; CHECK-NOT: psubd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
63 ; CHECK: psubd xmm{{.}}, | 63 ; CHECK: psubd xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
64 | 64 |
65 define <4 x float> @test_fadd(i32 %addr_i, <4 x float> %addend) { | 65 define <4 x float> @test_fadd(i32 %addr_i, <4 x float> %addend) { |
66 entry: | 66 entry: |
67 %addr = inttoptr i32 %addr_i to <4 x float>* | 67 %addr = inttoptr i32 %addr_i to <4 x float>* |
68 %loaded = load <4 x float>* %addr, align 4 | 68 %loaded = load <4 x float>, <4 x float>* %addr, align 4 |
69 %result = fadd <4 x float> %addend, %loaded | 69 %result = fadd <4 x float> %addend, %loaded |
70 ret <4 x float> %result | 70 ret <4 x float> %result |
71 } | 71 } |
72 ; CHECK-LABEL: test_fadd | 72 ; CHECK-LABEL: test_fadd |
73 ; CHECK-NOT: addps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 73 ; CHECK-NOT: addps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
74 ; CHECK: addps xmm{{.}}, | 74 ; CHECK: addps xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
75 | 75 |
76 define <4 x float> @test_fsub(i32 %addr_i, <4 x float> %addend) { | 76 define <4 x float> @test_fsub(i32 %addr_i, <4 x float> %addend) { |
77 entry: | 77 entry: |
78 %addr = inttoptr i32 %addr_i to <4 x float>* | 78 %addr = inttoptr i32 %addr_i to <4 x float>* |
79 %loaded = load <4 x float>* %addr, align 4 | 79 %loaded = load <4 x float>, <4 x float>* %addr, align 4 |
80 %result = fsub <4 x float> %addend, %loaded | 80 %result = fsub <4 x float> %addend, %loaded |
81 ret <4 x float> %result | 81 ret <4 x float> %result |
82 } | 82 } |
83 ; CHECK-LABEL: test_fsub | 83 ; CHECK-LABEL: test_fsub |
84 ; CHECK-NOT: subps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} | 84 ; CHECK-NOT: subps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}} |
85 ; CHECK: subps xmm{{.}}, | 85 ; CHECK: subps xmm{{.}},{{xmm|XMMWORD PTR \[esp}} |
OLD | NEW |