Index: tests_lit/llvm2ice_tests/vector-align.ll
diff --git a/tests_lit/llvm2ice_tests/vector-align.ll b/tests_lit/llvm2ice_tests/vector-align.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4964f6c7c576cbcec0f5d0678a456991040e393c
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/vector-align.ll
@@ -0,0 +1,85 @@
+; This test checks that when SSE instructions access memory and require full
+; alignment, memory operands are limited to properly aligned stack operands.
+; This would only happen when we fuse a load instruction with another
+; instruction, which currently only happens with non-scalarized Arithmetic
+; instructions.
+
+; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
+; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s
+
+define <4 x i32> @test_add(i32 %addr_i, <4 x i32> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>* %addr, align 4
+  %result = add <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; CHECK-LABEL: test_add
+; CHECK-NOT: paddd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: paddd xmm{{.}},
+
+define <4 x i32> @test_and(i32 %addr_i, <4 x i32> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>* %addr, align 4
+  %result = and <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; CHECK-LABEL: test_and
+; CHECK-NOT: pand xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: pand xmm{{.}},
+
+define <4 x i32> @test_or(i32 %addr_i, <4 x i32> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>* %addr, align 4
+  %result = or <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; CHECK-LABEL: test_or
+; CHECK-NOT: por xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: por xmm{{.}},
+
+define <4 x i32> @test_xor(i32 %addr_i, <4 x i32> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>* %addr, align 4
+  %result = xor <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; CHECK-LABEL: test_xor
+; CHECK-NOT: pxor xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: pxor xmm{{.}},
+
+define <4 x i32> @test_sub(i32 %addr_i, <4 x i32> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>* %addr, align 4
+  %result = sub <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; CHECK-LABEL: test_sub
+; CHECK-NOT: psubd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: psubd xmm{{.}},
+
+define <4 x float> @test_fadd(i32 %addr_i, <4 x float> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x float>*
+  %loaded = load <4 x float>* %addr, align 4
+  %result = fadd <4 x float> %addend, %loaded
+  ret <4 x float> %result
+}
+; CHECK-LABEL: test_fadd
+; CHECK-NOT: addps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: addps xmm{{.}},
+
+define <4 x float> @test_fsub(i32 %addr_i, <4 x float> %addend) {
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x float>*
+  %loaded = load <4 x float>* %addr, align 4
+  %result = fsub <4 x float> %addend, %loaded
+  ret <4 x float> %result
+}
+; CHECK-LABEL: test_fsub
+; CHECK-NOT: subps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
+; CHECK: subps xmm{{.}},
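The CHECK-NOT/CHECK pairs above only reject memory operands whose base register is a general-purpose register other than esp, i.e. the form where the only 4-byte-aligned load has been fused into the SSE instruction. A rough hand-written sketch of the two disassembly shapes being distinguished, using test_add as the example (register choices and the movups sequence are illustrative assumptions, not captured output):

  ; Rejected by CHECK-NOT: the align-4 load folded straight into the SSE
  ; instruction, so its memory operand is an arbitrary pointer held in eax.
  paddd  xmm0,XMMWORD PTR [eax]

  ; Still allowed: the load stays a separate instruction and the arithmetic
  ; uses a register operand (or an esp-based, properly aligned stack slot).
  movups xmm1,XMMWORD PTR [eax]
  paddd  xmm0,xmm1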