OLD | NEW |
(Empty) | |
| 1 ; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ |
| 2 ; RUN: FileCheck %s |
| 3 ; RUN: opt < %s -O3 -nacl-rewrite-atomics -remove-asm-memory -S | \ |
| 4 ; RUN: FileCheck %s |
| 5 ; RUN: opt < %s -O3 -nacl-rewrite-atomics -remove-asm-memory -S | \ |
| 6 ; RUN: FileCheck %s -check-prefix=ELIM |
| 7 ; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ |
| 8 ; RUN: FileCheck %s -check-prefix=CLEANED |
| 9 |
| 10 ; ``asm("":::"memory")`` is used as a compiler barrier and the GCC-style |
| 11 ; builtin ``__sync_synchronize`` is intended as a barrier for all memory |
| 12 ; that could be observed by external threads. They both get rewritten |
| 13 ; for NaCl by Clang to a sequentially-consistent fence surrounded by |
| 14 ; ``call void asm sideeffect "", "~{memory}"``. |
| 15 ; |
| 16 ; The test is also run at O3 to make sure that non-volatile and |
| 17 ; non-atomic loads and stores to escaping objects (i.e. loads and stores |
| 18 ; which could be observed by other threads) don't get unexpectedly |
| 19 ; eliminated. |
| 20 |
| 21 ; CLEANED-NOT: asm |
| 22 ; Pointers are 32 bits wide, with 32-bit ABI and preferred alignment. |
| 23 target datalayout = "p:32:32:32" |
| 24 ; External globals: accesses to them could be observed outside this module. |
| 25 @a = external global i32 |
| 26 @b = external global i32 |
| 27 |
| 28 ; Different triples encode ``asm("":::"memory")``'s "touch everything" |
| 29 ; constraints differently. They should get detected and removed. |
| 30 define void @memory_assembly_encoding_test() { |
| 31 ; CHECK: @memory_assembly_encoding_test() |
| 32 call void asm sideeffect "", "~{memory}"() |
| 33 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() |
| 34 call void asm sideeffect "", "~{foo},~{memory},~{bar}"() |
| 35 ; All three barriers must be gone: only the return may follow the header. |
| 36 ret void |
| 37 ; CHECK-NEXT: ret void |
| 38 } |
| 39 ; Each asm/fence/asm triple must become one fence call, with no load or store dropped. |
| 40 define void @memory_assembly_ordering_test() { |
| 41 ; CHECK: @memory_assembly_ordering_test() |
| 42 %1 = load i32* @a, align 4 |
| 43 store i32 %1, i32* @b, align 4 |
| 44 call void asm sideeffect "", "~{memory}"() |
| 45 fence seq_cst |
| 46 call void asm sideeffect "", "~{memory}"() |
| 47 ; CHECK-NEXT: %1 = load i32* @a, align 4 |
| 48 ; CHECK-NEXT: store i32 %1, i32* @b, align 4 |
| 49 ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() |
| 50 |
| 51 ; Redundant load from the previous location, and store to the same |
| 52 ; location (making the previous one dead). Shouldn't get eliminated |
| 53 ; because of the fence. |
| 54 %2 = load i32* @a, align 4 |
| 55 store i32 %2, i32* @b, align 4 |
| 56 call void asm sideeffect "", "~{memory}"() |
| 57 fence seq_cst |
| 58 call void asm sideeffect "", "~{memory}"() |
| 59 ; CHECK-NEXT: %2 = load i32* @a, align 4 |
| 60 ; CHECK-NEXT: store i32 %2, i32* @b, align 4 |
| 61 ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() |
| 62 |
| 63 ; Same here: this pair follows a fence, so it must survive as well. |
| 64 %3 = load i32* @a, align 4 |
| 65 store i32 %3, i32* @b, align 4 |
| 66 ; CHECK-NEXT: %3 = load i32* @a, align 4 |
| 67 ; CHECK-NEXT: store i32 %3, i32* @b, align 4 |
| 68 |
| 69 ret void |
| 70 ; CHECK-NEXT: ret void |
| 71 } |
| 72 |
| 73 ; Same function as above, but without the barriers. At O3 the redundant |
| 74 ; loads and dead stores should get eliminated, leaving one load and one store. |
| 75 define void @memory_ordering_test() { |
| 76 ; ELIM: @memory_ordering_test() |
| 77 %1 = load i32* @a, align 4 |
| 78 store i32 %1, i32* @b, align 4 |
| 79 %2 = load i32* @a, align 4 |
| 80 store i32 %2, i32* @b, align 4 |
| 81 %3 = load i32* @a, align 4 |
| 82 store i32 %3, i32* @b, align 4 |
| 83 ; ELIM-NEXT: %1 = load i32* @a, align 4 |
| 84 ; ELIM-NEXT: store i32 %1, i32* @b, align 4 |
| 85 ; Only the -O3 RUN line uses the ELIM prefix, since the eliminations depend on optimization. |
| 86 ret void |
| 87 ; ELIM-NEXT: ret void |
| 88 } |
OLD | NEW |