| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: opt -S -passes=gvn < %s | FileCheck %s |
| |
| @a = external constant i32 |
| ; We can value forward across the fence since we can (semantically) |
| ; reorder the following load before the fence. |
| define i32 @test(ptr %addr.i) { |
| ; CHECK-LABEL: define i32 @test |
| ; CHECK-SAME: (ptr [[ADDR_I:%.*]]) { |
| ; CHECK-NEXT: store i32 5, ptr [[ADDR_I]], align 4 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: ret i32 5 |
| ; |
| ; Store-to-load forwarding case: 5 is stored to %addr.i and the same |
| ; address is reloaded after a release fence. The expected output keeps |
| ; the store and the fence but returns the constant 5, so the load is gone. |
| store i32 5, ptr %addr.i, align 4 |
| fence release |
| %a = load i32, ptr %addr.i, align 4 |
| ret i32 %a |
| } |
| |
| ; Same as above, but forwarding a loaded value (rather than a stored one) |
| ; across the release fence. |
| define i32 @test2(ptr %addr.i) { |
| ; CHECK-LABEL: define i32 @test2 |
| ; CHECK-SAME: (ptr [[ADDR_I:%.*]]) { |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| ; Load-to-load forwarding case: the same address is loaded before and |
| ; after a release fence and the two results are subtracted. The expected |
| ; output keeps only the fence and returns 0, so both loads and the sub |
| ; are folded away. |
| %a = load i32, ptr %addr.i, align 4 |
| fence release |
| %a2 = load i32, ptr %addr.i, align 4 |
| %res = sub i32 %a, %a2 |
| ret i32 %res |
| } |
| |
| ; We cannot value forward across an acquire barrier since we might |
| ; be synchronizing with another thread storing to the same variable |
| ; followed by a release fence. This is not so much enforcing an |
| ; ordering property (though it is that too), but a liveness |
| ; property. We expect to eventually see the value of a store by |
| ; another thread when spinning on that location. |
| define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) { |
| ; CHECK-LABEL: define i32 @test3 |
| ; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) { |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[A2:%.*]] = load i32, ptr [[ADDR_I]], align 4 |
| ; CHECK-NEXT: [[RES:%.*]] = sub i32 [[A]], [[A2]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| ; The following code is intended to model the unrolling of |
| ; two iterations in a spin loop of the form: |
| ; do { fence acquire; tmp = *%addr.i; } while (!tmp); |
| ; It's hopefully clear that allowing PRE to turn this into: |
| ; if (!*%addr.i) while(true) {} would be unfortunate. |
| ; Accordingly, the expected output is identical to the input: both loads |
| ; and the sub must survive. |
| fence acquire |
| %a = load i32, ptr %addr.i, align 4 |
| fence acquire |
| %a2 = load i32, ptr %addr.i, align 4 |
| %res = sub i32 %a, %a2 |
| ret i32 %res |
| } |
| |
| ; We can forward the value of the first load to the second load |
| ; across both fences, because the load is from |
| ; a constant memory location. |
| define i32 @test4(ptr %addr) { |
| ; CHECK-LABEL: define i32 @test4 |
| ; CHECK-SAME: (ptr [[ADDR:%.*]]) { |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: store i32 42, ptr [[ADDR]], align 8 |
| ; CHECK-NEXT: fence seq_cst |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| ; @a is declared as an external constant, and it is loaded both before |
| ; and after a release fence, a store to %addr, and a seq_cst fence. The |
| ; expected output keeps the fences and the store but returns 0, so both |
| ; loads of @a and the sub are folded away. |
| %var = load i32, ptr @a |
| fence release |
| store i32 42, ptr %addr, align 8 |
| fence seq_cst |
| %var2 = load i32, ptr @a |
| %var3 = sub i32 %var, %var2 |
| ret i32 %var3 |
| } |
| |
| ; Another example of why forwarding across an acquire fence is problematic |
| ; can be seen in a normal locking operation. Say we had: |
| ; *p = 5; unlock(l); lock(l); use(p); |
| ; forwarding the store to p would be invalid. A reasonable implementation |
| ; of unlock and lock might be: |
| ; unlock() { atomicrmw sub %l, 1 unordered; fence release } |
| ; lock() { |
| ; do { |
| ; %res = cmpxchg %l, 0, 1, monotonic monotonic |
| ; } while(!%res.success) |
| ; fence acquire; |
| ; } |
| ; Given we chose to forward across the release fence, we clearly can't forward |
| ; across the acquire fence as well. |