| # RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s |
| --- | |
| |
| ; IR function backing the MIR function of the same name (its memory operands |
| ; refer to %ir.out). Compares %cond with 0.0 and branches to %if or %else; |
| ; the branch carries the !structurizecfg.uniform and !amdgpu.uniform |
| ; annotations. Each arm does one volatile store to an undef addrspace(1) |
| ; pointer, then %done stores the merged phi value (0 from %if, 1 from %else) |
| ; through %out. |
| define void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 { |
| entry: |
| %cmp0 = fcmp oeq float %cond, 0.000000e+00 |
| br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 |
| |
| else: ; preds = %entry |
| store volatile i32 100, i32 addrspace(1)* undef |
| br label %done, !structurizecfg.uniform !0 |
| |
| if: ; preds = %entry |
| store volatile i32 9, i32 addrspace(1)* undef |
| br label %done, !structurizecfg.uniform !0 |
| |
| done: ; preds = %if, %else |
| %value = phi i32 [ 0, %if ], [ 1, %else ] |
| store i32 %value, i32 addrspace(1)* %out |
| ret void |
| } |
| |
| ; Same control-flow shape as @vccz_corrupt_workaround, except that the entry |
| ; branch condition is undef, so no compare is emitted and %cond is unused. |
| ; Each arm does one volatile store to an undef addrspace(1) pointer, then |
| ; %done stores the merged phi value (0 from %if, 1 from %else) through %out. |
| define void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { |
| entry: |
| br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 |
| |
| else: ; preds = %entry |
| store volatile i32 100, i32 addrspace(1)* undef |
| br label %done, !structurizecfg.uniform !0 |
| |
| if: ; preds = %entry |
| store volatile i32 9, i32 addrspace(1)* undef |
| br label %done, !structurizecfg.uniform !0 |
| |
| done: ; preds = %if, %else |
| %value = phi i32 [ 0, %if ], [ 1, %else ] |
| store i32 %value, i32 addrspace(1)* %out |
| ret void |
| } |
| |
| ; Attribute groups. Both functions above use #0; #1 is not referenced by |
| ; either of them. |
| attributes #0 = { nounwind } |
| attributes #1 = { readnone } |
| |
| ; Empty metadata node used as the operand of the !structurizecfg.uniform and |
| ; !amdgpu.uniform branch annotations. |
| !0 = !{} |
| |
| ... |
| --- |
| # Machine function for the case where %vcc is written by a vector compare |
| # and then read by S_CBRANCH_VCCZ, with two scalar loads issued earlier in the |
| # same block. The FileCheck directives below expect the pass output to |
| # contain `%vcc = S_MOV_B64 %vcc` on the line directly after the compare and |
| # directly before the branch. |
| # NOTE(review): the expected branch target is spelled %bb.2.else while the |
| # input below spells it %bb.1.else - presumably blocks are renumbered in |
| # layout order when the function is printed; confirm. |
| # CHECK-LABEL: name: vccz_corrupt_workaround |
| # CHECK: %vcc = V_CMP_EQ_F32 |
| # CHECK-NEXT: %vcc = S_MOV_B64 %vcc |
| # CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit killed %vcc |
| |
| name: vccz_corrupt_workaround |
| alignment: 0 |
| exposesReturnsTwice: false |
| legalized: false |
| regBankSelected: false |
| selected: false |
| tracksRegLiveness: true |
| liveins: |
| - { reg: '%sgpr0_sgpr1' } |
| frameInfo: |
| isFrameAddressTaken: false |
| isReturnAddressTaken: false |
| hasStackMap: false |
| hasPatchPoint: false |
| stackSize: 0 |
| offsetAdjustment: 0 |
| maxAlignment: 0 |
| adjustsStack: false |
| hasCalls: false |
| maxCallFrameSize: 0 |
| hasOpaqueSPAdjustment: false |
| hasVAStart: false |
| hasMustTailInVarArgFunc: false |
| body: | |
| bb.0.entry: |
| successors: %bb.2.if, %bb.1.else |
| liveins: %sgpr0_sgpr1 |
| |
| ; Two scalar loads through %sgpr0_sgpr1 (immediate offsets 9 and 11). |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`) |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) |
| %sgpr7 = S_MOV_B32 61440 |
| %sgpr6 = S_MOV_B32 -1 |
| ; The compare defines %vcc from the loaded %sgpr2; the branch right after it |
| ; reads and kills %vcc. |
| %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, 0, implicit %exec |
| S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc |
| |
| bb.2.if: |
| successors: %bb.3.done |
| liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Volatile store of 9, then %vgpr0 = 0 is handed to bb.3.done. |
| ; NOTE(review): %sgpr4 and %sgpr5 of the descriptor tuple have no visible |
| ; definition and are not listed as live-in - confirm this is intentional. |
| %vgpr0 = V_MOV_B32_e32 9, implicit %exec |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) |
| %vgpr0 = V_MOV_B32_e32 0, implicit %exec |
| S_BRANCH %bb.3.done |
| |
| bb.1.else: |
| successors: %bb.3.done |
| liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Volatile store of 100, then %vgpr0 = 1 is handed to bb.3.done. There is no |
| ; terminator: control falls through into the next block. |
| %vgpr0 = V_MOV_B32_e32 100, implicit %exec |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) |
| %vgpr0 = V_MOV_B32_e32 1, implicit %exec |
| |
| bb.3.done: |
| liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Fill in %sgpr2 and %sgpr3 of the descriptor tuple, then store the merged |
| ; %vgpr0 to the location recorded as %ir.out. |
| %sgpr3 = S_MOV_B32 61440 |
| %sgpr2 = S_MOV_B32 -1 |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) |
| S_ENDPGM |
| |
| ... |
| --- |
| # Machine function for the case where S_CBRANCH_VCCZ reads %vcc as an |
| # `implicit undef` operand: no instruction in this function defines %vcc. |
| # The FileCheck directives below expect an S_WAITCNT on the line directly |
| # before the branch, so nothing else may be emitted between the two, and the |
| # branch operand must still be printed as `implicit undef %vcc`. |
| # NOTE(review): the expected branch target is spelled %bb.2.else while the |
| # input below spells it %bb.1.else - presumably blocks are renumbered in |
| # layout order when the function is printed; confirm. |
| # CHECK-LABEL: name: vccz_corrupt_undef_vcc |
| # CHECK: S_WAITCNT |
| # CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit undef %vcc |
| |
| name: vccz_corrupt_undef_vcc |
| alignment: 0 |
| exposesReturnsTwice: false |
| legalized: false |
| regBankSelected: false |
| selected: false |
| tracksRegLiveness: true |
| liveins: |
| - { reg: '%sgpr0_sgpr1' } |
| frameInfo: |
| isFrameAddressTaken: false |
| isReturnAddressTaken: false |
| hasStackMap: false |
| hasPatchPoint: false |
| stackSize: 0 |
| offsetAdjustment: 0 |
| maxAlignment: 0 |
| adjustsStack: false |
| hasCalls: false |
| maxCallFrameSize: 0 |
| hasOpaqueSPAdjustment: false |
| hasVAStart: false |
| hasMustTailInVarArgFunc: false |
| body: | |
| bb.0.entry: |
| successors: %bb.2.if, %bb.1.else |
| liveins: %sgpr0_sgpr1 |
| |
| ; One scalar load through %sgpr0_sgpr1 (immediate offset 11), followed by a |
| ; branch on a %vcc that is never written. |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) |
| %sgpr7 = S_MOV_B32 61440 |
| %sgpr6 = S_MOV_B32 -1 |
| S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc |
| |
| bb.2.if: |
| successors: %bb.3.done |
| liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Volatile store of 9, then %vgpr0 = 0 is handed to bb.3.done. |
| ; NOTE(review): %sgpr4 and %sgpr5 of the descriptor tuple have no visible |
| ; definition and are not listed as live-in - confirm this is intentional. |
| %vgpr0 = V_MOV_B32_e32 9, implicit %exec |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) |
| %vgpr0 = V_MOV_B32_e32 0, implicit %exec |
| S_BRANCH %bb.3.done |
| |
| bb.1.else: |
| successors: %bb.3.done |
| liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Volatile store of 100, then %vgpr0 = 1 is handed to bb.3.done. There is no |
| ; terminator: control falls through into the next block. |
| %vgpr0 = V_MOV_B32_e32 100, implicit %exec |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) |
| %vgpr0 = V_MOV_B32_e32 1, implicit %exec |
| |
| bb.3.done: |
| liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 |
| |
| ; Fill in %sgpr2 and %sgpr3 of the descriptor tuple, then store the merged |
| ; %vgpr0 to the location recorded as %ir.out. |
| %sgpr3 = S_MOV_B32 61440 |
| %sgpr2 = S_MOV_B32 -1 |
| BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) |
| S_ENDPGM |
| |
| ... |