| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -o - %s | FileCheck %s |
| |
| --- |
| # Indirect call through a pointer in address space 0 (p0); the call carries no extra |
| # implicit argument/return registers. |
| # The callee pointer is produced by a G_LOAD and the assertions expect it in the vgpr bank, |
| # so the G_SI_CALL is expected to be wrapped in a waterfall loop: |
| #   bb.0: save the current exec mask with S_MOV_B64. |
| #   .1:   readfirstlane both 32-bit halves of the pointer, merge them into an sgpr pointer, |
| #         compare it with the original pointer, ballot the result and S_AND_SAVEEXEC_B64 it. |
| #   .2:   perform the call on the sgpr pointer, then S_XOR_B64_term / SI_WATERFALL_LOOP to .1. |
| #   .3:   restore the exec mask saved in bb.0. |
| #   .4:   the original return. |
| # NOTE(review): the input places the COPY/G_LOAD that define %func_ptr between ADJCALLSTACKUP |
| # and G_SI_CALL, and the assertions show them inside loop block .2 while block .1 already |
| # reads %func_ptr. Confirm this use-before-def layout is intended; otherwise move the load |
| # ahead of ADJCALLSTACKUP and regenerate the assertions. |
| name: waterfall_divergent_call_p0_no_args |
| legalized: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| |
| ; CHECK-LABEL: name: waterfall_divergent_call_p0_no_args |
| ; CHECK: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) |
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) |
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) |
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .2: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc |
| ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0) |
| ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) |
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc |
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .4: |
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 |
| ; Input: load a p0 function pointer through the pointer in $sgpr0_sgpr1 and call it. |
| ; Register banks are left unassigned (_) so the passes under test choose them. |
| ADJCALLSTACKUP 0, 0, implicit-def $scc |
| %g_ptr:_(p0) = COPY $sgpr0_sgpr1 |
| %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0)) |
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu |
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| S_SETPC_B64_return undef $sgpr2_sgpr3 |
| |
| ... |
| |
| --- |
| # Indirect call through a pointer in address space 4 (p4); the call carries no extra |
| # implicit argument/return registers. |
| # The assertions expect the loaded callee pointer in the vgpr bank and the G_SI_CALL wrapped |
| # in a waterfall loop: exec is saved in bb.0; .1 builds an sgpr copy of the pointer with two |
| # readfirstlane calls, compares it with the original and narrows exec via ballot + |
| # S_AND_SAVEEXEC_B64; .2 performs the call on the sgpr pointer and loops back through |
| # S_XOR_B64_term / SI_WATERFALL_LOOP; .3 restores exec; .4 holds the original return. |
| # NOTE(review): the COPY/G_LOAD defining %func_ptr sit after ADJCALLSTACKUP in the input, and |
| # the assertions show them in loop block .2 although block .1 already reads %func_ptr. |
| # Confirm this layout is intended before relying on it. |
| name: waterfall_divergent_call_p4_no_args |
| legalized: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| |
| ; CHECK-LABEL: name: waterfall_divergent_call_p4_no_args |
| ; CHECK: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) |
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) |
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) |
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .2: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc |
| ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4) |
| ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4)) |
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc |
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .4: |
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 |
| ; Input: load a p4 function pointer through the pointer in $sgpr0_sgpr1 and call it. |
| ; Register banks are left unassigned (_) so the passes under test choose them. |
| ADJCALLSTACKUP 0, 0, implicit-def $scc |
| %g_ptr:_(p4) = COPY $sgpr0_sgpr1 |
| %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4)) |
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu |
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| S_SETPC_B64_return undef $sgpr2_sgpr3 |
| |
| ... |
| |
| --- |
| # Indirect call through a pointer in address space 0 (p0) where the G_SI_CALL also carries |
| # implicit operands: uses of $sgpr4 and $sgpr5 and a def of $vgpr0. |
| # The assertions expect the loaded callee pointer in the vgpr bank and the G_SI_CALL wrapped |
| # in a waterfall loop: exec is saved in bb.0; .1 builds an sgpr copy of the pointer with two |
| # readfirstlane calls, compares it with the original and narrows exec via ballot + |
| # S_AND_SAVEEXEC_B64; .2 performs the call on the sgpr pointer and loops back through |
| # S_XOR_B64_term / SI_WATERFALL_LOOP; .3 restores exec; .4 holds the original return. |
| # The implicit operands are expected to remain on the rewritten call unchanged. |
| # NOTE(review): the COPY/G_LOAD defining %func_ptr sit after ADJCALLSTACKUP in the input, and |
| # the assertions show them in loop block .2 although block .1 already reads %func_ptr. |
| # Confirm this layout is intended before relying on it. |
| name: waterfall_divergent_call_p0_with_args |
| legalized: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| |
| ; CHECK-LABEL: name: waterfall_divergent_call_p0_with_args |
| ; CHECK: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) |
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) |
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) |
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .2: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc |
| ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0) |
| ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) |
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc |
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .4: |
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 |
| ; Input: load a p0 function pointer through the pointer in $sgpr0_sgpr1 and call it with |
| ; implicit $sgpr4/$sgpr5 uses and an implicit $vgpr0 def attached to the call. |
| ; Register banks are left unassigned (_) so the passes under test choose them. |
| ADJCALLSTACKUP 0, 0, implicit-def $scc |
| %g_ptr:_(p0) = COPY $sgpr0_sgpr1 |
| %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0)) |
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 |
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| S_SETPC_B64_return undef $sgpr2_sgpr3 |
| |
| ... |
| |
| --- |
| # Indirect call through a pointer in address space 4 (p4) where the G_SI_CALL also carries |
| # implicit operands: uses of $sgpr4 and $sgpr5 and a def of $vgpr0. |
| # The assertions expect the loaded callee pointer in the vgpr bank and the G_SI_CALL wrapped |
| # in a waterfall loop: exec is saved in bb.0; .1 builds an sgpr copy of the pointer with two |
| # readfirstlane calls, compares it with the original and narrows exec via ballot + |
| # S_AND_SAVEEXEC_B64; .2 performs the call on the sgpr pointer and loops back through |
| # S_XOR_B64_term / SI_WATERFALL_LOOP; .3 restores exec; .4 holds the original return. |
| # The implicit operands are expected to remain on the rewritten call unchanged. |
| # NOTE(review): the COPY/G_LOAD defining %func_ptr sit after ADJCALLSTACKUP in the input, and |
| # the assertions show them in loop block .2 although block .1 already reads %func_ptr. |
| # Confirm this layout is intended before relying on it. |
| name: waterfall_divergent_call_p4_with_args |
| legalized: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| |
| ; CHECK-LABEL: name: waterfall_divergent_call_p4_with_args |
| ; CHECK: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) |
| ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32) |
| ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr |
| ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) |
| ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .2: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc |
| ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1 |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4) |
| ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4)) |
| ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc |
| ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: .4: |
| ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3 |
| ; Input: load a p4 function pointer through the pointer in $sgpr0_sgpr1 and call it with |
| ; implicit $sgpr4/$sgpr5 uses and an implicit $vgpr0 def attached to the call. |
| ; Register banks are left unassigned (_) so the passes under test choose them. |
| ADJCALLSTACKUP 0, 0, implicit-def $scc |
| %g_ptr:_(p4) = COPY $sgpr0_sgpr1 |
| %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4)) |
| $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0 |
| ADJCALLSTACKDOWN 0, 0, implicit-def $scc |
| S_SETPC_B64_return undef $sgpr2_sgpr3 |
| |
| ... |
| |