| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8-W32 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-W32 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-W32 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-W32 %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX8-W64 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX9-W64 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11-W64 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64 %s |
| |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8-W32-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-W32-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-W32-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-W32-GISEL %s |
| |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX8-W64-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX9-W64-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11-W64-GISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64-GISEL %s |
| |
| ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-SDAG-ERR %s |
| ; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s |
| |
| ; GFX6-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute |
| ; GFX6-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float) |
| |
| ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-SDAG-ERR %s |
| ; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s |
| |
| ; GFX7-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute |
| ; GFX7-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float) |
| |
| |
| |
; Test function test_wave_shuffle_float. It makes one tail call to
; llvm.amdgcn.wave.shuffle with (float %val, i32 %idx) and returns the result
; unchanged.
;
; The prefixed assertion lines inside the body are autogenerated (see the note
; on the first line of this file). Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; Lowering recorded by those assertions
;   * all prefixes other than the two GFX11-W64 ones expect a v_lshlrev_b32 by 2
;     on v1 followed by a single ds_bpermute_b32
;   * the GFX11-W64 prefixes (SelectionDAG and GlobalISel) expect a
;     v_permlane64_b32, two ds_bpermute_b32, and a v_cndmask_b32 that picks
;     between the two results, with v2 spilled and reloaded around the sequence
| define float @test_wave_shuffle_float(float %val, i32 %idx) { |
| ; GFX8-W32-LABEL: test_wave_shuffle_float: |
| ; GFX8-W32: ; %bb.0: ; %entry |
| ; GFX8-W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-W32-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX8-W32-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX8-W32-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-W32-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-W32-LABEL: test_wave_shuffle_float: |
| ; GFX9-W32: ; %bb.0: ; %entry |
| ; GFX9-W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-W32-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX9-W32-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX9-W32-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-W32-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-W32-LABEL: test_wave_shuffle_float: |
| ; GFX11-W32: ; %bb.0: ; %entry |
| ; GFX11-W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-W32-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX11-W32-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX11-W32-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-W32-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-W32-LABEL: test_wave_shuffle_float: |
| ; GFX12-W32: ; %bb.0: ; %entry |
| ; GFX12-W32-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-W32-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-W32-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-W32-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-W32-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-W32-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX12-W32-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX12-W32-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-W32-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-W64-LABEL: test_wave_shuffle_float: |
| ; GFX8-W64: ; %bb.0: ; %entry |
| ; GFX8-W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-W64-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX8-W64-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX8-W64-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-W64-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-W64-LABEL: test_wave_shuffle_float: |
| ; GFX9-W64: ; %bb.0: ; %entry |
| ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-W64-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX9-W64-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-W64-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-W64-LABEL: test_wave_shuffle_float: |
| ; GFX11-W64: ; %bb.0: ; %entry |
| ; GFX11-W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-W64-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill |
| ; GFX11-W64-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-NEXT: v_lshlrev_b32_e32 v3, 2, v1 |
| ; GFX11-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec |
| ; GFX11-W64-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $exec |
| ; GFX11-W64-NEXT: s_or_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-NEXT: v_permlane64_b32 v2, v0 |
| ; GFX11-W64-NEXT: ds_bpermute_b32 v2, v3, v2 |
| ; GFX11-W64-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-NEXT: ds_bpermute_b32 v0, v3, v0 |
| ; GFX11-W64-NEXT: v_mbcnt_lo_u32_b32 v3, -1, 0 |
| ; GFX11-W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) |
| ; GFX11-W64-NEXT: v_xor_b32_e32 v1, v3, v1 |
| ; GFX11-W64-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-W64-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX11-W64-NEXT: v_and_b32_e32 v1, 32, v1 |
| ; GFX11-W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| ; GFX11-W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX11-W64-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-W64-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc |
| ; GFX11-W64-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload |
| ; GFX11-W64-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-W64-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-W64-LABEL: test_wave_shuffle_float: |
| ; GFX12-W64: ; %bb.0: ; %entry |
| ; GFX12-W64-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-W64-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-W64-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-W64-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-W64-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-W64-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX12-W64-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX12-W64-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-W64-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-W32-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX8-W32-GISEL: ; %bb.0: ; %entry |
| ; GFX8-W32-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-W32-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX8-W32-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX8-W32-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-W32-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-W32-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX9-W32-GISEL: ; %bb.0: ; %entry |
| ; GFX9-W32-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-W32-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX9-W32-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX9-W32-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-W32-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-W32-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX11-W32-GISEL: ; %bb.0: ; %entry |
| ; GFX11-W32-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-W32-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX11-W32-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX11-W32-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-W32-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-W32-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX12-W32-GISEL: ; %bb.0: ; %entry |
| ; GFX12-W32-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX12-W32-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX12-W32-GISEL-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-W32-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-W64-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX8-W64-GISEL: ; %bb.0: ; %entry |
| ; GFX8-W64-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-W64-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX8-W64-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX8-W64-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-W64-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-W64-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX9-W64-GISEL: ; %bb.0: ; %entry |
| ; GFX9-W64-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-W64-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX9-W64-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX9-W64-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-W64-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-W64-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX11-W64-GISEL: ; %bb.0: ; %entry |
| ; GFX11-W64-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-W64-GISEL-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-GISEL-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill |
| ; GFX11-W64-GISEL-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX11-W64-GISEL-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec |
| ; GFX11-W64-GISEL-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec |
| ; GFX11-W64-GISEL-NEXT: s_or_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-GISEL-NEXT: v_permlane64_b32 v2, v0 |
| ; GFX11-W64-GISEL-NEXT: ds_bpermute_b32 v2, v1, v2 |
| ; GFX11-W64-GISEL-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX11-W64-GISEL-NEXT: v_mbcnt_lo_u32_b32 v3, -1, 0 |
| ; GFX11-W64-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) |
| ; GFX11-W64-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1 |
| ; GFX11-W64-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-W64-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX11-W64-GISEL-NEXT: v_and_b32_e32 v1, 32, v1 |
| ; GFX11-W64-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) |
| ; GFX11-W64-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX11-W64-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-W64-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc |
| ; GFX11-W64-GISEL-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX11-W64-GISEL-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload |
| ; GFX11-W64-GISEL-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX11-W64-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-W64-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-W64-GISEL-LABEL: test_wave_shuffle_float: |
| ; GFX12-W64-GISEL: ; %bb.0: ; %entry |
| ; GFX12-W64-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| ; GFX12-W64-GISEL-NEXT: ds_bpermute_b32 v0, v1, v0 |
| ; GFX12-W64-GISEL-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-W64-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %0 = tail call float @llvm.amdgcn.wave.shuffle(float %val, i32 %idx) |
| ret float %0 |
| } |