| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefix=GFX906-SDAG %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942-SDAG %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefix=GFX1012-SDAG %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100-SDAG %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s |
| |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefix=GFX906-ISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942-ISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefix=GFX1012-ISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100-ISEL %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-ISEL %s |
| |
| ;;============================================================================== |
| ;; A few basic test cases |
| ;;============================================================================== |
| |
| ;; vgpr pointer, wavefront scope (scope operand: metadata !0, defined outside this excerpt). %addr is not inreg, so every target addresses through v[0:1] with "off"; no cache-policy or scope modifier is expected on the load for any checked target. |
| define <4 x i32> @av_global_load_b128_0_00(ptr addrspace(1) %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_0_00: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_0_00: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_0_00: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_0_00: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_0_00: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_0_00: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_0_00: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_0_00: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_0_00: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_0_00: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0) |
| ret <4 x i32> %data |
| } |
| |
| ;; vgpr pointer, workgroup scope (scope operand: metadata !1, defined outside this excerpt). Address is v[0:1] with "off"; expected load modifiers: sc0 on gfx942, glc on gfx1012 and gfx1100, none on gfx906 and gfx1250. |
| define <4 x i32> @av_global_load_b128_0_01(ptr addrspace(1) %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_0_01: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_0_01: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_0_01: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_0_01: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_0_01: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_0_01: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_0_01: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_0_01: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_0_01: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_0_01: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1) |
| ret <4 x i32> %data |
| } |
| |
| ;; vgpr pointer, agent scope (scope operand: metadata !2, defined outside this excerpt). Address is v[0:1] with "off"; expected load modifiers: glc on gfx906 and gfx1100, sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_DEV on gfx1250. |
| define <4 x i32> @av_global_load_b128_0_10(ptr addrspace(1) %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_0_10: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_0_10: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_0_10: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_0_10: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_0_10: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_0_10: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_0_10: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_0_10: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_0_10: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_0_10: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2) |
| ret <4 x i32> %data |
| } |
| |
| ;; vgpr pointer, system scope (scope operand: metadata !3, defined outside this excerpt). Address is v[0:1] with "off"; expected load modifiers: glc on gfx906 and gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_SYS on gfx1250. |
| define <4 x i32> @av_global_load_b128_0_11(ptr addrspace(1) %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_0_11: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_0_11: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_0_11: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_0_11: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_0_11: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_0_11: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_0_11: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_0_11: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_0_11: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_0_11: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3) |
| ret <4 x i32> %data |
| } |
| |
| ;; sgpr pointer (inreg %addr), wavefront scope (scope operand: metadata !0, defined outside this excerpt). Expected form: v_mov_b32 v0, 0 followed by a load using v0 plus an SGPR-pair base (s[16:17] on gfx906/gfx1012, s[0:1] on gfx942/gfx1100/gfx1250); no cache-policy or scope modifier on any checked target. |
| define <4 x i32> @av_global_load_b128_saddr_0_00(ptr addrspace(1) inreg %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_00: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0) |
| ret <4 x i32> %data |
| } |
| |
| ;; sgpr pointer (inreg %addr), workgroup scope (scope operand: metadata !1, defined outside this excerpt). Load uses v0 = 0 plus an SGPR-pair base; expected load modifiers: sc0 on gfx942, glc on gfx1012 and gfx1100, none on gfx906 and gfx1250. |
| define <4 x i32> @av_global_load_b128_saddr_0_01(ptr addrspace(1) inreg %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_01: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1) |
| ret <4 x i32> %data |
| } |
| |
| ;; sgpr pointer (inreg %addr), agent scope (scope operand: metadata !2, defined outside this excerpt). Load uses v0 = 0 plus an SGPR-pair base; expected load modifiers: glc on gfx906 and gfx1100, sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_DEV on gfx1250. |
| define <4 x i32> @av_global_load_b128_saddr_0_02(ptr addrspace(1) inreg %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_02: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2) |
| ret <4 x i32> %data |
| } |
| |
| ;; sgpr pointer (inreg %addr), system scope (scope operand: metadata !3, defined outside this excerpt). Load uses v0 = 0 plus an SGPR-pair base; expected load modifiers: glc on gfx906 and gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_SYS on gfx1250. |
| define <4 x i32> @av_global_load_b128_saddr_0_03(ptr addrspace(1) inreg %addr) { |
| ; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX906-SDAG: ; %bb.0: ; %entry |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1012-SDAG: ; %bb.0: ; %entry |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1100-SDAG: ; %bb.0: ; %entry |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1250-SDAG: ; %bb.0: ; %entry |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX906-ISEL: ; %bb.0: ; %entry |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX942-ISEL: ; %bb.0: ; %entry |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1012-ISEL: ; %bb.0: ; %entry |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1100-ISEL: ; %bb.0: ; %entry |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_03: |
| ; GFX1250-ISEL: ; %bb.0: ; %entry |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| entry: |
| %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3) |
| ret <4 x i32> %data |
| } |
| |
| ;;============================================================================== |
| ;; Signed offset addressing modes (derived from global-saddr-load.ll) |
| ;;============================================================================== |
| ;;------------------------------------------------------------------------------ |
| ;; No vgpr offset, constants |
| ;;------------------------------------------------------------------------------ |
| |
| ;; base only |
| define <4 x float> @global_load_i8_offset_0(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Baseline: %sbase goes to the b128 load intrinsic with no address arithmetic,
| ;; and every check variant above is a plain load using `off` with no immediate
| ;; offset and no cache-policy modifier.
| ;; !0 is a metadata node defined outside this function; presumably it selects
| ;; the synchronization scope (TODO confirm against its definition).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %sbase, metadata !0)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum gfx9 immediate offset |
| define <4 x float> @global_load_i8_offset_4095(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_4095:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_4095:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_4095:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_4095:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_4095:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_4095:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_4095:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_4095:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_4095:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_4095:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset 4095: the checks above fold it into the load as offset:4095 on
| ;; gfx906, gfx942, gfx1100 and gfx1250. gfx1012 adjusts the 64-bit pointer first;
| ;; SelectionDAG adds 0x800 and keeps offset:2047, while GlobalISel adds the full
| ;; 0xfff and uses no immediate.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
| ;; With !1 the checks show sc0 on gfx942, glc on gfx1012 and gfx1100, and no
| ;; cache-policy modifier on gfx906 or gfx1250. !1 is defined outside this
| ;; function; presumably it selects the synchronization scope (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum gfx9 immediate offset + 1 |
| define <4 x float> @global_load_i8_offset_4096(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_4096:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_4096:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX942-SDAG-NEXT: s_nop 1
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_4096:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_4096:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_4096:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_4096:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_4096:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_4096:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_4096:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_4096:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset 4096: in the checks above only gfx1250 folds it (offset:4096).
| ;; gfx906, gfx942, gfx1012 and gfx1100 add 0x1000 to the 64-bit pointer and load
| ;; with no immediate offset, identically for SelectionDAG and GlobalISel.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
| ;; With !2 the checks show glc on gfx906 and gfx1100, sc1 on gfx942, glc dlc on
| ;; gfx1012 and scope:SCOPE_DEV on gfx1250. !2 is defined outside this function;
| ;; presumably it selects the synchronization scope (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum gfx9 immediate offset + 2 |
| define <4 x float> @global_load_i8_offset_4097(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_4097:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_4097:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
| ; GFX942-SDAG-NEXT: s_nop 1
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_4097:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_4097:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_4097:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_4097:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_4097:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_4097:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_4097:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_4097:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset 4097: in the checks above only gfx1250 folds it (offset:4097).
| ;; On gfx906, gfx942, gfx1012 and gfx1100 SelectionDAG adds 0x1000 to the 64-bit
| ;; pointer and keeps offset:1, while GlobalISel adds the full 0x1001 and uses no
| ;; immediate.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
| ;; With !3 the checks show glc on gfx906 and gfx1100, sc0 sc1 on gfx942, glc dlc
| ;; on gfx1012 and scope:SCOPE_SYS on gfx1250. !3 is defined outside this
| ;; function; presumably it selects the synchronization scope (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum negative gfx9 immediate offset |
| define <4 x float> @global_load_i8_offset_neg4096(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg4096:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg4096:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4096:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4096:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4096:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg4096:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg4096:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4096:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4096:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4096:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset -4096: the checks above fold it into the load as offset:-4096 on
| ;; gfx906, gfx942, gfx1100 and gfx1250. gfx1012 instead adds 0xfffff000 to the
| ;; low half and -1 plus carry to the high half of the pointer, then loads with
| ;; no immediate, identically for SelectionDAG and GlobalISel.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
| ;; With !0 no check variant carries a cache-policy modifier. !0 is defined
| ;; outside this function; presumably it selects the synchronization scope
| ;; (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum negative gfx9 immediate offset -1 |
| define <4 x float> @global_load_i8_offset_neg4097(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg4097:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg4097:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
| ; GFX942-SDAG-NEXT: s_nop 1
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4097:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4097:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4097:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg4097:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg4097:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4097:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4097:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4097:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset -4097: in the checks above only gfx1250 folds it (offset:-4097).
| ;; On gfx906, gfx942, gfx1012 and gfx1100 SelectionDAG adds 0xfffff000 (with -1
| ;; into the high half) and keeps offset:-1, while GlobalISel adds 0xffffefff and
| ;; uses no immediate.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
| ;; With !1 the checks show sc0 on gfx942, glc on gfx1012 and gfx1100, and no
| ;; cache-policy modifier on gfx906 or gfx1250. !1 is defined outside this
| ;; function; presumably it selects the synchronization scope (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum negative gfx9 immediate offset -2 |
| define <4 x float> @global_load_i8_offset_neg4098(ptr addrspace(1) %sbase) {
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg4098:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg4098:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
| ; GFX942-SDAG-NEXT: s_nop 1
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4098:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4098:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4098:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg4098:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg4098:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4098:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4098:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4098:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| ;; Byte offset -4098: in the checks above only gfx1250 folds it (offset:-4098).
| ;; On gfx906, gfx942, gfx1012 and gfx1100 SelectionDAG adds 0xfffff000 (with -1
| ;; into the high half) and keeps offset:-2, while GlobalISel adds 0xffffeffe and
| ;; uses no immediate.
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
| ;; With !2 the checks show glc on gfx906 and gfx1100, sc1 on gfx942, glc dlc on
| ;; gfx1012 and scope:SCOPE_DEV on gfx1250. !2 is defined outside this function;
| ;; presumably it selects the synchronization scope (TODO confirm).
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
| ;; Reinterpret the loaded <4 x i32> as <4 x float> for the return value.
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; base with maximum gfx10 immediate offset + 1
| define <4 x float> @global_load_i8_offset_2048(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2048 bytes past %sbase, do a 128-bit av.load (metadata !3), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !3) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; base with offset 2049: maximum gfx10 immediate offset (2047) + 2 |
| define <4 x float> @global_load_i8_offset_2049(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x801, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2049 bytes past %sbase, do a 128-bit av.load (metadata !0), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !0) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; base with offset 2050: maximum gfx10 immediate offset (2047) + 3 |
| define <4 x float> @global_load_i8_offset_2050(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_2050: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_2050: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_2050: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_2050: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_2050: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_2050: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_2050: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_2050: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x802, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_2050: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_2050: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2050 bytes past %sbase, do a 128-bit av.load (metadata !1), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !1) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; base with maximum negative gfx10 immediate offset |
| define <4 x float> @global_load_i8_offset_neg2048(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2048 bytes back from %sbase, do a 128-bit av.load (metadata !2), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !2) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; base with maximum negative gfx10 immediate offset - 1 |
| define <4 x float> @global_load_i8_offset_neg2049(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2049 bytes back from %sbase, do a 128-bit av.load (metadata !3), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !3) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; base with maximum negative gfx10 immediate offset - 2 |
| define <4 x float> @global_load_i8_offset_neg2050(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg2050: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg2050: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg2050: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg2050: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2050: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 2050 bytes back from %sbase, do a 128-bit av.load (metadata !0), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !0) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; maximum gfx12 saddr positive offset |
| define <4 x float> @global_load_i8_offset_0x7FFFFF(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0x7FFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 8388607 (0x7FFFFF) bytes past %sbase, do a 128-bit av.load (metadata !1), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !1) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; maximum gfx12 saddr negative offset |
| define <4 x float> @global_load_i8_offset_0xFFFFFF(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step 8388608 (0x800000) bytes back from %sbase, do a 128-bit av.load (metadata !2), and return the bits as <4 x float>. |
| %ptr.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %ptr.off, metadata !2) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; 32-bit unsigned max offset (0xFFFFFFFF = 4294967295), load tagged with metadata !3: SDAG checks expect the constant split into a 64-bit add plus an immediate offset (4095 / 2047 / 8388607 depending on target); GlobalISel checks expect the whole constant added (-1 low dword, 0 + carry high dword) and no immediate offset |
| define <4 x float> @global_load_i8_offset_0xFFFFFFFF(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; exceeds 32-bit offset: smallest such constant (0x100000000 = 4294967296), load tagged with metadata !0; the low dword of the offset is zero, so SDAG checks expect a single add of 1 to the high address dword (v1), while GlobalISel checks expect a full add / add-with-carry pair (0 low, 1 high); no immediate offset is expected on the load |
| define <4 x float> @global_load_i8_offset_0x100000000(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 1, v1 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; exceeds 32-bit offset + 1 (0x100000001 = 4294967297), load tagged with metadata !1: SDAG checks expect the low 1 kept as the load's immediate (offset:1) with only 0x100000000 added to the address; GlobalISel checks expect the full constant added (1 low, 1 + carry high) and no immediate offset |
| define <4 x float> @global_load_i8_offset_0x100000001(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000001: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; exceeds 32-bit offset + max gfx9 imm (0x100000FFF = 4294971391), load tagged with metadata !2: SDAG checks expect 4095 folded into the load's immediate offset, except GFX1012 which expects add 0x800 plus offset:2047; GlobalISel checks expect 0xfff added in the low dword and no immediate offset |
| define <4 x float> @global_load_i8_offset_0x100000FFF(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000FFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; exceeds 32-bit offset + max gfx9 imm + 1 (0x100001000 = 4294971392), load tagged with metadata !3: checks expect 0x1000 carried in the 64-bit add with no immediate offset on every configuration except GFX1250-SDAG, which expects add of 0x100000000 plus offset:4096 |
| define <4 x float> @global_load_i8_offset_0x100001000(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100001000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative 32-bit unsigned max offset (-0xFFFFFFFF = -4294967295), load tagged with metadata !0: SDAG checks expect a positive low-dword add (0x1000 / 0x800 / 0x800000) with -1 + carry in the high dword, plus a negative immediate offset (-4095 / -2047 / -8388607); GlobalISel checks expect 1 added to the low dword, -1 + carry to the high dword, and no immediate offset |
| define <4 x float> @global_load_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative exceeds 32-bit offset |
| define <4 x float> @global_load_i8_offset_neg0x100000000(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_u32_e32 v1, -1, v1 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v1, -1, v1 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative offset one byte past the 32-bit range: -(0x100000000 + 1) |
| define <4 x float> @global_load_i8_offset_neg0x100000001(ptr addrspace(1) %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000001: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Step %sbase back by 4294967297 bytes (0x100000001), load 128 bits through the |
| ;; av.load intrinsic with metadata !2, and return the vector viewed as floats. |
| %addr = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Basic addressing patterns |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Basic pattern, no immediate offset. |
| define <4 x float> @global_load_i8_zext_vgpr(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Address is %sbase plus the zero-extended 32-bit %voffset, with no constant |
| ;; displacement; the 128-bit av.load uses metadata !3 and is returned as floats. |
| %off64 = zext i32 %voffset to i64 |
| %addr = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %off64 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; Maximum positive offset on gfx9 |
| define <4 x float> @global_load_i8_zext_vgpr_offset_4095(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Address is %sbase + zext(%voffset) + 4095 bytes, built as two chained GEPs; |
| ;; the 128-bit av.load uses metadata !0 and is returned as floats. |
| %off64 = zext i32 %voffset to i64 |
| %addr.var = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %off64 |
| %addr.imm = getelementptr inbounds i8, ptr addrspace(1) %addr.var, i64 4095 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr.imm, metadata !0) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; Maximum positive offset on gfx9 + 1 |
| define <4 x float> @global_load_i8_zext_vgpr_offset_4096(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Address is %sbase + zext(%voffset) + 4096 bytes, built as two chained GEPs; |
| ;; the 128-bit av.load uses metadata !1 and is returned as floats. |
| %off64 = zext i32 %voffset to i64 |
| %addr.var = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %off64 |
| %addr.imm = getelementptr inbounds i8, ptr addrspace(1) %addr.var, i64 4096 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr.imm, metadata !1) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; Maximum negative offset on gfx9 |
| define <4 x float> @global_load_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Address is %sbase + zext(%voffset) - 4096 bytes, built as two chained GEPs; |
| ;; the 128-bit av.load uses metadata !2 and is returned as floats. |
| %off64 = zext i32 %voffset to i64 |
| %addr.var = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %off64 |
| %addr.imm = getelementptr inbounds i8, ptr addrspace(1) %addr.var, i64 -4096 |
| %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr.imm, metadata !2) |
| %vec.f32 = bitcast <4 x i32> %vec to <4 x float> |
| ret <4 x float> %vec.f32 |
| } |
| |
| ;; Maximum negative offset on gfx9 - 1: zext(%voffset) is added to %sbase, then a constant -4097 byte GEP is applied and the result is loaded via av.load.b128 with metadata !3. Only the gfx1250 checks fold the whole constant into the load (offset:-4097); the other checked targets add all or most of it to the address first (SelectionDAG: 0xfffff000 plus offset:-1; GlobalISel: 0xffffefff with no offset). |
| define <4 x float> @global_load_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx10: zext(%voffset) is added to %sbase, then a constant +2047 byte GEP is applied and the result is loaded via av.load.b128 with metadata !0. Every checked target folds the constant into the load as offset:2047. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx10 + 1: zext(%voffset) is added to %sbase, then a constant +2048 byte GEP is applied and the result is loaded via av.load.b128 with metadata !1. The gfx1012 checks add 0x800 to the address and load with no instruction offset; all other checked targets fold the constant into the load as offset:2048. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_2048(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx10: zext(%voffset) is added to %sbase, then a constant -2048 byte GEP is applied and the result is loaded via av.load.b128 with metadata !2. Every checked target folds the constant into the load as offset:-2048. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx10 - 1: zext(%voffset) is added to %sbase, then a constant -2049 byte GEP is applied and the result is loaded via av.load.b128 with metadata !3. The gfx1012 checks add all or most of the constant to the address first (SelectionDAG: 0xfffff800 plus offset:-1; GlobalISel: 0xfffff7ff with no offset); all other checked targets fold it into the load as offset:-2049. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx12. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64 |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Minimum offset on gfx12. |
| ;; The constant -8388608 is applied after the zero-extended VGPR offset. Per the |
| ;; checks below, gfx1250 encodes it directly in the load's immediate offset |
| ;; field, while the older targets materialize it with an explicit 64-bit add |
| ;; (0xff800000 in the low half, -1 in the high half). |
| define <4 x float> @global_load_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| |
| ;; Maximum positive offset on gfx9, and immediate needs to be moved lower. |
| ;; Here the constant 4095 is applied to %sbase first and the zero-extended VGPR |
| ;; offset second. The checks expect the constant to end up in the load's |
| ;; immediate offset field wherever it fits; for gfx1012 it does not fit, so |
| ;; SelectionDAG splits it as an add of 0x800 plus offset:2047 and GlobalISel |
| ;; emits a separate add of 0xfff. |
| define <4 x float> @global_load_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095 |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; pointer addressing done in integers |
| ;; The address is formed as ptrtoint(%sbase) + zext(%voffset) and converted back |
| ;; with inttoptr instead of using getelementptr. Every target is expected to |
| ;; emit one 64-bit add followed by a load with no immediate offset. |
| define <4 x float> @global_load_i8_zext_vgpr_ptrtoint(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| %add = add i64 %sbase.as.int, %zext.offset |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression |
| ;; Integer-based addressing where the add is written as |
| ;; %zext.offset + %sbase.as.int (zero-extended offset on the left). The |
| ;; SelectionDAG checks keep that operand order in the emitted add (v2 first), |
| ;; while the GlobalISel checks expect the base register first (v0, v2). |
| define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| %add = add i64 %zext.offset, %sbase.as.int |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression, with immediate offset |
| ;; Integer-based addressing: (%zext.offset + %sbase.as.int) + 128, converted |
| ;; back with inttoptr. On every target the checks expect the trailing +128 to |
| ;; be folded into the load as offset:128, leaving a single 64-bit add for the |
| ;; variable part. |
| define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| %add = add i64 %zext.offset, %sbase.as.int |
| %add.immoffset = add i64 %add, 128 |
| %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position
| ;; The address is built with integer arithmetic instead of a GEP: 128 is added to
| ;; ptrtoint(%sbase) first, and zext(%voffset) is then added as operand 0 of the
| ;; outer add, so the constant is not the outermost term of the expression.
| ;; Every checked target is still expected to fold the constant into the load as
| ;; offset:128 and to compute the remaining v[0:1] + v2 sum in VGPRs.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !2, which is defined outside this excerpt; the checks expect
| ;; glc (gfx906, gfx1100), sc1 (gfx942), glc dlc (gfx1012) and scope:SCOPE_DEV
| ;; (gfx1250) on the load.
| define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) %sbase, i32 %voffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %voffset to i64
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
| %add.immoffset = add i64 %sbase.as.int, 128
| %add = add i64 %zext.offset, %add.immoffset
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !2)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;;------------------------------------------------------------------------------ |
| ;; Uniformity edge cases |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Both 64-bit base and 32-bit offset are scalar
| ;; NOTE(review): neither argument is marked inreg, and the checks read the base
| ;; from v[0:1] and the offset from v2 -- confirm whether scalar (inreg) operands
| ;; were intended for this "uniform" case.
| ;; The address is an inbounds i8 GEP of %sbase by zext(%soffset). All checked
| ;; targets are expected to do the 64-bit add in VGPRs and then issue the load
| ;; with no immediate offset.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !3, which is defined outside this excerpt; the checks expect
| ;; glc (gfx906, gfx1100), sc0 sc1 (gfx942), glc dlc (gfx1012) and
| ;; scope:SCOPE_SYS (gfx1250) on the load.
| define <4 x float> @global_load_i8_zext_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %soffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; Both 64-bit base and 32-bit offset are scalar, with immediate offset.
| ;; NOTE(review): neither argument is marked inreg, and the checks read the base
| ;; from v[0:1] and the offset from v2 -- confirm whether scalar (inreg) operands
| ;; were intended for this "uniform" case.
| ;; The address is two chained inbounds i8 GEPs: %sbase + zext(%soffset), then a
| ;; constant -24. All checked targets are expected to do the variable 64-bit add
| ;; in VGPRs and fold the constant into the load as offset:-24.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !0, which is defined outside this excerpt; the checks expect no
| ;; cache-policy or scope modifier on the load for any target.
| define <4 x float> @global_load_i8_zext_uniform_offset_immoffset(ptr addrspace(1) %sbase, i32 %soffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %soffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; Both components uniform, zext forced to LHS of addressing expression
| ;; NOTE(review): neither argument is marked inreg, and the checks read the base
| ;; from v[0:1] and the offset from v2 -- confirm whether scalar (inreg) operands
| ;; were intended for this "sgpr" case.
| ;; The address is built with integer arithmetic instead of a GEP so that
| ;; zext(%soffset) is operand 0 of the add and ptrtoint(%sbase) is operand 1.
| ;; The SDAG checks keep that order (v2 / v[2:3] is the first add source), while
| ;; the GlobalISel checks use v0 as the first source and v2 as the second.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !1, which is defined outside this excerpt; the checks expect
| ;; sc0 (gfx942) and glc (gfx1012, gfx1100), and no modifier on gfx906 or gfx1250.
| define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %soffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %soffset to i64
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
| %add = add i64 %zext.offset, %sbase.as.int
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; Both components uniform, zext forced to LHS of addressing expression, with immediate offset
| ;; NOTE(review): neither argument is marked inreg, and the checks read the base
| ;; from v[0:1] and the offset from v2 -- confirm whether scalar (inreg) operands
| ;; were intended for this "sgpr" case.
| ;; The address is built with integer arithmetic instead of a GEP:
| ;; zext(%soffset) is operand 0 of the first add, ptrtoint(%sbase) is operand 1,
| ;; and the constant 128 is added last. All checked targets are expected to fold
| ;; the constant into the load as offset:128; the SDAG checks keep v2 / v[2:3] as
| ;; the first add source, while the GlobalISel checks use v0 first.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !2, which is defined outside this excerpt; the checks expect
| ;; glc (gfx906, gfx1100), sc1 (gfx942), glc dlc (gfx1012) and scope:SCOPE_DEV
| ;; (gfx1250) on the load.
| define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %soffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %soffset to i64
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
| %add = add i64 %zext.offset, %sbase.as.int
| %add.immoffset = add i64 %add, 128
| %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !2)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; divergent 64-bit base, 32-bit scalar offset.
| ;; NOTE(review): %soffset is not marked inreg, and the checks read it from v2
| ;; (the base comes from v[0:1]) -- confirm whether a scalar (inreg) offset was
| ;; intended here.
| ;; The address is an inbounds i8 GEP of %vbase by zext(%soffset). All checked
| ;; targets are expected to do the 64-bit add in VGPRs and then issue the load
| ;; with no immediate offset.
| ;; The load is @llvm.amdgcn.av.load.b128.p1 (128-bit, despite "i8" in the name)
| ;; with metadata !3, which is defined outside this excerpt; the checks expect
| ;; glc (gfx906, gfx1100), sc0 sc1 (gfx942), glc dlc (gfx1012) and
| ;; scope:SCOPE_SYS (gfx1250) on the load.
| define <4 x float> @global_load_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 %soffset) {
| ; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX906-SDAG: ; %bb.0:
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX942-SDAG: ; %bb.0:
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1012-SDAG: ; %bb.0:
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1100-SDAG: ; %bb.0:
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1250-SDAG: ; %bb.0:
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
| ;
| ; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX906-ISEL: ; %bb.0:
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX942-ISEL: ; %bb.0:
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
| ; GFX942-ISEL-NEXT: s_nop 1
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1012-ISEL: ; %bb.0:
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1100-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1100-ISEL: ; %bb.0:
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
| ; GFX1250-ISEL: ; %bb.0:
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
| %zext.offset = zext i32 %soffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
| %cast.load = bitcast <4 x i32> %load to <4 x float>
| ret <4 x float> %cast.load
| }
| |
| ;; divergent 64-bit base, 32-bit scalar offset, with imm offset |
| ;; |
| ;; Address = %vbase + zext(%soffset) + 4095, built from two i8-typed GEPs and |
| ;; passed to llvm.amdgcn.av.load.b128.p1 with metadata operand !0 (a module-level |
| ;; node defined outside this function). The <4 x i32> result is bitcast to |
| ;; <4 x float> and returned. |
| ;; NOTE(review): despite the "sgpr32"/"scalar" wording, %soffset is not marked |
| ;; inreg and every check below reads it from v2 - confirm the naming is intended. |
| ;; How the constant 4095 is expected to be folded, per the checks below: |
| ;;   * gfx906, gfx942, gfx1100, gfx1250 (both selectors): load uses offset:4095. |
| ;;   * gfx1012 with -global-isel=0: 0x800 is added to the address and the load |
| ;;     uses offset:2047 (0x800 + 2047 = 4095). |
| ;;   * gfx1012 with -global-isel=1: 0xfff is added to the address and the load |
| ;;     carries no immediate offset. |
| define <4 x float> @global_load_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset |
| ; Constant byte offset whose folding into the load is being checked. |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Natural addressing shifts with restricted range |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Cannot push the shift into 32-bits, and cannot match. |
| ;; |
| ;; An i32 index is loaded from %voffset.ptr (no !range metadata on that load), |
| ;; zero-extended to i64 and used as a float-typed GEP index on %sbase, i.e. the |
| ;; address is %sbase + (zext(index) << 2). The b128 load takes metadata operand |
| ;; !1 (a module-level node defined outside this function). |
| ;; Every check below keeps the scale as a 64-bit operation: |
| ;;   * gfx906, gfx1012, gfx1100: v_lshlrev_b64 by 2, then a 64-bit add done as |
| ;;     an add / add-with-carry pair. |
| ;;   * gfx942, gfx1250: a single v_lshl_add_u64 with shift amount 2. |
| define <4 x float> @global_load_f32_natural_addressing(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_natural_addressing: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_natural_addressing: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_natural_addressing: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_natural_addressing: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_natural_addressing: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_natural_addressing: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ; Index load deliberately carries no !range, so its value may use all 32 bits. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr |
| %zext.offset = zext i32 %voffset to i64 |
| ; float-typed GEP: the index is scaled by 4 bytes. |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Cannot push the shift into 32-bits, with an immediate offset. |
| ;; |
| ;; Address = %sbase + zext(index) + 128, where the i32 index is loaded from |
| ;; %voffset.ptr without !range metadata. The b128 load takes metadata operand !2 |
| ;; (a module-level node defined outside this function). |
| ;; NOTE(review): both GEPs in this function are i8-typed, so the index is a byte |
| ;; offset and no shift by 2 appears in any check below; the "shift"/"f32" wording |
| ;; does not match the IR - confirm the intent. |
| ;; Every check below folds the constant into the load as offset:128, leaving |
| ;; only the 64-bit add of the zero-extended index to the base. |
| define <4 x float> @global_load_f32_natural_addressing_immoffset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing_immoffset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr |
| %zext.offset = zext i32 %voffset to i64 |
| ; i8-typed GEPs: the index and the constant are both plain byte offsets. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Range is sufficiently restricted to push the shift into 32-bits. |
| ;; |
| ;; An i32 index is loaded from %voffset.ptr with !range !4 and !noundef (!4 is |
| ;; a module-level node defined outside this function), zero-extended to i64 and |
| ;; used as a float-typed GEP index on %sbase. The b128 load takes metadata |
| ;; operand !3 (also defined outside this function). |
| ;; Every check below performs the scale as a 32-bit v_lshlrev_b32 by 2 and then |
| ;; adds the zero-extended result to the 64-bit base; no 64-bit shift is emitted. |
| define <4 x float> @global_load_f32_zext_vgpr_range(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ; The !range on this load is what the 32-bit shift in the checks relies on. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Range is sufficiently restricted to push the shift into 32-bits, with an imm offset |
| ;; |
| ;; An i32 index is loaded from %voffset.ptr with !range !4 and !noundef (!4 is |
| ;; a module-level node defined outside this function), zero-extended to i64 and |
| ;; used as a float-typed GEP index on %sbase; a second float-typed GEP then adds |
| ;; 100 elements (400 bytes). The b128 load takes metadata operand !0 (also |
| ;; defined outside this function). |
| ;; Every check below performs the scale as a 32-bit v_lshlrev_b32 by 2, adds the |
| ;; zero-extended result to the 64-bit base, and folds the constant into the load |
| ;; as offset:400. |
| define <4 x float> @global_load_f32_zext_vgpr_range_imm_offset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| ; 100 floats = 400 bytes; this is the offset:400 seen in the checks above. |
| %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; The range is 1 beyond the limit at which the shift can be narrowed to 32 bits. |
| define <4 x float> @global_load_f32_zext_vgpr_range_too_large(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Body: load a 32-bit index from %voffset.ptr (tagged !range !5 and |
| ;; !noundef), zero-extend it to 64 bits, and use it to index %sbase as an |
| ;; array of float. That address feeds the b128 av.load intrinsic with |
| ;; metadata !1; the <4 x i32> result is bitcast to <4 x float> and returned. |
| ;; In the expected output above, every run line scales the index with a |
| ;; 64-bit shift (v_lshlrev_b64 or v_lshl_add_u64) after zeroing the high |
| ;; half, instead of a 32-bit v_lshlrev_b32. |
| ;; NOTE(review): !1 and !5 are defined outside this function; presumably !5 |
| ;; is the range that is one value too large for the 32-bit shift -- confirm |
| ;; against the metadata definitions at the end of the file. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; or-with-constant as add |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Check or-as-add with split 64-bit or. |
| define <4 x float> @global_load_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) %sbase, i32 %idx) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 16, v1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1100-SDAG-NEXT: v_or_b32_e32 v1, 16, v1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 16, v1 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1100-ISEL-NEXT: v_or_b32_e32 v1, 16, v1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Body: the address is built from integers only. %idx is zero-extended to |
| ;; 64 bits, OR'ed with the constant 16, and converted with inttoptr to an |
| ;; addrspace(1) pointer; %sbase is not referenced. The b128 av.load intrinsic |
| ;; is called with metadata !2 and its result is bitcast to <4 x float>. |
| ;; In the expected output above, every run line performs the OR on the low |
| ;; 32 bits only, writes 0 to the high half of the address pair, and issues |
| ;; the load with "off" and no immediate offset (the 16 is not folded). |
| ;; NOTE(review): !2 is defined outside this function; the cache-policy bits |
| ;; on the loads (glc / sc1 / glc dlc / scope:SCOPE_DEV) presumably come from |
| ;; it -- confirm against the metadata definitions. |
| %zext.idx = zext i32 %idx to i64 |
| %or = or i64 %zext.idx, 16 |
| %addr = inttoptr i64 %or to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; or-as-add with offset exceeding gfx9 imm range |
| define <4 x float> @global_load_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) %sbase, i32 %idx) { |
| ; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1100-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, 0x1040, v1 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1100-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-ISEL-NEXT: v_or_b32_e32 v2, 0x1040, v1 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Body: same address construction as the _16 variant, but the OR constant |
| ;; is 4160 (0x1040 in the expected output). %idx is zero-extended to 64 bits, |
| ;; OR'ed with the constant, and converted with inttoptr to an addrspace(1) |
| ;; pointer; %sbase is not referenced. The load uses metadata !3. |
| ;; In the expected output above, every run line ORs the literal 0x1040 into |
| ;; the low 32 bits with v_or_b32, writes 0 to the high half of the address |
| ;; pair, and issues the load with "off" and no immediate offset. |
| ;; NOTE(review): !3 is defined outside this function; the cache-policy bits |
| ;; on the loads (glc / sc0 sc1 / glc dlc / scope:SCOPE_SYS) presumably come |
| ;; from it -- confirm against the metadata definitions. |
| %zext.idx = zext i32 %idx to i64 |
| %or = or i64 %zext.idx, 4160 |
| %addr = inttoptr i64 %or to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Full 64-bit scalar add. |
| ;;------------------------------------------------------------------------------ |
| define <4 x float> @global_addr_64bit_lsr_iv(ptr addrspace(1) %arg) { |
| ; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX906-SDAG: ; %bb.0: ; %bb |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-SDAG-NEXT: .LBB60_1: ; %bb3 |
| ; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB60_1 |
| ; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX942-SDAG: ; %bb.0: ; %bb |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX942-SDAG-NEXT: .LBB60_1: ; %bb3 |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1 |
| ; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff |
| ; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB60_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1012-SDAG: ; %bb.0: ; %bb |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-SDAG-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB60_1 |
| ; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1100-SDAG: ; %bb.0: ; %bb |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX1100-SDAG-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_i32 s0, s0, 1 |
| ; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff |
| ; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB60_1 |
| ; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1250-SDAG: ; %bb.0: ; %bb |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX1250-SDAG-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 |
| ; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff |
| ; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB60_1 |
| ; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX906-ISEL: ; %bb.0: ; %bb |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; GFX906-ISEL-NEXT: .LBB60_1: ; %bb3 |
| ; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2 |
| ; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 |
| ; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB60_1 |
| ; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX942-ISEL: ; %bb.0: ; %bb |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX942-ISEL-NEXT: .LBB60_1: ; %bb3 |
| ; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2 |
| ; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 |
| ; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB60_1 |
| ; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1012-ISEL: ; %bb.0: ; %bb |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; GFX1012-ISEL-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB60_1 |
| ; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1100-ISEL: ; %bb.0: ; %bb |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX1100-ISEL-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB60_1 |
| ; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv: |
| ; GFX1250-ISEL: ; %bb.0: ; %bb |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX1250-ISEL-NEXT: .LBB60_1: ; %bb3 |
| ; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB60_1 |
| ; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Body: %bb3 is a single-block loop. %i starts at 0, is incremented by 1 |
| ;; each iteration (%i8), and the loop exits to %bb2 once %i8 equals 256. |
| ;; Each iteration zero-extends %i to 64 bits, indexes %arg as an array of |
| ;; float, and loads four dwords through the b128 av.load intrinsic with |
| ;; metadata !0. %bb2 returns %i6, i.e. the value loaded on the final |
| ;; iteration, bitcast to <4 x float>. |
| ;; In the expected output above, the loop contains only the counter update |
| ;; and compare (the counter is initialized to -1 and compared with 0xff |
| ;; after the increment); the 64-bit address computation and a single load |
| ;; appear after the loop, in the %bb2 section. The SDAG run lines keep the |
| ;; counter in SGPRs, the GlobalISel run lines keep it in a VGPR. |
| bb: |
| br label %bb3 |
| |
| bb2: ; preds = %bb3 |
| ret <4 x float> %i6 |
| |
| bb3: ; preds = %bb3, %bb |
| %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ] |
| %i4 = zext i32 %i to i64 |
| %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !0) |
| %i6 = bitcast <4 x i32> %load to <4 x float> |
| %i8 = add nuw nsw i32 %i, 1 |
| %i9 = icmp eq i32 %i8, 256 |
| br i1 %i9, label %bb2, label %bb3 |
| } |
| |
| ;; Make sure we only have a single zero vaddr initialization. |
| |
| ;; 64-bit LSR induction variable with multiple loads |
| define <4 x float> @global_addr_64bit_lsr_iv_multiload(ptr addrspace(1) %arg, ptr addrspace(1) %arg.1, i32 %x) { |
| ; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX906-SDAG: ; %bb.0: ; %bb |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-SDAG-NEXT: .LBB61_1: ; %bb5 |
| ; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB61_1 |
| ; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX942-SDAG: ; %bb.0: ; %bb |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX942-SDAG-NEXT: .LBB61_1: ; %bb5 |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1 |
| ; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff |
| ; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB61_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1012-SDAG: ; %bb.0: ; %bb |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-SDAG-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB61_1 |
| ; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1100-SDAG: ; %bb.0: ; %bb |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX1100-SDAG-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_i32 s0, s0, 1 |
| ; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff |
| ; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB61_1 |
| ; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1250-SDAG: ; %bb.0: ; %bb |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1 |
| ; GFX1250-SDAG-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 |
| ; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff |
| ; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB61_1 |
| ; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX906-ISEL: ; %bb.0: ; %bb |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; GFX906-ISEL-NEXT: .LBB61_1: ; %bb5 |
| ; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2 |
| ; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 |
| ; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB61_1 |
| ; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX942-ISEL: ; %bb.0: ; %bb |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX942-ISEL-NEXT: .LBB61_1: ; %bb5 |
| ; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2 |
| ; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 |
| ; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB61_1 |
| ; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1012-ISEL: ; %bb.0: ; %bb |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; GFX1012-ISEL-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB61_1 |
| ; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1100-ISEL: ; %bb.0: ; %bb |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX1100-ISEL-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB61_1 |
| ; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload: |
| ; GFX1250-ISEL: ; %bb.0: ; %bb |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX1250-ISEL-NEXT: .LBB61_1: ; %bb5 |
| ; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 |
| ; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2 |
| ; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB61_1 |
| ; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| bb: |
| br label %bb5 |
| |
| bb2: |
| %y = icmp eq i32 %x, 0 |
| br i1 %y, label %bb3, label %bb4 |
| |
| bb3: |
| ret <4 x float> %i6 |
| |
| bb4: |
| ret <4 x float> %i6.1 |
| |
| bb5: |
| %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ] |
| %i4 = zext i32 %i to i64 |
| %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !1) |
| %i6 = bitcast <4 x i32> %load to <4 x float> |
| %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4 |
| %load.1 = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !2) |
| %i6.1 = bitcast <4 x i32> %load to <4 x float> |
| %i8 = add nuw nsw i32 %i, 1 |
| %i9 = icmp eq i32 %i8, 256 |
| br i1 %i9, label %bb2, label %bb5 |
| } |
| |
| ;;============================================================================== |
| ;; Various saddr addressing modes (derived from global-saddr-load.ll) |
| ;;============================================================================== |
| |
| ;;------------------------------------------------------------------------------ |
| ;; No vgpr offset, constants |
| ;;------------------------------------------------------------------------------ |
| |
| ;; SGPR base only |
| ;; The inreg pointer is handed to the load unchanged, with no byte offset. |
| ;; Every expected sequence zeroes v0 and uses it as the VGPR offset next to |
| ;; the SGPR base pair (s[16:17] on gfx906/gfx1012, s[0:1] on the others). |
| ;; NOTE(review): !3 is defined outside this excerpt; the gfx1250 checks expect |
| ;; scope:SCOPE_SYS, so it presumably selects system scope -- confirm. |
| define <4 x float> @global_load_saddr_i8_offset_0(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %sbase, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx9 immediate offset |
| ;; Byte offset 4095 is expected to fold entirely into the instruction's |
| ;; immediate offset on gfx906, gfx942, gfx1100 and gfx1250. gfx1012 instead |
| ;; splits it into 0x800 in the VGPR offset plus offset:2047. |
| ;; NOTE(review): !0 is defined outside this excerpt; no cache-policy or scope |
| ;; modifier appears on any expected load in this function. |
| define <4 x float> @global_load_saddr_i8_offset_4095(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4095: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx9 immediate offset + 1 |
| ;; Byte offset 4096 is expected to be materialized as 0x1000 in the VGPR |
| ;; offset, with no immediate, on gfx906, gfx942, gfx1012 and gfx1100. gfx1250 |
| ;; keeps a zero VGPR offset and folds the constant as offset:4096. |
| ;; NOTE(review): !1 is defined outside this excerpt; the expected loads carry |
| ;; sc0 on gfx942 and glc on gfx1012/gfx1100, and no modifier on gfx906/gfx1250. |
| define <4 x float> @global_load_saddr_i8_offset_4096(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx9 immediate offset + 2 |
| ;; Byte offset 4097 is expected to be split into 0x1000 in the VGPR offset |
| ;; plus offset:1 on gfx906, gfx942, gfx1012 and gfx1100. gfx1250 keeps a zero |
| ;; VGPR offset and folds the whole constant as offset:4097. |
| ;; NOTE(review): !2 is defined outside this excerpt; the expected loads carry |
| ;; glc on gfx906/gfx1100, sc1 on gfx942, glc dlc on gfx1012 and |
| ;; scope:SCOPE_DEV on gfx1250, so it presumably selects agent/device scope -- |
| ;; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_4097(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4097: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx9 immediate offset |
| ;; Byte offset -4096 is expected to fold into the immediate offset on gfx906, |
| ;; gfx942, gfx1100 and gfx1250. gfx1012 instead forms the full address first |
| ;; (VALU add/add-with-carry under SelectionDAG; scalar add/addc followed by |
| ;; copies into v[0:1] under GlobalISel) and loads through v[0:1] with "off". |
| ;; NOTE(review): !3 is defined outside this excerpt; the expected loads carry |
| ;; glc on gfx906/gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012 and |
| ;; scope:SCOPE_SYS on gfx1250, so it presumably selects system scope -- |
| ;; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff000 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx9 immediate offset -1 |
| ;; Byte offset -4097 is expected to fold into the immediate only on gfx1250 |
| ;; (offset:-4097). gfx906 and gfx942 add 0xffffefff / -1 to the base with |
| ;; scalar add/addc and keep the SGPR-base form with a zero VGPR offset. |
| ;; gfx1012 and gfx1100 switch to a VGPR address: SelectionDAG adds 0xfffff000 |
| ;; and keeps offset:-1, while GlobalISel adds the whole constant with scalar |
| ;; instructions, copies the result into v[0:1] and uses no immediate. |
| ;; NOTE(review): !0 is defined outside this excerpt; no cache-policy or scope |
| ;; modifier appears on any expected load in this function. |
| define <4 x float> @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffefff |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4097: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx9 immediate offset - 2 (byte offset -4098). |
| ;; Loads <4 x i32> from %sbase - 4098 and returns it bitcast to <4 x float>. |
| ;; Per the checks below only gfx1250 folds the whole offset into the load; the |
| ;; other targets materialize (part of) the address with explicit adds. |
| ;; NOTE(review): metadata !1 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffeffe |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4098: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx10 immediate offset + 1 (byte offset 2048). |
| ;; Loads <4 x i32> from %sbase + 2048 and returns it bitcast to <4 x float>. |
| ;; The gfx1012 checks below move 0x800 into the VGPR offset instead of using an |
| ;; immediate, so 2048 is one past the largest gfx10 immediate offset; the other |
| ;; targets fold 2048 directly into the load. |
| ;; NOTE(review): metadata !2 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_2048(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx10 immediate offset + 2 (byte offset 2049). |
| ;; Loads <4 x i32> from %sbase + 2049 and returns it bitcast to <4 x float>. |
| ;; The gfx1012 checks below split the offset into 0x800 in the VGPR offset plus |
| ;; an immediate offset of 1; the other targets fold 2049 directly into the load. |
| ;; NOTE(review): metadata !3 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_2049(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum gfx10 immediate offset + 3 (byte offset 2050). |
| ;; Loads <4 x i32> from %sbase + 2050 and returns it bitcast to <4 x float>. |
| ;; The gfx1012 checks below split the offset into 0x800 in the VGPR offset plus |
| ;; an immediate offset of 2; the other targets fold 2050 directly into the load. |
| ;; NOTE(review): metadata !0 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_2050(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2050: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx10 immediate offset (byte offset -2048). |
| ;; Loads <4 x i32> from %sbase - 2048 and returns it bitcast to <4 x float>. |
| ;; Every target checked below folds -2048 directly into the load's immediate |
| ;; offset. |
| ;; NOTE(review): metadata !1 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx10 immediate offset - 1 (byte offset -2049). |
| ;; Loads <4 x i32> from %sbase - 2049 and returns it bitcast to <4 x float>. |
| ;; Per the checks below only gfx1012 cannot fold the whole offset: it builds the |
| ;; address with explicit adds, while the other targets use an immediate of -2049. |
| ;; NOTE(review): metadata !2 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7ff |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; SGPR base with maximum negative gfx10 immediate offset - 2 (byte offset -2050). |
| ;; Loads <4 x i32> from %sbase - 2050 and returns it bitcast to <4 x float>. |
| ;; Per the checks below only gfx1012 cannot fold the whole offset: it builds the |
| ;; address with explicit adds, while the other targets use an immediate of -2050. |
| ;; NOTE(review): metadata !3 is defined outside this excerpt -- presumably the |
| ;; scope operand of the intrinsic; confirm against the metadata definitions. |
| define <4 x float> @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7fe |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2050: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum gfx12 saddr positive offset: the GEP adds 8388607 (0x7FFFFF) bytes to |
| ;; the inreg base pointer. The GFX1250 checks fold the whole constant into the |
| ;; instruction's immediate offset. The older targets' checks split it instead: |
| ;; the upper part is materialized in the VGPR offset (0x7ff000, or 0x7ff800 on |
| ;; GFX1012) and the remainder stays in the immediate (4095, or 2047 on GFX1012). |
| ;; The load's metadata operand is !0, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum gfx12 saddr negative offset: the GEP adds -8388608 (-0x800000) bytes |
| ;; to the inreg base pointer (note that the function name says 0xFFFFFF). The |
| ;; GFX1250 checks fold the whole constant into the instruction's immediate |
| ;; offset. All other targets' checks first perform a 64-bit add of the constant |
| ;; (0xff800000 to the low word, -1 plus carry to the high word) and then load |
| ;; with no immediate offset. |
| ;; The load's metadata operand is !1, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000 |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xff800000 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xff800000, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; 32-bit unsigned max offset: the GEP adds 4294967295 (0xFFFFFFFF) bytes to the |
| ;; inreg base pointer. The GFX906/GFX942/GFX1012/GFX1100 checks keep the saddr |
| ;; form, placing 0xfffff000 (0xfffff800 on GFX1012) in the VGPR offset and the |
| ;; remainder in the immediate (4095, or 2047 on GFX1012). The GFX1250 checks |
| ;; differ per selector: SelectionDAG adds 0xff800000 with a 64-bit VALU add and |
| ;; keeps 8388607 as the immediate, while GlobalISel does the full 64-bit scalar |
| ;; add and loads through a VGPR address with no immediate. |
| ;; The load's metadata operand is !2, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0xFFFFFFFF(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff800 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff800 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Exceeds a 32-bit offset: the GEP adds 4294967296 (0x100000000) bytes to the |
| ;; inreg base pointer, so only the high word of the address changes. The |
| ;; SelectionDAG checks add 1 to the high SGPR of the base and keep the saddr |
| ;; form with a zero VGPR offset. The GlobalISel checks perform a full 64-bit |
| ;; scalar add (0 to the low word, 1 plus carry to the high word), copy the |
| ;; result to VGPRs and load through the VGPR address. |
| ;; The load's metadata operand is !3, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0x100000000(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_i32 s17, s17, 1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_i32 s1, s1, 1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, 1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: s_add_i32 s1, s1, 1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, 1 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Exceeds a 32-bit offset by one more byte: the GEP adds 4294967297 |
| ;; (0x100000001) bytes to the inreg base pointer. The SelectionDAG checks add |
| ;; 0x100000000 with a 64-bit VALU add (0 to the low word, 1 plus carry to the |
| ;; high word) and leave the remaining 1 as the immediate offset. The GlobalISel |
| ;; checks add the whole constant with a 64-bit scalar add, copy the result to |
| ;; VGPRs and load with no immediate offset. |
| ;; The load's metadata operand is !0, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 1 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Exceeds a 32-bit offset by the maximum gfx9 immediate: the GEP adds |
| ;; 4294971391 (0x100000000 + 0xFFF) bytes to the inreg base pointer. |
| ;; Expected splits of the constant: |
| ;;   - GFX906/GFX942 (both selectors): 64-bit scalar add of the whole constant, |
| ;;     then the saddr form with a zero VGPR offset and no immediate. |
| ;;   - GFX1012 SelectionDAG: VALU add of 0x100000800 plus immediate 2047. |
| ;;   - GFX1100/GFX1250 SelectionDAG: VALU add of 0x100000000 plus immediate 4095. |
| ;;   - GFX1012/GFX1100/GFX1250 GlobalISel: 64-bit scalar add of the whole |
| ;;     constant, copy to VGPRs, load with no immediate. |
| ;; The load's metadata operand is !1, which is defined outside this section. |
| define <4 x float> @global_load_saddr_i8_offset_0x100000FFF(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xfff |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xfff |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xfff |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xfff |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfff |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xfff |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0xfff |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; exceeds 32-bit offset + max gfx9 imm + 1 |
| ;; The constant offset is 0x100001000 (4294971392) = 2^32 + 4096, i.e. one past |
| ;; the 32-bit range plus one past the 4095 maximum gfx9 immediate offset. |
| ;; Every expected sequence below performs a 64-bit add on the base pointer; |
| ;; only the gfx1250 SelectionDAG output keeps 4096 as an instruction offset. |
| define <4 x float> @global_load_saddr_i8_offset_0x100001000(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0x1000 |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x1000 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x1000, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0x1000 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0x1000 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0x1000 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0x1000 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0x1000 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Byte GEP off the inreg base, a 128-bit av.load, then a bitcast to <4 x float>. |
| ;; NOTE(review): !2 is defined outside this excerpt; the expected glc / sc1 / |
| ;; "glc dlc" / SCOPE_DEV modifiers suggest a device (agent) scope - confirm. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative 32-bit unsigned max offset |
| ;; The constant offset is -0xFFFFFFFF (-4294967295). As a 64-bit value that is |
| ;; low dword 1 and high dword -1, which is exactly what the global-isel |
| ;; sequences below add with a scalar add / add-with-carry pair. |
| ;; The SelectionDAG sequences instead split the low part into a register add |
| ;; plus a negative instruction offset that sum to +1 (0x1000 with offset:-4095, |
| ;; 0x800 with offset:-2047 on gfx1012, 0x800000 with offset:-8388607 on gfx1250). |
| define <4 x float> @global_load_saddr_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; GFX942-SDAG-NEXT: s_nop 0 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800000, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 1 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Byte GEP off the inreg base, a 128-bit av.load, then a bitcast to <4 x float>. |
| ;; NOTE(review): !3 is defined outside this excerpt; the expected glc / |
| ;; "sc0 sc1" / "glc dlc" / SCOPE_SYS modifiers suggest system scope - confirm. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative exceeds 32-bit offset |
| ;; The constant offset is -0x100000000 (-4294967296) = -2^32, i.e. low dword 0 |
| ;; and high dword -1. |
| ;; The SelectionDAG sequences below only add -1 to the high SGPR of the base and |
| ;; keep the saddr form with a zero VGPR offset; the global-isel sequences do a |
| ;; full 64-bit scalar add (adding 0 to the low dword) and use a VGPR address. |
| define <4 x float> @global_load_saddr_i8_offset_neg0x100000000(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_add_i32 s17, s17, -1 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_add_i32 s1, s1, -1 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, -1 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: s_add_i32 s1, s1, -1 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, -1 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Byte GEP off the inreg base, a 128-bit av.load, then a bitcast to <4 x float>. |
| ;; With metadata !0 (defined outside this excerpt) none of the expected loads |
| ;; above carries a cache-policy or scope modifier. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; negative exceeds 32-bit offset + 1 |
| ;; The constant offset is -0x100000001 (-4294967297) = -(2^32 + 1), i.e. low |
| ;; dword -1 and high dword -2. |
| ;; The SelectionDAG sequences below add -2^32 to the base (0 to the low dword, |
| ;; -1 with carry to the high dword) and fold the remaining -1 into the |
| ;; instruction offset; the global-isel sequences add the whole constant with a |
| ;; scalar add / add-with-carry pair (-1 and -2) and use no instruction offset. |
| define <4 x float> @global_load_saddr_i8_offset_neg0x100000001(ptr addrspace(1) inreg %sbase) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_add_u32 s4, s16, -1 |
| ; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -2 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_add_u32 s0, s0, -1 |
| ; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -2 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, -1 |
| ; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -2 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, -1 |
| ; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; Byte GEP off the inreg base, a 128-bit av.load, then a bitcast to <4 x float>. |
| ;; With metadata !1 (defined outside this excerpt) the expected loads carry sc0 |
| ;; on gfx942 and glc on gfx1012/gfx1100, and no modifier on gfx906 and gfx1250. |
| ;; NOTE(review): that pattern suggests a workgroup-level scope - confirm. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Basic addressing patterns |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Basic pattern, no immediate offset. |
| ;; The address is the inreg base plus a zero-extended 32-bit VGPR offset. |
| ;; Every target and both selectors below are expected to use the saddr form |
| ;; directly (VGPR offset v0 with an SGPR-pair base) with no extra address |
| ;; arithmetic before the load. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; zext of the i32 offset, byte GEP off the inreg base, a 128-bit av.load, then |
| ;; a bitcast to <4 x float>. |
| ;; NOTE(review): !2 is defined outside this excerpt; the expected glc / sc1 / |
| ;; "glc dlc" / SCOPE_DEV modifiers suggest a device (agent) scope - confirm. |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx9 |
| ;; The address is the inreg base plus a zero-extended VGPR offset plus the |
| ;; constant 4095. The gfx906, gfx942, gfx1100 and gfx1250 sequences below fold |
| ;; offset:4095 into the saddr form. The gfx1012 sequences instead build a full |
| ;; VGPR address: SelectionDAG splits 4095 into an add of 0x800 plus offset:2047, |
| ;; while global-isel adds 0xfff and uses no instruction offset. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; zext of the i32 offset, two chained byte GEPs (variable offset, then +4095), |
| ;; a 128-bit av.load, then a bitcast to <4 x float>. |
| ;; NOTE(review): !3 is defined outside this excerpt; the expected glc / |
| ;; "sc0 sc1" / "glc dlc" / SCOPE_SYS modifiers suggest system scope - confirm. |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx9 + 1 |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4096(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx9 (-4096): per the checks below, gfx906, gfx942, gfx1100 and gfx1250 fold it into the load's offset: field, while gfx1012 first adds it to the address with VALU adds. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx9 - 1 (-4097): per the checks below, only gfx1250 still folds it into the load's offset: field; the other targets build the address with VALU adds (the -global-isel=0 runs add 0xfffff000 and keep offset:-1, the -global-isel=1 runs add the whole constant 0xffffefff). |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx10 (2047): per the checks below, every tested target folds it into the load's offset: field and keeps the SGPR base plus VGPR offset addressing form. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx10 + 1 (2048): per the checks below, gfx1012 no longer folds it and instead adds 0x800 to the address with VALU adds; gfx906, gfx942, gfx1100 and gfx1250 keep it in the load's offset: field. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2048(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx10 (-2048): per the checks below, every tested target folds it into the load's offset: field and keeps the SGPR base plus VGPR offset addressing form. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum negative offset on gfx10 - 1 (-2049): per the checks below, only gfx1012 falls back to VALU address adds (the -global-isel=0 run adds 0xfffff800 and keeps offset:-1, the -global-isel=1 run adds the whole constant 0xfffff7ff); the other targets fold it into the load's offset: field. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Maximum positive offset on gfx12. |
| ;; The address is %sbase (inreg) + zext(%voffset) + 8388607 (0x7FFFFF), loaded |
| ;; with scope metadata !3 (defined elsewhere in this file). |
| ;; Expected codegen, per the checks below: |
| ;;  * gfx1250 keeps the saddr form and folds the whole constant into the |
| ;;    instruction's offset field (offset:8388607). |
| ;;  * Older targets compute the address with 64-bit VGPR adds. SelectionDAG |
| ;;    splits the constant into an add plus an immediate offset (4095 on |
| ;;    gfx906/gfx942/gfx1100, 2047 on gfx1012); GlobalISel adds the full |
| ;;    0x7fffff and uses no immediate offset. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Minimum offset on gfx12. |
| ;; Despite the "0xFFFFFF" in the test name, the constant added to the address |
| ;; is -8388608 (-0x800000). The address is %sbase (inreg) + zext(%voffset) + |
| ;; that constant, loaded with scope metadata !0 (defined elsewhere in this file). |
| ;; Expected codegen, per the checks below: |
| ;;  * gfx1250 keeps the saddr form and folds the constant into the |
| ;;    instruction's offset field (offset:-8388608). |
| ;;  * Older targets compute the address with 64-bit VGPR adds, adding |
| ;;    0xff800000 to the low half and -1 (plus carry) to the high half, and |
| ;;    issue the load with no immediate offset. |
| ;;  * No cache-policy or scope modifier is expected on any target. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-SDAG-NEXT: s_nop 1 |
| ; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| |
| ;; Maximum positive offset on gfx9, and immediate needs to be moved lower. |
| ;; Here the constant GEP (+4095) is applied to %sbase first and the |
| ;; zero-extended VGPR offset is added second, i.e. the reverse of the usual |
| ;; "variable offset, then constant" order. The load uses scope metadata !1 |
| ;; (defined elsewhere in this file). |
| ;; Expected codegen, per the checks below: |
| ;;  * gfx906, gfx942, gfx1100 and gfx1250 still select the saddr form with |
| ;;    offset:4095. |
| ;;  * gfx1012 computes the address with 64-bit VGPR adds instead. SelectionDAG |
| ;;    splits the constant as 0x800 + offset:2047; GlobalISel adds 0xfff and |
| ;;    uses no immediate offset. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4 |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| ;; Constant offset is applied to the base first, the variable offset second. |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095 |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; pointer addressing done in integers |
| ;; The address is built without getelementptr: %sbase is converted with |
| ;; ptrtoint, the zero-extended VGPR offset is added as an i64, and the sum is |
| ;; converted back with inttoptr. The load uses scope metadata !2 (defined |
| ;; elsewhere in this file). |
| ;; Per the checks below, every target and both selectors are still expected |
| ;; to select the saddr form (SGPR pair base + 32-bit VGPR offset) with no |
| ;; immediate offset. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| ;; Integer form of "base + offset": base on the LHS of the add. |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| %add = add i64 %sbase.as.int, %zext.offset |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression |
| ;; The address is again built from ptrtoint/add/inttoptr, but here the |
| ;; zero-extended VGPR offset is the first (LHS) operand of the add and the |
| ;; base is the second. The load uses scope metadata !3 (defined elsewhere in |
| ;; this file). |
| ;; Per the checks below, the operand order makes no difference: every target |
| ;; and both selectors are expected to select the saddr form with no immediate |
| ;; offset. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| ;; Commuted operands: the zero-extended offset is the LHS of the add. |
| %add = add i64 %zext.offset, %sbase.as.int |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression, with immediate offset |
| ;; The address is built from ptrtoint/add/inttoptr with the zero-extended VGPR |
| ;; offset as the LHS of the first add; the constant 128 is then added to that |
| ;; sum as the outermost add. The load uses scope metadata !0 (defined |
| ;; elsewhere in this file). |
| ;; Per the checks below, every target and both selectors are expected to |
| ;; select the saddr form with the constant folded as offset:128, and with no |
| ;; cache-policy or scope modifier. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| %add = add i64 %zext.offset, %sbase.as.int |
| ;; Immediate is the outermost add, applied to (offset + base). |
| %add.immoffset = add i64 %add, 128 |
| %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position |
| ;; The address is built from ptrtoint/add/inttoptr. The constant 128 is added |
| ;; to the base first, and the zero-extended VGPR offset is then the LHS of the |
| ;; outer add, so the immediate sits on the inner add. The load uses scope |
| ;; metadata !1 (defined elsewhere in this file). |
| ;; Per the checks below, every target and both selectors are still expected |
| ;; to select the saddr form with the constant folded as offset:128. |
| define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) inreg %sbase, i32 %voffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %voffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| ;; Immediate is added to the base first (inner add); the offset is the LHS |
| ;; of the outer add. |
| %add.immoffset = add i64 %sbase.as.int, 128 |
| %add = add i64 %zext.offset, %add.immoffset |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Uniformity edge cases |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Both the 64-bit base and the 32-bit offset are scalar (both arguments are |
| ;; marked inreg). The offset is zero-extended to i64 and applied to the base |
| ;; with a byte-granular (i8) GEP; the result feeds av.load.b128 with metadata !2. |
| ;; Per the checks below, every configuration except GFX1250 with global-isel |
| ;; copies the offset into v0 and keeps the base in an SGPR pair. GFX1250 with |
| ;; global-isel instead folds the offset into the base with scalar adds and |
| ;; passes a zero VGPR offset. |
| define <4 x float> @global_load_saddr_i8_zext_uniform_offset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Both the 64-bit base and the 32-bit offset are scalar (both arguments are |
| ;; marked inreg), with an additional constant byte offset of -24 applied by a |
| ;; second GEP. The load is issued with metadata !3. |
| ;; Per the checks below, the constant is folded into the instruction's |
| ;; immediate field (offset:-24) in every configuration. As in the variant |
| ;; without the constant, GFX1250 with global-isel adds the scalar offset into |
| ;; the base with scalar adds instead of moving it into v0. |
| define <4 x float> @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Both components uniform (both arguments are marked inreg), zext forced to |
| ;; the LHS of the addressing expression. The address is built with |
| ;; ptrtoint / add / inttoptr rather than a GEP so that the zero-extended offset |
| ;; is the first operand of the add. The load is issued with metadata !0. |
| ;; Per the checks below, the base still ends up as the SGPR-pair operand of the |
| ;; load and no cache-policy or scope modifier is emitted. GFX1250 with |
| ;; global-isel adds the offset into the base with scalar adds and uses a zero |
| ;; VGPR offset; every other configuration moves the offset into v0. |
| define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| ;; Operand order is the point of this test: the zext is the first add operand. |
| %add = add i64 %zext.offset, %sbase.as.int |
| %dirty.gep = inttoptr i64 %add to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !0) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Both components uniform (both arguments are marked inreg), zext forced to |
| ;; the LHS of the addressing expression, with immediate offset. The address is |
| ;; built as (zext(offset) + ptrtoint(base)) + 128 and converted back with |
| ;; inttoptr. The load is issued with metadata !1. |
| ;; Per the checks below, the constant 128 is folded into the instruction's |
| ;; immediate field (offset:128) in every configuration. GFX1250 with |
| ;; global-isel adds the scalar offset into the base with scalar adds; every |
| ;; other configuration moves it into v0. |
| define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2 |
| ; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 |
| ;; Operand order is the point of this test: the zext is the first add operand, |
| ;; and the constant is added last, on top of the base + offset sum. |
| %add = add i64 %zext.offset, %sbase.as.int |
| %add.immoffset = add i64 %add, 128 |
| %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Divergent 64-bit base, 32-bit scalar offset: %vbase is a plain argument, |
| ;; only %soffset is marked inreg. The offset is zero-extended and applied with |
| ;; a byte-granular (i8) GEP; the load is issued with metadata !2. |
| ;; Per the checks below, no configuration uses the SGPR-base form here: the |
| ;; 64-bit address is computed into v[0:1] with a vector add and the load uses |
| ;; the "v[0:1], off" addressing form. |
| define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s17, 0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Divergent 64-bit base, 32-bit scalar offset, with imm offset: %vbase is a |
| ;; plain argument, only %soffset is marked inreg, and a second GEP adds a |
| ;; constant 4095 bytes. The load is issued with metadata !3. |
| ;; Per the checks below, the address is computed into v[0:1] with a vector add |
| ;; and the constant is folded as offset:4095 on every target except GFX1012. |
| ;; On GFX1012, SelectionDAG splits the constant into an extra add of 0x800 plus |
| ;; offset:2047, while global-isel adds the full 0xfff and emits no immediate |
| ;; offset. |
| define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 inreg %soffset) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s17, 0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| ; GFX942-ISEL-NEXT: s_nop 1 |
| ; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.offset = zext i32 %soffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Natural addressing shifts with restricted range |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Cannot push the shift into 32-bits, and cannot match. |
| define <4 x float> @global_load_saddr_f32_natural_addressing(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; IR under test: load a 32-bit index through %voffset.ptr, zero-extend it to i64 and use it to |
| ;; index %sbase as an array of float. The resulting address is passed to the av.load.b128 |
| ;; intrinsic together with metadata !0 (defined outside this function). |
| ;; In every expected output above the index is shifted left by 2 and added to the base as a |
| ;; 64-bit value, and the 128-bit load addresses through a VGPR pair with "off" (no SGPR base). |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr |
| %zext.offset = zext i32 %voffset to i64 |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !0) |
| ;; The intrinsic returns <4 x i32>; reinterpret the bits as the function's <4 x float> result. |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Zero-extended VGPR offset plus an immediate offset. The GEPs below are on i8, so there is no shift to push into 32-bits. |
| define <4 x float> @global_load_saddr_f32_natural_addressing_immoffset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; IR under test: load a 32-bit offset through %voffset.ptr, zero-extend it to i64 and apply it |
| ;; to %sbase with an i8 GEP (a byte offset, so no scaling); a second i8 GEP adds the constant 128. |
| ;; The resulting address is passed to the av.load.b128 intrinsic together with metadata !1 |
| ;; (defined outside this function). |
| ;; In every expected output above the 128-bit load takes the loaded value in v0 as its VGPR |
| ;; offset, an SGPR pair as its base, and offset:128 as its immediate. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1) |
| ;; The intrinsic returns <4 x i32>; reinterpret the bits as the function's <4 x float> result. |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Range is sufficiently restricted to push the shift into 32-bits. |
| define <4 x float> @global_load_f32_saddr_zext_vgpr_range(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; IR under test: load a 32-bit index through %voffset.ptr with !range !4 and !noundef attached |
| ;; (!4 is defined outside this function), zero-extend it to i64 and use it to index %sbase as an |
| ;; array of float. The resulting address is passed to the av.load.b128 intrinsic with metadata !2. |
| ;; In every expected output above the index is scaled with a 32-bit shift (v_lshlrev_b32 by 2) |
| ;; and the 128-bit load takes v0 as its VGPR offset with an SGPR pair as its base. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !2) |
| ;; The intrinsic returns <4 x i32>; reinterpret the bits as the function's <4 x float> result. |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Range is sufficiently restricted to push the shift into 32-bits, with an imm offset |
| define <4 x float> @global_load_f32_saddr_zext_vgpr_range_imm_offset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; IR under test: load a 32-bit index through %voffset.ptr with !range !4 and !noundef attached |
| ;; (!4 is defined outside this function), zero-extend it to i64 and use it to index %sbase as an |
| ;; array of float; a second float GEP then advances by 100 elements. The resulting address is |
| ;; passed to the av.load.b128 intrinsic with metadata !3. |
| ;; In every expected output above the index is scaled with a 32-bit shift (v_lshlrev_b32 by 2), |
| ;; and the 128-bit load takes v0 as its VGPR offset, an SGPR pair as its base, and offset:400 |
| ;; (100 floats * 4 bytes) as its immediate. |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3) |
| ;; The intrinsic returns <4 x i32>; reinterpret the bits as the function's <4 x float> result. |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; Range is 1 beyond the limit where we can move the shift into 32-bits. |
| define <4 x float> @global_load_f32_saddr_zext_vgpr_range_too_large(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) { |
| ; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0 |
| ; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3] |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0 |
| ; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| ;; IR under test: load a 32-bit index through %voffset.ptr with !range !5 and !noundef attached |
| ;; (!5 is defined outside this function), zero-extend it to i64 and use it to index %sbase as an |
| ;; array of float. The resulting address is passed to the av.load.b128 intrinsic with metadata !0. |
| ;; In every expected output above the index is shifted left by 2 and added to the base as a |
| ;; 64-bit value, and the 128-bit load addresses through a VGPR pair with "off" (no SGPR base). |
| %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{} |
| %zext.offset = zext i32 %voffset to i64 |
| %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !0) |
| ;; The intrinsic returns <4 x i32>; reinterpret the bits as the function's <4 x float> result. |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; or-with-constant as add |
| ;;------------------------------------------------------------------------------ |
| |
| ;; Check add-as-or with split 64-bit or: the address is (zext i32 %idx) or-ed with 16 and cast with inttoptr, then loaded through av.load.b128 with metadata !1 ("workgroup"); %sbase is unused. In every expected output below the full address is materialized in v[0:1] and the load uses "off", i.e. no immediate offset is folded. |
| define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) inreg %sbase, i32 %idx) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-SDAG-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_or_b32_e32 v0, 16, v0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.idx = zext i32 %idx to i64 |
| %or = or i64 %zext.idx, 16 |
| %addr = inttoptr i64 %or to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;; or-as-add with offset exceeding gfx9 imm range: same IR shape as the offset_16 case above, but or-ing 4160 (0x1040) into the zero-extended %idx and loading with metadata !2 ("agent"); %sbase is unused. The expected loads again take the full address from v[0:1] with "off". |
| define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) inreg %sbase, i32 %idx) { |
| ; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX906-SDAG: ; %bb.0: |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX942-SDAG: ; %bb.0: |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1012-SDAG: ; %bb.0: |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1100-SDAG: ; %bb.0: |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX906-ISEL: ; %bb.0: |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX942-ISEL: ; %bb.0: |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1012-ISEL: ; %bb.0: |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1100-ISEL: ; %bb.0: |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160: |
| ; GFX1250-ISEL: ; %bb.0: |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| %zext.idx = zext i32 %idx to i64 |
| %or = or i64 %zext.idx, 4160 |
| %addr = inttoptr i64 %or to ptr addrspace(1) |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2) |
| %cast.load = bitcast <4 x i32> %load to <4 x float> |
| ret <4 x float> %cast.load |
| } |
| |
| ;;------------------------------------------------------------------------------ |
| ;; Full 64-bit scalar add: a loop counting an i32 IV up to 256 indexes %arg as float elements by the zero-extended IV and returns the av.load.b128 result (metadata !3, empty scope string). In the expected output the loop body holds only the counter and the single load is issued in %bb2; SelectionDAG forms the address with scalar adds and uses the saddr form of the load, while GlobalISel adds in VGPRs and uses "off". |
| ;;------------------------------------------------------------------------------ |
| define <4 x float> @global_saddr_64bit_lsr_iv(ptr addrspace(1) inreg %arg) { |
| ; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX906-SDAG: ; %bb.0: ; %bb |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-SDAG-NEXT: .LBB114_1: ; %bb3 |
| ; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB114_1 |
| ; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4 |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX942-SDAG: ; %bb.0: ; %bb |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX942-SDAG-NEXT: .LBB114_1: ; %bb3 |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1 |
| ; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff |
| ; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB114_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1012-SDAG: ; %bb.0: ; %bb |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-SDAG-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB114_1 |
| ; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4 |
| ; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1100-SDAG: ; %bb.0: ; %bb |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX1100-SDAG-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_i32 s2, s2, 1 |
| ; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff |
| ; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB114_1 |
| ; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_u32 s0, s0, s2 |
| ; GFX1100-SDAG-NEXT: s_addc_u32 s1, s1, s3 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1250-SDAG: ; %bb.0: ; %bb |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX1250-SDAG-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 |
| ; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff |
| ; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB114_1 |
| ; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX906-ISEL: ; %bb.0: ; %bb |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: .LBB114_1: ; %bb3 |
| ; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB114_1 |
| ; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX942-ISEL: ; %bb.0: ; %bb |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: .LBB114_1: ; %bb3 |
| ; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB114_1 |
| ; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1 |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1012-ISEL: ; %bb.0: ; %bb |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB114_1 |
| ; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1100-ISEL: ; %bb.0: ; %bb |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB114_1 |
| ; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv: |
| ; GFX1250-ISEL: ; %bb.0: ; %bb |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-ISEL-NEXT: .LBB114_1: ; %bb3 |
| ; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB114_1 |
| ; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| bb: |
| br label %bb3 |
| |
| bb2: ; preds = %bb3 |
| ret <4 x float> %i6 |
| |
| bb3: ; preds = %bb3, %bb |
| %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ] |
| %i4 = zext i32 %i to i64 |
| %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !3) |
| %i6 = bitcast <4 x i32> %load to <4 x float> |
| %i8 = add nuw nsw i32 %i, 1 |
| %i9 = icmp eq i32 %i8, 256 |
| br i1 %i9, label %bb2, label %bb3 |
| } |
| |
| ;; Make sure we only have a single zero vaddr initialization. |
| |
| ;; 64-bit LSR induction variable with multiple loads. NOTE(review): %load.1 is issued on %i5 (not %i5.1) and %i6.1 bitcasts %load (not %load.1), so %i5.1 and %load.1 are unused, bb3 and bb4 return the same value, and the expected output below contains a single load per target with no branch on %x - confirm whether %i5.1 / %load.1 were intended, and regenerate the checks if the IR is corrected. |
| define <4 x float> @global_saddr_64bit_lsr_iv_multiload(ptr addrspace(1) inreg %arg, ptr addrspace(1) inreg %arg.1, i32 %x) { |
| ; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX906-SDAG: ; %bb.0: ; %bb |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-SDAG-NEXT: .LBB115_1: ; %bb5 |
| ; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB115_1 |
| ; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4 |
| ; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5 |
| ; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX942-SDAG: ; %bb.0: ; %bb |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX942-SDAG-NEXT: .LBB115_1: ; %bb5 |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1 |
| ; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff |
| ; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB115_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1012-SDAG: ; %bb.0: ; %bb |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-SDAG-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1 |
| ; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff |
| ; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB115_1 |
| ; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0 |
| ; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4 |
| ; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5 |
| ; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] |
| ; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1100-SDAG: ; %bb.0: ; %bb |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX1100-SDAG-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_i32 s2, s2, 1 |
| ; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff |
| ; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB115_1 |
| ; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1100-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-SDAG-NEXT: s_add_u32 s0, s0, s2 |
| ; GFX1100-SDAG-NEXT: s_addc_u32 s1, s1, s3 |
| ; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1250-SDAG: ; %bb.0: ; %bb |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; GFX1250-SDAG-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 |
| ; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff |
| ; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB115_1 |
| ; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0 |
| ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] |
| ; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX906-ISEL: ; %bb.0: ; %bb |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX906-ISEL-NEXT: .LBB115_1: ; %bb5 |
| ; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB115_1 |
| ; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX942-ISEL: ; %bb.0: ; %bb |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX942-ISEL-NEXT: .LBB115_1: ; %bb5 |
| ; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB115_1 |
| ; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1012-ISEL: ; %bb.0: ; %bb |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX1012-ISEL-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB115_1 |
| ; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo |
| ; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off |
| ; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1100-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1100-ISEL: ; %bb.0: ; %bb |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1100-ISEL-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB115_1 |
| ; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] |
| ; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo |
| ; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload: |
| ; GFX1250-ISEL: ; %bb.0: ; %bb |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX1250-ISEL-NEXT: .LBB115_1: ; %bb5 |
| ; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 |
| ; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0 |
| ; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB115_1 |
| ; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2 |
| ; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1] |
| ; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off |
| ; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31] |
| bb: |
| br label %bb5 |
| |
| bb2: |
| %y = icmp eq i32 %x, 0 |
| br i1 %y, label %bb3, label %bb4 |
| |
| bb3: |
| ret <4 x float> %i6 |
| |
| bb4: |
| ret <4 x float> %i6.1 |
| |
| bb5: |
| %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ] |
| %i4 = zext i32 %i to i64 |
| %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 |
| %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !0) |
| %i6 = bitcast <4 x i32> %load to <4 x float> |
| %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4 |
| %load.1 = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !1) |
| %i6.1 = bitcast <4 x i32> %load to <4 x float> |
| %i8 = add nuw nsw i32 %i, 1 |
| %i9 = icmp eq i32 %i8, 256 |
| br i1 %i9, label %bb2, label %bb5 |
| } |
| |
| !0 = !{!"wavefront"} |
| !1 = !{!"workgroup"} |
| !2 = !{!"agent"} |
| !3 = !{!""} |
| |
| !4 = !{i32 0, i32 1073741824} ; (1 << 30); as a !range operand the upper bound is exclusive |
| !5 = !{i32 0, i32 1073741825} ; (1 << 30) + 1; used as !range on %voffset in the ..._range_too_large test, so values up to 1 << 30 are allowed |
| |