;; blob: 36fd9b2d6e0c4226992aef31a179e66826dc26eb
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefix=GFX906-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefix=GFX1012-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefix=GFX906-ISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942-ISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefix=GFX1012-ISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100-ISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-ISEL %s
;;==============================================================================
;; A few basic test cases
;;==============================================================================
;; vgpr pointer, wavefront scope
;; Performs one av.load.b128 through the global pointer %addr with metadata
;; operand !0 (declared elsewhere in this file) and returns the <4 x i32>
;; result as-is. The assertions expect a plain 128-bit global load with no
;; cache/scope modifier on every tested target.
define <4 x i32> @av_global_load_b128_0_00(ptr addrspace(1) %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_0_00:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_0_00:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_0_00:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_0_00:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_0_00:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_0_00:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_0_00:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_0_00:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_0_00:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_0_00:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0)
  ret <4 x i32> %vec
}
;; vgpr pointer, workgroup scope
;; Performs one av.load.b128 through the global pointer %addr with metadata
;; operand !1 (declared elsewhere in this file) and returns the <4 x i32>
;; result as-is. The assertions expect `sc0` on gfx942 and `glc` on
;; gfx1012/gfx1100; gfx906 and gfx1250 emit the load without a modifier.
define <4 x i32> @av_global_load_b128_0_01(ptr addrspace(1) %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_0_01:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_0_01:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_0_01:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_0_01:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_0_01:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_0_01:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_0_01:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_0_01:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_0_01:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_0_01:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  ret <4 x i32> %vec
}
;; vgpr pointer, agent scope
;; Performs one av.load.b128 through the global pointer %addr with metadata
;; operand !2 (declared elsewhere in this file) and returns the <4 x i32>
;; result as-is. The assertions expect `glc` on gfx906/gfx1100, `sc1` on
;; gfx942, `glc dlc` on gfx1012 and `scope:SCOPE_DEV` on gfx1250.
define <4 x i32> @av_global_load_b128_0_10(ptr addrspace(1) %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_0_10:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_0_10:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_0_10:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_0_10:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_0_10:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_0_10:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_0_10:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_0_10:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_0_10:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_0_10:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  ret <4 x i32> %vec
}
;; vgpr pointer, system scope
;; Performs one av.load.b128 through the global pointer %addr with metadata
;; operand !3 (declared elsewhere in this file) and returns the <4 x i32>
;; result as-is. The assertions expect `glc` on gfx906/gfx1100, `sc0 sc1` on
;; gfx942, `glc dlc` on gfx1012 and `scope:SCOPE_SYS` on gfx1250.
define <4 x i32> @av_global_load_b128_0_11(ptr addrspace(1) %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_0_11:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_0_11:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_0_11:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_0_11:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_0_11:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_0_11:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_0_11:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_0_11:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_0_11:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_0_11:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
  ret <4 x i32> %vec
}
;; sgpr pointer, wavefront scope
;; Same load as the VGPR-pointer variant but %addr is marked `inreg`, so the
;; assertions expect the SGPR-base form of the instruction: a zeroed VGPR
;; offset (v0) plus an SGPR pair holding the address. Metadata operand is !0
;; (declared elsewhere in this file); no cache/scope modifier is expected.
define <4 x i32> @av_global_load_b128_saddr_0_00(ptr addrspace(1) inreg %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_00:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_00:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_00:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_00:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_00:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_00:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_00:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_00:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_00:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_00:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0)
  ret <4 x i32> %vec
}
;; sgpr pointer, workgroup scope
;; `inreg` pointer variant with metadata operand !1 (declared elsewhere in
;; this file). The assertions expect the SGPR-base load form (zeroed VGPR
;; offset plus SGPR pair) carrying `sc0` on gfx942 and `glc` on
;; gfx1012/gfx1100; gfx906 and gfx1250 emit no modifier.
define <4 x i32> @av_global_load_b128_saddr_0_01(ptr addrspace(1) inreg %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_01:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_01:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_01:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_01:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_01:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_01:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_01:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_01:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_01:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_01:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  ret <4 x i32> %vec
}
;; sgpr pointer, agent scope
;; `inreg` pointer variant with metadata operand !2 (declared elsewhere in
;; this file). The assertions expect the SGPR-base load form carrying `glc`
;; on gfx906/gfx1100, `sc1` on gfx942, `glc dlc` on gfx1012 and
;; `scope:SCOPE_DEV` on gfx1250.
;; NOTE(review): the VGPR-pointer test using !2 is suffixed `_0_10` while this
;; one is `_0_02` -- confirm whether the differing suffix scheme is intended.
define <4 x i32> @av_global_load_b128_saddr_0_02(ptr addrspace(1) inreg %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_02:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_02:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_02:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_02:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_02:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_02:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_02:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_02:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_02:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_02:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  ret <4 x i32> %vec
}
;; sgpr pointer, system scope
;; `inreg` pointer variant with metadata operand !3 (declared elsewhere in
;; this file). The assertions expect the SGPR-base load form carrying `glc`
;; on gfx906/gfx1100, `sc0 sc1` on gfx942, `glc dlc` on gfx1012 and
;; `scope:SCOPE_SYS` on gfx1250.
;; NOTE(review): the VGPR-pointer test using !3 is suffixed `_0_11` while this
;; one is `_0_03` -- confirm whether the differing suffix scheme is intended.
define <4 x i32> @av_global_load_b128_saddr_0_03(ptr addrspace(1) inreg %addr) {
; GFX906-SDAG-LABEL: av_global_load_b128_saddr_0_03:
; GFX906-SDAG: ; %bb.0: ; %entry
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: av_global_load_b128_saddr_0_03:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: av_global_load_b128_saddr_0_03:
; GFX1012-SDAG: ; %bb.0: ; %entry
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: av_global_load_b128_saddr_0_03:
; GFX1100-SDAG: ; %bb.0: ; %entry
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: av_global_load_b128_saddr_0_03:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: av_global_load_b128_saddr_0_03:
; GFX906-ISEL: ; %bb.0: ; %entry
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: av_global_load_b128_saddr_0_03:
; GFX942-ISEL: ; %bb.0: ; %entry
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: av_global_load_b128_saddr_0_03:
; GFX1012-ISEL: ; %bb.0: ; %entry
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: av_global_load_b128_saddr_0_03:
; GFX1100-ISEL: ; %bb.0: ; %entry
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: av_global_load_b128_saddr_0_03:
; GFX1250-ISEL: ; %bb.0: ; %entry
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
  ret <4 x i32> %vec
}
;;==============================================================================
;; Signed offset addressing modes (derived from global-saddr-load.ll)
;;==============================================================================
;;------------------------------------------------------------------------------
;; No vgpr offset, constants
;;------------------------------------------------------------------------------
;; base only
;; Loads 128 bits directly from %sbase (no address arithmetic) via
;; av.load.b128 with metadata operand !0 (declared elsewhere in this file),
;; then reinterprets the <4 x i32> result as <4 x float> for the return.
;; The assertions expect a plain 128-bit global load with no immediate offset
;; and no cache/scope modifier on every tested target.
define <4 x float> @global_load_i8_offset_0(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  %raw = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %sbase, metadata !0)
  %as.float = bitcast <4 x i32> %raw to <4 x float>
  ret <4 x float> %as.float
}
;; base with maximum gfx9 immediate offset (4095)
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase + 4095 bytes with metadata !1
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * gfx906, gfx942, gfx1100 and gfx1250 encode 4095 directly in the load's
;;    offset field.
;;  * gfx1012 does not. SelectionDAG splits the constant into a 64-bit add of
;;    0x800 plus offset 2047; GlobalISel adds the whole 0xfff to the base and
;;    leaves the offset field empty.
;;  * With !1 the load carries sc0 on gfx942 and glc on gfx1012/gfx1100, while
;;    gfx906 and gfx1250 show no cache-policy or scope modifier.
;; NOTE(review): !1 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_4095(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant 4095 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum gfx9 immediate offset (4095) + 1
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase + 4096 bytes with metadata !2
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * gfx906, gfx942, gfx1012 and gfx1100 (both selectors) add 0x1000 to the
;;    64-bit base with an add/add-with-carry pair and issue the load with no
;;    offset field. gfx942 places an s_nop between the two adds and gfx1100 an
;;    s_delay_alu.
;;  * gfx1250 still encodes 4096 directly in the offset field.
;;  * With !2 the load carries glc on gfx906 and gfx1100, sc1 on gfx942,
;;    glc dlc on gfx1012 and scope SCOPE_DEV on gfx1250.
;; NOTE(review): !2 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_4096(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant 4096 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum gfx9 immediate offset (4095) + 2
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase + 4097 bytes with metadata !3
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * SelectionDAG on gfx906, gfx942, gfx1012 and gfx1100 splits the constant
;;    into a 64-bit add of 0x1000 plus an offset field of 1.
;;  * GlobalISel on the same targets adds the whole 0x1001 to the base and
;;    leaves the offset field empty.
;;  * gfx1250 encodes 4097 directly in the offset field under both selectors.
;;  * With !3 the load carries glc on gfx906 and gfx1100, sc0 sc1 on gfx942,
;;    glc dlc on gfx1012 and scope SCOPE_SYS on gfx1250.
;; NOTE(review): !3 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_4097(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant 4097 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx9 immediate offset (-4096)
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase - 4096 bytes with metadata !0
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * gfx906, gfx942, gfx1100 and gfx1250 encode -4096 directly in the load's
;;    offset field.
;;  * gfx1012 (both selectors) adds 0xfffff000 to the low half and -1 to the
;;    high half of the base, then loads with no offset field.
;;  * With !0 no target shows a cache-policy or scope modifier on the load.
;; NOTE(review): !0 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_neg4096(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant -4096 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx9 immediate offset (-4096) - 1
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase - 4097 bytes with metadata !1
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * SelectionDAG on gfx906, gfx942, gfx1012 and gfx1100 adds 0xfffff000 /
;;    -1 (low / high half) to the base and puts the remaining -1 in the
;;    offset field.
;;  * GlobalISel on the same targets adds the whole 0xffffefff / -1 to the
;;    base and leaves the offset field empty.
;;  * gfx1250 encodes -4097 directly in the offset field under both selectors.
;;  * With !1 the load carries sc0 on gfx942 and glc on gfx1012/gfx1100, while
;;    gfx906 and gfx1250 show no cache-policy or scope modifier.
;; NOTE(review): !1 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_neg4097(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant -4097 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx9 immediate offset (-4096) - 2
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase - 4098 bytes with metadata !2
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * SelectionDAG on gfx906, gfx942, gfx1012 and gfx1100 adds 0xfffff000 /
;;    -1 (low / high half) to the base and puts the remaining -2 in the
;;    offset field.
;;  * GlobalISel on the same targets adds the whole 0xffffeffe / -1 to the
;;    base and leaves the offset field empty.
;;  * gfx1250 encodes -4098 directly in the offset field under both selectors.
;;  * With !2 the load carries glc on gfx906 and gfx1100, sc1 on gfx942,
;;    glc dlc on gfx1012 and scope SCOPE_DEV on gfx1250.
;; NOTE(review): !2 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_neg4098(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg4098:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg4098:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4098:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg4098:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4098:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg4098:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg4098:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4098:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg4098:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4098:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant -4098 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum gfx10 immediate offset (2047) + 1
;;
;; 2048 is the first positive offset the gfx1012 run no longer folds: the
;; assertions below need a separate add of 0x800, whereas the gfx1012
;; SelectionDAG assertions of global_load_i8_offset_4095 still use an offset
;; field of 2047.
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase + 2048 bytes with metadata !3
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * gfx906, gfx942, gfx1100 and gfx1250 encode 2048 directly in the load's
;;    offset field.
;;  * gfx1012 (both selectors) adds 0x800 to the 64-bit base and loads with no
;;    offset field.
;;  * With !3 the load carries glc on gfx906 and gfx1100, sc0 sc1 on gfx942,
;;    glc dlc on gfx1012 and scope SCOPE_SYS on gfx1250.
;; NOTE(review): !3 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_2048(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant 2048 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum gfx10 immediate offset (2047) + 2
;;
;; Calls llvm.amdgcn.av.load.b128.p1 on %sbase + 2049 bytes with metadata !0
;; and returns the loaded <4 x i32> bitcast to <4 x float>.
;; Expected lowering, per the assertions below:
;;  * gfx906, gfx942, gfx1100 and gfx1250 encode 2049 directly in the load's
;;    offset field.
;;  * gfx1012 SelectionDAG splits the constant into a 64-bit add of 0x800 plus
;;    an offset field of 1; gfx1012 GlobalISel adds the whole 0x801 and leaves
;;    the offset field empty.
;;  * With !0 no target shows a cache-policy or scope modifier on the load.
;; NOTE(review): !0 is defined outside this excerpt; presumably it selects the
;; memory scope of the access -- confirm against its definition.
define <4 x float> @global_load_i8_offset_2049(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x801, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
; Byte-granular GEP: the constant 2049 is the offset under test.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
; Reinterpret the four loaded dwords as floats to match the return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum gfx10 immediate offset (2047) + 3
define <4 x float> @global_load_i8_offset_2050(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_2050:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_2050:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_2050:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_2050:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_2050:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_2050:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_2050:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_2050:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x802, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_2050:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_2050:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase + 2050 through the av.load.b128
;; intrinsic (metadata operand !1) and return them bitcast to <4 x float>.
;; Expected codegen, per the checks above: only gfx1012 cannot fold 2050 into
;; the load. SelectionDAG splits it into a 0x800 base add plus offset:2, while
;; GlobalISel adds the full 0x802 and emits no immediate offset.
;; NOTE(review): !1 is defined outside this section; the sc0/glc bits expected
;; above presumably come from the scope it names -- confirm against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx10 immediate offset
define <4 x float> @global_load_i8_offset_neg2048(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase - 2048 through the av.load.b128
;; intrinsic (metadata operand !2) and return them bitcast to <4 x float>.
;; Expected codegen, per the checks above: every checked target folds -2048
;; straight into the load's immediate offset, so no address arithmetic appears
;; under either instruction selector.
;; NOTE(review): !2 is defined outside this section; the glc / sc1 / glc dlc /
;; SCOPE_DEV modifiers expected above presumably come from the scope it names
;; -- confirm against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx10 immediate offset - 1
define <4 x float> @global_load_i8_offset_neg2049(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase - 2049 through the av.load.b128
;; intrinsic (metadata operand !3) and return them bitcast to <4 x float>.
;; Expected codegen, per the checks above: only gfx1012 cannot fold -2049.
;; SelectionDAG adds 0xfffff800 (with -1 carried into the high dword) and keeps
;; offset:-1, while GlobalISel adds the full 0xfffff7ff and emits no immediate
;; offset.
;; NOTE(review): !3 is defined outside this section; the glc / sc0 sc1 /
;; glc dlc / SCOPE_SYS modifiers expected above presumably come from the scope
;; it names -- confirm against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; base with maximum negative gfx10 immediate offset - 2
define <4 x float> @global_load_i8_offset_neg2050(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg2050:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg2050:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2050:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg2050:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2050:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg2050:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg2050:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2050:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg2050:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2050:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase - 2050 through the av.load.b128
;; intrinsic (metadata operand !0) and return them bitcast to <4 x float>.
;; Expected codegen, per the checks above: only gfx1012 cannot fold -2050.
;; SelectionDAG adds 0xfffff800 (with -1 carried into the high dword) and keeps
;; offset:-2, while GlobalISel adds the full 0xfffff7fe and emits no immediate
;; offset. With !0 no cache-policy modifier is expected on any checked target.
;; NOTE(review): !0 is defined outside this section -- confirm which scope it
;; names against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; maximum gfx12 saddr positive offset
define <4 x float> @global_load_i8_offset_0x7FFFFF(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase + 8388607 (0x7FFFFF) through the
;; av.load.b128 intrinsic (metadata operand !1) and return them bitcast to
;; <4 x float>.
;; Expected codegen, per the checks above: only gfx1250 folds the whole offset.
;; On the older targets SelectionDAG splits it into a 64-bit base add plus the
;; largest immediate it keeps (0x7ff000 + 4095 on gfx906/gfx942/gfx1100,
;; 0x7ff800 + 2047 on gfx1012), while GlobalISel adds the full 0x7fffff and
;; emits no immediate offset.
;; NOTE(review): !1 is defined outside this section -- confirm which scope it
;; names against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; maximum gfx12 saddr negative offset (-0x800000 = -8388608; the 0xFFFFFF in the function name is not the literal offset)
define <4 x float> @global_load_i8_offset_0xFFFFFF(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase - 8388608 through the av.load.b128
;; intrinsic (metadata operand !2) and return them bitcast to <4 x float>.
;; The byte offset used here is -8388608 (-0x800000), not the 0xFFFFFF that
;; appears in the function name.
;; Expected codegen, per the checks above: only gfx1250 folds the offset into
;; the load. Every older target, under both selectors, adds 0xff800000 to the
;; low dword and -1 with carry to the high dword, then loads with no immediate
;; offset.
;; NOTE(review): !2 is defined outside this section -- confirm which scope it
;; names against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; 32-bit unsigned max offset
define <4 x float> @global_load_i8_offset_0xFFFFFFFF(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase + 4294967295 (0xFFFFFFFF) through the
;; av.load.b128 intrinsic (metadata operand !3) and return them bitcast to
;; <4 x float>.
;; Expected codegen, per the checks above: no checked target folds the whole
;; offset. SelectionDAG splits it into a 64-bit base add plus an immediate
;; (0xfffff000 + 4095 on gfx906/gfx942/gfx1100, 0xfffff800 + 2047 on gfx1012,
;; 0xff800000 + 8388607 on gfx1250). GlobalISel adds -1 to the low dword with
;; carry into the high dword and emits no immediate offset.
;; NOTE(review): !3 is defined outside this section -- confirm which scope it
;; names against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset
define <4 x float> @global_load_i8_offset_0x100000000(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Test body: load 16 bytes from %sbase + 4294967296 (0x100000000) through the
;; av.load.b128 intrinsic (metadata operand !0) and return them bitcast to
;; <4 x float>.
;; Expected codegen, per the checks above: the offset has a zero low dword, so
;; SelectionDAG emits a single add of 1 to the high address dword (v1).
;; GlobalISel still emits the full carry chain (add 0 to v0, then add 1 with
;; carry to v1). No target uses an immediate offset, and with !0 no
;; cache-policy modifier is expected.
;; NOTE(review): !0 is defined outside this section -- confirm which scope it
;; names against the metadata.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; first offset that exceeds 32 bits, plus 1 (0x100000001)
define <4 x float> @global_load_i8_offset_0x100000001(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000001:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000001:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000001:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000001:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000001:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000001:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000001:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000001:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000001:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000001:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset 4294967297 (0x100000001) to %sbase with an i8
;; GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, the SelectionDAG runs keep the low 1 as an immediate
;; "offset:1" on the load, while the GlobalISel runs add it into v0 instead.
;; Metadata !1 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset + max gfx9 imm
define <4 x float> @global_load_i8_offset_0x100000FFF(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset 4294971391 (0x100000FFF) to %sbase with an i8
;; GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, the SelectionDAG runs fold the low 0xFFF into the
;; load's immediate offset (gfx1012 splits it as 0x800 in the add plus
;; "offset:2047"), while the GlobalISel runs add 0xfff into v0 instead.
;; Metadata !2 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset + max gfx9 imm + 1
define <4 x float> @global_load_i8_offset_0x100001000(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_0x100001000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_0x100001000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100001000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_0x100001000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100001000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_0x100001000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_0x100001000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100001000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_0x100001000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100001000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset 4294971392 (0x100001000) to %sbase with an i8
;; GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, only the gfx1250 SelectionDAG run folds the low 0x1000
;; into the load ("offset:4096"); every other run adds 0x1000 into v0 and
;; issues the load without an immediate offset.
;; Metadata !3 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negated 32-bit unsigned max offset (-0xFFFFFFFF)
define <4 x float> @global_load_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset -4294967295 (-0xFFFFFFFF) to %sbase with an
;; i8 GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, the SelectionDAG runs express the low dword (+1) as a
;; larger add paired with a negative immediate offset (0x1000 with -4095,
;; 0x800 with -2047, 0x800000 with -8388607), while the GlobalISel runs add 1
;; to v0 and -1 (with carry) to v1.
;; Metadata !0 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negative offset whose magnitude exceeds 32 bits (-0x100000000)
define <4 x float> @global_load_i8_offset_neg0x100000000(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset -4294967296 (-0x100000000) to %sbase with an
;; i8 GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, the SelectionDAG runs need only a single 32-bit add of
;; -1 to the high dword (v1), while the GlobalISel runs still emit the full
;; add-with-carry pair (0 into v0, -1 into v1).
;; Metadata !1 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negative offset whose magnitude exceeds 32 bits, minus 1 (-0x100000001)
define <4 x float> @global_load_i8_offset_neg0x100000001(ptr addrspace(1) %sbase) {
; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Adds the constant byte offset -4294967297 (-0x100000001) to %sbase with an
;; i8 GEP, loads 128 bits through llvm.amdgcn.av.load.b128.p1 and returns the
;; result bitcast to <4 x float>.
;; In the checks above, the SelectionDAG runs add -1 to the high dword and
;; keep the remaining -1 as an immediate "offset:-1" on the load, while the
;; GlobalISel runs add the full two's-complement halves (-1 into v0, -2 into
;; v1) and use no immediate offset.
;; Metadata !2 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Basic addressing patterns
;;------------------------------------------------------------------------------
;; Basic pattern, no immediate offset.
define <4 x float> @global_load_i8_zext_vgpr(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extends the 32-bit %voffset to 64 bits, uses it as a byte index into
;; %sbase via an i8 GEP (no constant offset), loads 128 bits through
;; llvm.amdgcn.av.load.b128.p1 and returns the result bitcast to <4 x float>.
;; In the checks above, the address is formed by a 64-bit add of v[0:1] and
;; the zero-extended v2: most runs use an add/add-with-carry pair, while the
;; gfx942 and gfx1250 SelectionDAG runs zero v3 and use one 64-bit add.
;; Metadata !3 is defined outside this excerpt; presumably it selects the
;; memory scope of the load (TODO confirm against the metadata definitions).
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9
;; The loaded address is %sbase + zext(%voffset) + 4095 bytes. In the expected
;; output below, gfx906, gfx942, gfx1100 and gfx1250 fold the constant into the
;; load as offset:4095. gfx1012 does not fold it whole: the SelectionDAG run
;; adds 0x800 and keeps offset:2047, while the GlobalISel run adds the full
;; 0xfff and uses no immediate offset. No cache-control modifier is expected on
;; any target for this metadata operand (!0).
define <4 x float> @global_load_i8_zext_vgpr_offset_4095(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !0
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9 + 1
;; The loaded address is %sbase + zext(%voffset) + 4096 bytes. In the expected
;; output below, only gfx1250 folds the constant into the load (offset:4096).
;; gfx906, gfx942, gfx1012 and gfx1100 add 0x1000 to the address with a separate
;; 64-bit add and issue the load with no immediate offset. With metadata operand
;; !1 the expected cache-control modifiers are sc0 on gfx942 and glc on gfx1012
;; and gfx1100; gfx906 and gfx1250 show none.
define <4 x float> @global_load_i8_zext_vgpr_offset_4096(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !1
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum negative offset on gfx9
;; The loaded address is %sbase + zext(%voffset) - 4096 bytes. In the expected
;; output below, gfx906, gfx942, gfx1100 and gfx1250 fold the constant into the
;; load as offset:-4096. gfx1012 instead adds 0xfffff000 to the low half and -1
;; (with carry) to the high half, and uses no immediate offset. With metadata
;; operand !2 the expected cache-control modifiers are glc on gfx906 and
;; gfx1100, sc1 on gfx942, glc dlc on gfx1012 and scope:SCOPE_DEV on gfx1250.
define <4 x float> @global_load_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !2
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum negative offset on gfx9 - 1
;; The loaded address is %sbase + zext(%voffset) - 4097 bytes. In the expected
;; output below, only gfx1250 folds the constant into the load (offset:-4097).
;; On gfx906, gfx942, gfx1012 and gfx1100 the two selectors split it
;; differently: SelectionDAG adds 0xfffff000 and keeps offset:-1, while
;; GlobalISel adds the full 0xffffefff and uses no immediate offset. With
;; metadata operand !3 the expected cache-control modifiers are glc on gfx906
;; and gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012 and scope:SCOPE_SYS on
;; gfx1250.
define <4 x float> @global_load_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !3
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx10
;; The loaded address is %sbase + zext(%voffset) + 2047 bytes. In the expected
;; output below, every tested target (gfx906, gfx942, gfx1012, gfx1100 and
;; gfx1250, under both SelectionDAG and GlobalISel) folds the constant into the
;; load as offset:2047. No cache-control modifier is expected on any target for
;; this metadata operand (!0).
define <4 x float> @global_load_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !0
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx10 + 1
;; The loaded address is %sbase + zext(%voffset) + 2048 bytes. In the expected
;; output below, gfx906, gfx942, gfx1100 and gfx1250 fold the constant into the
;; load as offset:2048. gfx1012 instead adds 0x800 to the address with a
;; separate 64-bit add and uses no immediate offset. With metadata operand !1
;; the expected cache-control modifiers are sc0 on gfx942 and glc on gfx1012 and
;; gfx1100; gfx906 and gfx1250 show none.
define <4 x float> @global_load_i8_zext_vgpr_offset_2048(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the 32-bit offset, apply it and then the constant as two byte
;; GEPs, and load 128 bits through the av.load intrinsic. Metadata operand !1
;; is defined outside this part of the file.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum negative offset on gfx10
;; Loads <4 x i32> from %sbase + zext(%voffset) - 2048 through
;; llvm.amdgcn.av.load.b128 and returns it bitcast to <4 x float>.
;; Every target checked below keeps -2048 in the load's immediate offset field.
define <4 x float> @global_load_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The 32-bit offset is zero-extended and applied first; the constant -2048
;; is a second, separate GEP on top of that result.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
;; !2 is defined outside this function. The checks above show it selecting
;; glc (gfx906, gfx1100), sc1 (gfx942), glc dlc (gfx1012) and scope:SCOPE_DEV
;; (gfx1250) -- presumably a device/agent scope; TODO confirm against !2.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; One byte below the maximum negative offset on gfx10 (-2049)
;; Loads <4 x i32> from %sbase + zext(%voffset) - 2049 through
;; llvm.amdgcn.av.load.b128 and returns it bitcast to <4 x float>.
;; Per the checks below, gfx906, gfx942, gfx1100 and gfx1250 keep -2049 as the
;; load's immediate offset. gfx1012 does not: SelectionDAG adds 0xfffff800
;; (-2048) to the address and leaves offset:-1, while GlobalISel adds the whole
;; 0xfffff7ff (-2049) and emits no immediate offset.
define <4 x float> @global_load_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The 32-bit offset is zero-extended and applied first; the constant -2049
;; is a second, separate GEP on top of that result.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049
;; !3 is defined outside this function. The checks above show it selecting
;; glc (gfx906, gfx1100), sc0 sc1 (gfx942), glc dlc (gfx1012) and
;; scope:SCOPE_SYS (gfx1250) -- presumably system scope; TODO confirm against !3.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx12.
;; Loads <4 x i32> from %sbase + zext(%voffset) + 8388607 (0x7FFFFF) through
;; llvm.amdgcn.av.load.b128 and returns it bitcast to <4 x float>.
;; Per the checks below, only gfx1250 keeps the whole constant as the load's
;; immediate offset. On the other targets SelectionDAG splits it into an
;; address add plus a smaller immediate (0x7ff000 + 4095 on gfx906, gfx942 and
;; gfx1100; 0x7ff800 + 2047 on gfx1012), while GlobalISel adds the full
;; 0x7fffff to the address and emits no immediate offset.
define <4 x float> @global_load_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The 32-bit offset is zero-extended and applied first; the constant 8388607
;; is a second, separate GEP on top of that result.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
;; !0 is defined outside this function. None of the loads checked above carry
;; a cache or scope modifier for it.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum negative offset on gfx12 (-8388608, i.e. -0x800000).
;; Loads <4 x i32> from %sbase + zext(%voffset) - 8388608 through
;; llvm.amdgcn.av.load.b128 and returns it bitcast to <4 x float>.
;; Note that the constant exercised is -8388608 (-0x800000), not the 0xFFFFFF
;; that appears in the function name.
;; Per the checks below, only gfx1250 keeps the constant as the load's
;; immediate offset. Every other target, under both selectors, adds 0xff800000
;; to the low half (with -1 in the high half) and emits no immediate offset.
define <4 x float> @global_load_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The 32-bit offset is zero-extended and applied first; the constant -8388608
;; is a second, separate GEP on top of that result.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
;; !1 is defined outside this function. The checks above show it selecting
;; sc0 (gfx942) and glc (gfx1012, gfx1100), with no modifier on gfx906 or
;; gfx1250 -- presumably workgroup scope; TODO confirm against !1.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9, and immediate needs to be moved lower.
;; Loads <4 x i32> from (%sbase + 4095) + zext(%voffset) through
;; llvm.amdgcn.av.load.b128 and returns it bitcast to <4 x float>.
;; Unlike the sibling tests, the constant GEP comes first and the variable
;; offset is added on top of it.
;; Per the checks below, gfx906, gfx942, gfx1100 and gfx1250 still end up with
;; 4095 as the load's immediate offset. gfx1012 does not: SelectionDAG adds
;; 0x800 to the address and leaves offset:2047, while GlobalISel adds the whole
;; 0xfff and emits no immediate offset.
define <4 x float> @global_load_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %voffset to i64
;; Constant first, variable second: this ordering is the point of the test,
;; so do not swap these two GEPs.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset
;; !2 is defined outside this function. The checks above show it selecting
;; glc (gfx906, gfx1100), sc1 (gfx942), glc dlc (gfx1012) and scope:SCOPE_DEV
;; (gfx1250) -- presumably a device/agent scope; TODO confirm against !2.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; pointer addressing done in integers
define <4 x float> @global_load_i8_zext_vgpr_ptrtoint(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %voffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
%add = add i64 %sbase.as.int, %zext.offset
%dirty.gep = inttoptr i64 %add to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; zext forced to LHS of addressing expression.
;; The address is built with integer arithmetic rather than a GEP:
;; inttoptr(zext(%voffset) + ptrtoint(%sbase)), with the zero-extended offset
;; as the left operand of the add. The checks below expect a 64-bit VGPR add
;; followed by a 128-bit global load that uses "off" (no scalar base) and no
;; cache-policy modifier on any of the checked targets.
;; NOTE(review): metadata !0 is defined outside this part of the file; it
;; presumably selects the narrowest scope -- confirm.
;; NOTE(review): the name says "i8" although the load is 128 bits wide; the name
;; looks inherited from a byte-load test -- confirm before renaming.
define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %voffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
;; The zero-extended offset is deliberately the first add operand; this
;; operand order is what the test exercises, so keep it as written.
%add = add i64 %zext.offset, %sbase.as.int
%dirty.gep = inttoptr i64 %add to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; zext forced to LHS of addressing expression, with immediate offset.
;; The address is inttoptr((zext(%voffset) + ptrtoint(%sbase)) + 128): the
;; constant is added last, after the register add. The checks below expect the
;; constant to be folded into the load as "offset:128" on every checked target,
;; leaving only the 64-bit register add in front of the load.
;; NOTE(review): metadata !1 is defined outside this part of the file. With it,
;; the checks show sc0 for gfx942, glc for gfx1012/gfx1100, and no cache-policy
;; modifier for gfx906/gfx1250 -- confirm which scope that corresponds to.
define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %voffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
;; Register add first (zext on the left), constant added afterwards.
%add = add i64 %zext.offset, %sbase.as.int
%add.immoffset = add i64 %add, 128
%dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position.
;; The address is inttoptr(zext(%voffset) + (ptrtoint(%sbase) + 128)): here the
;; constant is added to the base first and the zero-extended offset is added
;; on the left afterwards. The checks below still expect the constant to end up
;; in the load's "offset:128" field on every checked target.
;; NOTE(review): metadata !2 is defined outside this part of the file. With it,
;; the checks show glc for gfx906/gfx1100, sc1 for gfx942, glc dlc for gfx1012,
;; and scope:SCOPE_DEV for gfx1250 -- presumably a device-level scope; confirm.
define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %voffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
;; Constant is attached to the base before the register add; this ordering is
;; the "non-canonical position" under test, so keep it as written.
%add.immoffset = add i64 %sbase.as.int, 128
%add = add i64 %zext.offset, %add.immoffset
%dirty.gep = inttoptr i64 %add to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Uniformity edge cases
;;------------------------------------------------------------------------------
;; Both 64-bit base and 32-bit offset are scalar
;; The address is a byte GEP of %sbase by zext(%soffset), loaded as 128 bits
;; with metadata !3.
;; NOTE(review): neither parameter is marked inreg, and the checks below read
;; the base from v[0:1] and the offset from v2, i.e. from VGPRs. As written the
;; expected output does not use a scalar base ("off" on every target). If SGPR
;; inputs are intended, the arguments probably need inreg and the checks need
;; regenerating -- confirm.
;; NOTE(review): metadata !3 is defined outside this part of the file. With it,
;; the checks show sc0 sc1 for gfx942 and scope:SCOPE_SYS for gfx1250 --
;; presumably system scope; confirm.
;; NOTE(review): the name says "i8" although the load is 128 bits wide.
define <4 x float> @global_load_i8_zext_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_uniform_offset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_uniform_offset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Both 64-bit base and 32-bit offset are scalar, with immediate offset.
;; Two chained byte GEPs: %sbase + zext(%soffset), then a constant -24. The
;; checks below expect the constant to be folded into the load as "offset:-24"
;; on every checked target, with no cache-policy modifier.
;; NOTE(review): neither parameter is marked inreg, and the checks below read
;; the base from v[0:1] and the offset from v2, i.e. from VGPRs, so no scalar
;; base is used. If SGPR inputs are intended, the arguments probably need inreg
;; and the checks need regenerating -- confirm.
;; NOTE(review): metadata !0 is defined outside this part of the file; it
;; presumably selects the narrowest scope -- confirm.
define <4 x float> @global_load_i8_zext_uniform_offset_immoffset(ptr addrspace(1) %sbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
;; Negative constant applied as a second GEP on top of the register offset.
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Both components uniform, zext forced to LHS of addressing expression
;; The address is inttoptr(zext(%soffset) + ptrtoint(%sbase)), with the
;; zero-extended offset as the left operand of the add, loaded as 128 bits
;; with metadata !1.
;; NOTE(review): neither parameter is marked inreg, and the checks below read
;; the base from v[0:1] and the offset from v2, i.e. from VGPRs, so the
;; "uniform"/"sgpr" wording is not reflected in the expected output. If SGPR
;; inputs are intended, the arguments probably need inreg and the checks need
;; regenerating -- confirm.
;; NOTE(review): metadata !1 is defined outside this part of the file. With it,
;; the checks show sc0 for gfx942, glc for gfx1012/gfx1100, and no cache-policy
;; modifier for gfx906/gfx1250 -- confirm which scope that corresponds to.
define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
;; The zero-extended offset is deliberately the first add operand; this
;; operand order is what the test exercises, so keep it as written.
%add = add i64 %zext.offset, %sbase.as.int
%dirty.gep = inttoptr i64 %add to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; zext forced to the LHS of the addressing expression, with an immediate offset.
;; The address is built with integer arithmetic instead of a GEP:
;;   inttoptr((zext(%soffset) + ptrtoint(%sbase)) + 128)
;; The checks expect every target to fold the constant into the load's
;; offset:128 field and to add the 32-bit offset (v2) into the 64-bit base
;; (v[0:1]) with VALU instructions. With metadata !2 the load is expected to
;; carry glc (gfx906, gfx1100), sc1 (gfx942), glc dlc (gfx1012) or
;; scope:SCOPE_DEV (gfx1250).
;; NOTE(review): the name says "sgpr" and this comment used to say "both
;; components uniform", but the checks show both arguments arriving in VGPRs,
;; presumably because the parameters are not marked inreg; confirm that this
;; is the intended coverage.
define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Widen the 32-bit offset and reinterpret the base pointer as an integer.
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
;; The zext is deliberately the first (LHS) operand of the add.
%add = add i64 %zext.offset, %sbase.as.int
;; Constant byte offset that the checks expect to land in the offset field.
%add.immoffset = add i64 %add, 128
%dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
;; 128-bit load through the av.load intrinsic, tagged with metadata !2.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !2)
;; Return the four loaded dwords reinterpreted as floats.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; 64-bit base pointer plus a zero-extended 32-bit byte offset, no immediate.
;; The address is an inbounds i8 GEP of %vbase by zext(%soffset), so no
;; scaling is involved. The checks expect a 64-bit VALU add of v2 into v[0:1]
;; and a load with no offset field. With metadata !3 the load is expected to
;; carry glc (gfx906, gfx1100), sc0 sc1 (gfx942), glc dlc (gfx1012) or
;; scope:SCOPE_SYS (gfx1250).
;; NOTE(review): the name says "sgpr32", but the checks show the offset in
;; v2, presumably because the parameter is not marked inreg; confirm that
;; this is the intended coverage.
define <4 x float> @global_load_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the offset, then index the base at byte granularity.
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
;; 128-bit load through the av.load intrinsic, tagged with metadata !3.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
;; Return the four loaded dwords reinterpreted as floats.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; 64-bit base pointer plus a zero-extended 32-bit byte offset, followed by a
;; constant 4095-byte GEP.
;; The checks expect gfx906, gfx942, gfx1100 and gfx1250 to fold the constant
;; into offset:4095. The gfx1012 checks do not fold the full value:
;; SelectionDAG adds 0x800 and keeps offset:2047, while GlobalISel adds 0xfff
;; and emits the load with no offset field.
;; With metadata !0 no cache-policy or scope modifier is expected on the load
;; for any target.
;; NOTE(review): the name says "sgpr32", but the checks show the offset in
;; v2, presumably because the parameter is not marked inreg; confirm that
;; this is the intended coverage.
define <4 x float> @global_load_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 %soffset) {
; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extend the offset, then index the base at byte granularity.
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
;; Constant byte offset whose folding differs per target (see header).
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
;; 128-bit load through the av.load intrinsic, tagged with metadata !0.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
;; Return the four loaded dwords reinterpreted as floats.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Natural addressing shifts with restricted range
;;------------------------------------------------------------------------------
;; Cannot push the shift into 32-bits, and cannot match.
;; A 32-bit index is loaded from %voffset.ptr, zero-extended to 64 bits and
;; used in a float-typed GEP, so it is scaled by 4 at 64-bit width. The checks
;; expect the scaling to stay a 64-bit operation: v_lshlrev_b64 by 2 followed
;; by a 64-bit add on gfx906, gfx1012 and gfx1100, and a single
;; v_lshl_add_u64 with shift amount 2 on gfx942 and gfx1250.
;; With metadata !1 the final load is expected to carry sc0 (gfx942) or glc
;; (gfx1012, gfx1100); the gfx906 and gfx1250 checks show no modifier.
define <4 x float> @global_load_f32_natural_addressing(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_natural_addressing:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_natural_addressing:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_natural_addressing:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_natural_addressing:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_natural_addressing:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_natural_addressing:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The index comes from memory, with no range information attached.
%voffset = load i32, ptr addrspace(1) %voffset.ptr
%zext.offset = zext i32 %voffset to i64
;; float-typed GEP: the 64-bit index is scaled by the 4-byte element size.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
;; 128-bit load through the av.load intrinsic, tagged with metadata !1.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !1)
;; Return the four loaded dwords reinterpreted as floats.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Loaded 32-bit offset added to the base at byte granularity, with an
;; immediate offset.
;; A 32-bit value is loaded from %voffset.ptr, zero-extended, and applied with
;; an i8-typed GEP, followed by a constant 128-byte GEP. The checks expect the
;; constant to be folded into offset:128 on every target and the variable part
;; to be a plain 64-bit add of v2 into v[0:1].
;; With metadata !2 the final load is expected to carry glc (gfx906, gfx1100),
;; sc1 (gfx942), glc dlc (gfx1012) or scope:SCOPE_DEV (gfx1250).
;; NOTE(review): despite the "f32_natural_addressing" name and the earlier
;; wording about a shift, both GEPs here are i8-typed, so no scaling is
;; involved and no shift instruction appears in any of the checks; confirm
;; whether a float-typed GEP was intended.
define <4 x float> @global_load_f32_natural_addressing_immoffset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The offset comes from memory and is zero-extended to 64 bits.
%voffset = load i32, ptr addrspace(1) %voffset.ptr
%zext.offset = zext i32 %voffset to i64
;; i8-typed GEPs: the variable offset and the constant 128 are both in bytes.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128
;; 128-bit load through the av.load intrinsic, tagged with metadata !2.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !2)
;; Return the four loaded dwords reinterpreted as floats.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is sufficiently restricted to push the shift into 32-bits.
define <4 x float> @global_load_f32_zext_vgpr_range(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
%zext.offset = zext i32 %voffset to i64
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is sufficiently restricted to push the shift into 32 bits, with an imm offset
;; Loads an i32 index through %voffset.ptr, zero-extends it to i64, uses it to
;; index %sbase as an array of float, steps a further 100 floats, and performs a
;; 128-bit llvm.amdgcn.av.load.b128 from the resulting global address.
;; The expectations below show, for every run line, the index scaled with a
;; 32-bit shift by 2 and the constant part folded into the load as offset:400.
;; On gfx942 and gfx1250 the SelectionDAG runs additionally materialize a zero
;; high half (v3) and use a 64-bit add for the address.
;; NOTE(review): !range !4 and metadata !0 are defined outside this chunk; the
;; checked b128 loads carry no cache-policy/scope modifier for !0 -- confirm the
;; intended scope against the metadata definitions at the end of the file.
define <4 x float> @global_load_f32_zext_vgpr_range_imm_offset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Variable index: loaded from memory with a !range annotation, then widened.
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
%zext.offset = zext i32 %voffset to i64
;; float elements are 4 bytes, hence the shift by 2 in the expected output.
%gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
;; Constant part: 100 floats = 400 bytes, expected as the load's offset:400.
%gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
;; The intrinsic is called with a <4 x i32> result; reinterpret it as <4 x float>.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is 1 beyond the limit where we can move the shift into 32 bits.
;; Same address computation as the 32-bit-shift variants (i32 index loaded
;; through %voffset.ptr, zero-extended, used to index %sbase as float), but the
;; index load is annotated with !range !5 instead of !4.
;; The expectations below show the scaling done on a 64-bit value for every run
;; line: a zero high half is materialized in v3 and the shift by 2 is either
;; v_lshlrev_b64 or folded into v_lshl_add_u64.
;; Modifiers on the checked b128 load for metadata !1: none on gfx906 and
;; gfx1250, sc0 on gfx942, glc on gfx1012 and gfx1100.
;; NOTE(review): !range !5 and metadata !1 are defined outside this chunk --
;; confirm the exact bounds and scope against the definitions at the end of the
;; file.
define <4 x float> @global_load_f32_zext_vgpr_range_too_large(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Variable index with the wider !range !5 annotation, then widened to i64.
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
%zext.offset = zext i32 %voffset to i64
;; float elements are 4 bytes; here the expected scaling is a 64-bit shift by 2.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !1)
;; The intrinsic is called with a <4 x i32> result; reinterpret it as <4 x float>.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; or-with-constant as add
;;------------------------------------------------------------------------------
;; Check or-as-add with a split 64-bit or.
;; Builds a global address directly from an integer: %idx is zero-extended to
;; i64, OR'ed with the constant 16, converted to a ptr addrspace(1), and used
;; for a 128-bit llvm.amdgcn.av.load.b128.
;; %sbase is not referenced by the body; it still occupies the first argument
;; register, so %idx arrives in v1 in the expected output.
;; The expectations below show a 32-bit OR on the low half with the high half
;; set to 0 (a single dual-issue instruction on gfx1250), and the load using
;; plain "off" addressing with no immediate offset.
;; Modifiers on the checked b128 load for metadata !2: glc on gfx906 and
;; gfx1100, sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_DEV on gfx1250.
;; NOTE(review): metadata !2 is defined outside this chunk -- confirm the scope
;; it names against the definitions at the end of the file.
define <4 x float> @global_load_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) %sbase, i32 %idx) {
; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extension makes the upper 32 bits of the address zero.
%zext.idx = zext i32 %idx to i64
;; 64-bit OR with a small constant; only the low half can change.
%or = or i64 %zext.idx, 16
%addr = inttoptr i64 %or to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
;; The intrinsic is called with a <4 x i32> result; reinterpret it as <4 x float>.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; or-as-add with offset exceeding gfx9 imm range
;; Same shape as the offset-16 variant, but the OR constant is 4160 (0x1040):
;; %idx is zero-extended to i64, OR'ed with 4160, converted to a
;; ptr addrspace(1), and used for a 128-bit llvm.amdgcn.av.load.b128.
;; %sbase is not referenced by the body; it still occupies the first argument
;; register, so %idx arrives in v1 in the expected output.
;; The expectations below show a 32-bit OR with the literal 0x1040 on the low
;; half, the high half set to 0, and the load using plain "off" addressing with
;; no immediate offset.
;; Modifiers on the checked b128 load for metadata !3: glc on gfx906 and
;; gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012, scope:SCOPE_SYS on gfx1250.
;; NOTE(review): metadata !3 is defined outside this chunk -- confirm the scope
;; it names against the definitions at the end of the file.
define <4 x float> @global_load_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) %sbase, i32 %idx) {
; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, 0x1040, v1
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-ISEL-NEXT: v_or_b32_e32 v2, 0x1040, v1
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Zero-extension makes the upper 32 bits of the address zero.
%zext.idx = zext i32 %idx to i64
;; 4160 == 0x1040, the literal that appears in the expected v_or_b32.
%or = or i64 %zext.idx, 4160
%addr = inttoptr i64 %or to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
;; The intrinsic is called with a <4 x i32> result; reinterpret it as <4 x float>.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Full 64-bit scalar add.
;;------------------------------------------------------------------------------
define <4 x float> @global_addr_64bit_lsr_iv(ptr addrspace(1) %arg) {
; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv:
; GFX906-SDAG: ; %bb.0: ; %bb
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
; GFX906-SDAG-NEXT: .LBB60_1: ; %bb3
; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv:
; GFX942-SDAG: ; %bb.0: ; %bb
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, -1
; GFX942-SDAG-NEXT: .LBB60_1: ; %bb3
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1
; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv:
; GFX1012-SDAG: ; %bb.0: ; %bb
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
; GFX1012-SDAG-NEXT: .LBB60_1: ; %bb3
; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_addr_64bit_lsr_iv:
; GFX1100-SDAG: ; %bb.0: ; %bb
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: s_mov_b32 s0, -1
; GFX1100-SDAG-NEXT: .LBB60_1: ; %bb3
; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_i32 s0, s0, 1
; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1100-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1
; GFX1250-SDAG-NEXT: .LBB60_1: ; %bb3
; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv:
; GFX906-ISEL: ; %bb.0: ; %bb
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX906-ISEL-NEXT: .LBB60_1: ; %bb3
; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB60_1
; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv:
; GFX942-ISEL: ; %bb.0: ; %bb
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s0, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX942-ISEL-NEXT: .LBB60_1: ; %bb3
; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB60_1
; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv:
; GFX1012-ISEL: ; %bb.0: ; %bb
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1012-ISEL-NEXT: .LBB60_1: ; %bb3
; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB60_1
; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_addr_64bit_lsr_iv:
; GFX1100-ISEL: ; %bb.0: ; %bb
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s0, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1100-ISEL-NEXT: .LBB60_1: ; %bb3
; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB60_1
; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv:
; GFX1250-ISEL: ; %bb.0: ; %bb
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-ISEL-NEXT: .LBB60_1: ; %bb3
; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB60_1
; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
br label %bb3
bb2: ; preds = %bb3
ret <4 x float> %i6
bb3: ; preds = %bb3, %bb
%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !0)
%i6 = bitcast <4 x i32> %load to <4 x float>
%i8 = add nuw nsw i32 %i, 1
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}
;; Make sure we only have a single zero vaddr initialization.
;; 64-bit LSR induction variable with multiple loads
;; The loop in %bb5 steps %i from 0 and exits to %bb2 once %i8 reaches 256.
;; %bb2 then returns %i6 when %x is zero and %i6.1 otherwise.
;; NOTE(review): %load.1 reads from %i5 (not %i5.1) and %i6.1 bitcasts %load
;; (not %load.1), so %i5.1 and %load.1 are unused and both returns yield the
;; same value. The checks below accordingly contain a single load per run line
;; and no use of %x. This looks unintended for a "multiple loads" test --
;; confirm, and regenerate the assertions if the operands are corrected.
define <4 x float> @global_addr_64bit_lsr_iv_multiload(ptr addrspace(1) %arg, ptr addrspace(1) %arg.1, i32 %x) {
; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX906-SDAG: ; %bb.0: ; %bb
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
; GFX906-SDAG-NEXT: .LBB61_1: ; %bb5
; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX942-SDAG: ; %bb.0: ; %bb
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, -1
; GFX942-SDAG-NEXT: .LBB61_1: ; %bb5
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1
; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1012-SDAG: ; %bb.0: ; %bb
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
; GFX1012-SDAG-NEXT: .LBB61_1: ; %bb5
; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1100-SDAG: ; %bb.0: ; %bb
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: s_mov_b32 s0, -1
; GFX1100-SDAG-NEXT: .LBB61_1: ; %bb5
; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_i32 s0, s0, 1
; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1100-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1
; GFX1250-SDAG-NEXT: .LBB61_1: ; %bb5
; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX906-ISEL: ; %bb.0: ; %bb
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX906-ISEL-NEXT: .LBB61_1: ; %bb5
; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB61_1
; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX942-ISEL: ; %bb.0: ; %bb
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s0, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX942-ISEL-NEXT: .LBB61_1: ; %bb5
; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB61_1
; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1012-ISEL: ; %bb.0: ; %bb
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1012-ISEL-NEXT: .LBB61_1: ; %bb5
; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB61_1
; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1100-ISEL: ; %bb.0: ; %bb
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s0, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1100-ISEL-NEXT: .LBB61_1: ; %bb5
; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB61_1
; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX1250-ISEL: ; %bb.0: ; %bb
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-ISEL-NEXT: .LBB61_1: ; %bb5
; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB61_1
; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
br label %bb5
;; Loop exit: choose which loaded value to return based on %x.
bb2:
%y = icmp eq i32 %x, 0
br i1 %y, label %bb3, label %bb4
bb3:
ret <4 x float> %i6
bb4:
ret <4 x float> %i6.1
;; Loop body: %i is the 32-bit induction variable, zero-extended to index both
;; float-typed GEPs.
bb5:
%i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !1)
%i6 = bitcast <4 x i32> %load to <4 x float>
;; NOTE(review): %i5.1 is computed from %arg.1 but never used; the call below
;; loads from %i5 and the bitcast below consumes %load, leaving %load.1 unused.
%i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4
%load.1 = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !2)
%i6.1 = bitcast <4 x i32> %load to <4 x float>
%i8 = add nuw nsw i32 %i, 1
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb5
}
;;==============================================================================
;; Various saddr addressing modes (derived from global-saddr-load.ll)
;;==============================================================================
;;------------------------------------------------------------------------------
;; No vgpr offset, constants
;;------------------------------------------------------------------------------
;; SGPR base only
;; Loads directly from the inreg pointer %sbase with no address arithmetic and
;; bitcasts the <4 x i32> result to <4 x float>. The checks for every run line
;; expect the saddr form: a zeroed vgpr offset (v0) plus an sgpr-pair base.
;; The cache-policy modifiers in the checks (e.g. "sc0 sc1" on gfx942,
;; "scope:SCOPE_SYS" on gfx1250) go with metadata !3, which is defined outside
;; this part of the file -- presumably system scope; TODO confirm.
define <4 x float> @global_load_saddr_i8_offset_0(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %sbase, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx9 immediate offset
;; Loads from %sbase + 4095 bytes. The gfx906, gfx942, gfx1100 and gfx1250
;; checks expect the whole constant in the instruction's immediate field
;; (offset:4095) with a zeroed vgpr offset. The gfx1012 checks instead expect
;; it split as 0x800 in the vgpr offset plus offset:2047.
;; No cache-policy modifier appears in any of the checks for metadata !0 (its
;; definition is outside this part of the file).
define <4 x float> @global_load_saddr_i8_offset_4095(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx9 immediate offset + 1
;; Loads from %sbase + 4096 bytes. The gfx906, gfx942, gfx1012 and gfx1100
;; checks expect the constant materialized in the vgpr offset (0x1000) with no
;; immediate offset. The gfx1250 checks expect it kept in the immediate field
;; (offset:4096) with a zeroed vgpr offset.
;; Metadata !1 is defined outside this part of the file. With it the checks
;; show sc0 on gfx942, glc on gfx1012/gfx1100, and no modifier on gfx906 and
;; gfx1250.
define <4 x float> @global_load_saddr_i8_offset_4096(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx9 immediate offset + 2
;; Loads from %sbase + 4097 bytes. The gfx906, gfx942, gfx1012 and gfx1100
;; checks expect the constant split as 0x1000 in the vgpr offset plus
;; offset:1. The gfx1250 checks expect the whole constant in the immediate
;; field (offset:4097) with a zeroed vgpr offset.
;; Metadata !2 is defined outside this part of the file -- presumably
;; agent/device scope given "scope:SCOPE_DEV" in the gfx1250 checks; TODO
;; confirm. The other run lines show glc (gfx906, gfx1100), sc1 (gfx942) and
;; glc dlc (gfx1012).
define <4 x float> @global_load_saddr_i8_offset_4097(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with the most negative gfx9 immediate offset (-4096)
;; Loads from %sbase - 4096 bytes. The gfx906, gfx942, gfx1100 and gfx1250
;; checks expect the constant in the immediate field (offset:-4096) with a
;; zeroed vgpr offset and the sgpr-pair base. The gfx1012 checks expect the
;; full address to be computed first and the load to use the vgpr-pair
;; address form (v[0:1], off): SelectionDAG adds directly into v[0:1], while
;; GlobalISel adds in sgprs and then copies the result to v[0:1].
;; The cache-policy modifiers in the checks (e.g. "sc0 sc1" on gfx942,
;; "scope:SCOPE_SYS" on gfx1250) go with metadata !3, which is defined outside
;; this part of the file -- presumably system scope; TODO confirm.
define <4 x float> @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff000
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum negative gfx9 immediate offset -1
define <4 x float> @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffefff
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: step %sbase back by 4097 bytes, issue a 128-bit av.load with
;; metadata operand !0, and return the value bitcast to <4 x float>.
;; Per the expectations above, only gfx1250 encodes -4097 as an immediate
;; offset. gfx906/gfx942 and the GlobalISel gfx1012/gfx1100 runs add the full
;; constant to the base, while SelectionDAG on gfx1012/gfx1100 adds -4096 and
;; keeps offset:-1. No cache-policy or scope modifier is expected on any load.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum negative gfx9 immediate offset -2
define <4 x float> @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffeffe
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: step %sbase back by 4098 bytes, issue a 128-bit av.load with
;; metadata operand !1, and return the value bitcast to <4 x float>.
;; Per the expectations above, only gfx1250 encodes -4098 as an immediate
;; offset. gfx906/gfx942 and the GlobalISel gfx1012/gfx1100 runs add the full
;; constant to the base, while SelectionDAG on gfx1012/gfx1100 adds -4096 and
;; keeps offset:-2. With !1 the load carries sc0 on gfx942 and glc on
;; gfx1012/gfx1100; gfx906 and gfx1250 show no extra modifier.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx10 immediate offset
define <4 x float> @global_load_saddr_i8_offset_2048(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: advance %sbase by 2048 bytes, issue a 128-bit av.load with
;; metadata operand !2, and return the value bitcast to <4 x float>.
;; Per the expectations above, every target except gfx1012 encodes 2048 as an
;; immediate offset on the saddr form; gfx1012 instead moves 0x800 into the
;; VGPR offset operand. SelectionDAG and GlobalISel output is identical.
;; With !2 the load carries glc on gfx906/gfx1100, sc1 on gfx942, glc dlc on
;; gfx1012, and scope:SCOPE_DEV on gfx1250.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx10 immediate offset + 1
define <4 x float> @global_load_saddr_i8_offset_2049(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: advance %sbase by 2049 bytes, issue a 128-bit av.load with
;; metadata operand !3, and return the value bitcast to <4 x float>.
;; Per the expectations above, every target except gfx1012 encodes 2049 as an
;; immediate offset on the saddr form; gfx1012 splits it into 0x800 in the
;; VGPR offset operand plus offset:1. SelectionDAG and GlobalISel output is
;; identical. With !3 the load carries glc on gfx906/gfx1100, sc0 sc1 on
;; gfx942, glc dlc on gfx1012, and scope:SCOPE_SYS on gfx1250.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum gfx10 immediate offset + 2
define <4 x float> @global_load_saddr_i8_offset_2050(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2050:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2050:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2050:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_2050:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2050:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2050:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2050:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2050:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_2050:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2050:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: advance %sbase by 2050 bytes, issue a 128-bit av.load with
;; metadata operand !0, and return the value bitcast to <4 x float>.
;; Per the expectations above, every target except gfx1012 encodes 2050 as an
;; immediate offset on the saddr form; gfx1012 splits it into 0x800 in the
;; VGPR offset operand plus offset:2. SelectionDAG and GlobalISel output is
;; identical, and no cache-policy or scope modifier is expected on any load.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum negative gfx10 immediate offset
define <4 x float> @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: step %sbase back by 2048 bytes, issue a 128-bit av.load with
;; metadata operand !1, and return the value bitcast to <4 x float>.
;; Per the expectations above, every target (gfx1012 included) encodes -2048
;; as an immediate offset on the saddr form with a zero VGPR offset, and
;; SelectionDAG and GlobalISel output is identical. With !1 the load carries
;; sc0 on gfx942 and glc on gfx1012/gfx1100; gfx906 and gfx1250 show no extra
;; modifier.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum negative gfx10 immediate offset - 1
define <4 x float> @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7ff
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: step %sbase back by 2049 bytes, issue a 128-bit av.load with
;; metadata operand !2, and return the value bitcast to <4 x float>.
;; Per the expectations above, gfx906, gfx942, gfx1100 and gfx1250 encode
;; -2049 as an immediate offset on the saddr form. gfx1012 cannot: its
;; SelectionDAG run adds -2048 into a VGPR pair and keeps offset:-1, while
;; its GlobalISel run adds the full constant with scalar adds and uses no
;; immediate. With !2 the load carries glc on gfx906/gfx1100, sc1 on gfx942,
;; glc dlc on gfx1012, and scope:SCOPE_DEV on gfx1250.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; SGPR base with maximum negative gfx10 immediate offset - 2
define <4 x float> @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7fe
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: step %sbase back by 2050 bytes, issue a 128-bit av.load with
;; metadata operand !3, and return the value bitcast to <4 x float>.
;; Per the expectations above, gfx906, gfx942, gfx1100 and gfx1250 encode
;; -2050 as an immediate offset on the saddr form. gfx1012 cannot: its
;; SelectionDAG run adds -2048 into a VGPR pair and keeps offset:-2, while
;; its GlobalISel run adds the full constant with scalar adds and uses no
;; immediate. With !3 the load carries glc on gfx906/gfx1100, sc0 sc1 on
;; gfx942, glc dlc on gfx1012, and scope:SCOPE_SYS on gfx1250.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; maximum gfx12 saddr positive offset
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 8388607 (0x7FFFFF = 2^23 - 1),
;; passing metadata !0.
;; Addressing expected by the checks below (same for both instruction selectors):
;;  - gfx906, gfx942, gfx1100: offset split into a VGPR constant 0x7ff000 plus
;;    an instruction immediate of 4095.
;;  - gfx1012: offset split into a VGPR constant 0x7ff800 plus an immediate of 2047.
;;  - gfx1250: the whole offset is folded into the instruction immediate.
;; None of the expected loads carries a cache-policy or scope modifier.
;; NOTE(review): !0 is defined outside this excerpt; presumably it selects the
;; narrowest synchronization scope - confirm against the metadata definitions.
define <4 x float> @global_load_saddr_i8_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 8388607 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; maximum gfx12 saddr negative offset
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of -8388608 (-2^23), passing
;; metadata !1.
;; NOTE(review): the "0xFFFFFF" suffix in the function name does not equal the
;; offset actually used (-8388608 = -0x800000); the name is kept unchanged.
;; Addressing expected by the checks below:
;;  - gfx906, gfx942 (both selectors): 64-bit scalar add of 0xff800000 / -1,
;;    then the saddr form with a zero VGPR offset.
;;  - gfx1012, gfx1100 with SelectionDAG: 64-bit VALU add, then a VGPR-pair
;;    address with "off".
;;  - gfx1012, gfx1100 with GlobalISel: 64-bit scalar add, copy to VGPRs, then
;;    a VGPR-pair address with "off".
;;  - gfx1250: the whole offset is folded into the instruction immediate.
;; Modifiers expected on the load: none on gfx906 and gfx1250, sc0 on gfx942,
;; glc on gfx1012 and gfx1100.
;; NOTE(review): !1 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xff800000
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xff800000, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase - 8388608 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; 32-bit unsigned max offset
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 4294967295 (0xFFFFFFFF),
;; passing metadata !2.
;; Addressing expected by the checks below:
;;  - gfx906, gfx942, gfx1100 (both selectors): VGPR constant 0xfffff000 plus
;;    an instruction immediate of 4095, saddr form.
;;  - gfx1012 (both selectors): VGPR constant 0xfffff800 plus an immediate of 2047.
;;  - gfx1250 with SelectionDAG: 64-bit VALU add of 0xff800000 (high half adds
;;    only the carry) plus an immediate of 8388607.
;;  - gfx1250 with GlobalISel: 64-bit scalar add of -1 / 0 with carry, copied
;;    to a VGPR pair, no immediate.
;; Modifiers expected on the load: glc on gfx906 and gfx1100, sc1 on gfx942,
;; glc dlc on gfx1012, scope:SCOPE_DEV on gfx1250.
;; NOTE(review): !2 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0xFFFFFFFF(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff800
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff800
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 4294967295 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 4294967296 (0x100000000, i.e.
;; 2^32, the first value that no longer fits in 32 bits), passing metadata !3.
;; Addressing expected by the checks below:
;;  - SelectionDAG, all targets: only the high SGPR of the base is incremented
;;    by 1; the load keeps the saddr form with a zero VGPR offset.
;;  - GlobalISel, all targets: full 64-bit scalar add (0 low, 1 high with
;;    carry), result copied to a VGPR pair, load uses "off".
;; Modifiers expected on the load: glc on gfx906 and gfx1100, sc0 sc1 on gfx942,
;; glc dlc on gfx1012, scope:SCOPE_SYS on gfx1250.
;; NOTE(review): !3 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0x100000000(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_i32 s17, s17, 1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_i32 s1, s1, 1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, 1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_add_i32 s1, s1, 1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, 1
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 4294967296 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset + 1
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 4294967297 (0x100000001, i.e.
;; 2^32 + 1), passing metadata !0.
;; Addressing expected by the checks below:
;;  - SelectionDAG, all targets: 64-bit VALU add of 0 (low) / 1 (high), with
;;    the remaining 1 byte placed in the instruction immediate (offset:1).
;;  - GlobalISel, all targets: 64-bit scalar add of 1 (low) / 1 (high), result
;;    copied to a VGPR pair, load uses "off" with no immediate.
;; None of the expected loads carries a cache-policy or scope modifier.
;; NOTE(review): !0 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 1
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 4294967297 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset + max gfx9 imm
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 4294971391 (0x100000FFF, i.e.
;; 2^32 + 4095), passing metadata !1.
;; Addressing expected by the checks below:
;;  - gfx906, gfx942 (both selectors): 64-bit scalar add of 0xfff / 1, then the
;;    saddr form with a zero VGPR offset and no immediate.
;;  - gfx1012 with SelectionDAG: 64-bit VALU add of 0x800 / 1 plus an immediate
;;    of 2047.
;;  - gfx1100, gfx1250 with SelectionDAG: 64-bit VALU add of 0 / 1 plus an
;;    immediate of 4095.
;;  - gfx1012, gfx1100, gfx1250 with GlobalISel: 64-bit scalar add of 0xfff / 1,
;;    result copied to a VGPR pair, load uses "off" with no immediate.
;; Modifiers expected on the load: none on gfx906 and gfx1250, sc0 on gfx942,
;; glc on gfx1012 and gfx1100.
;; NOTE(review): !1 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0x100000FFF(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xfff
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xfff
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0xfff
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 4294971391 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; exceeds 32-bit offset + max gfx9 imm + 1
;; Loads 128 bits through @llvm.amdgcn.av.load.b128.p1 from the inreg base
;; pointer %sbase plus a constant byte offset of 4294971392 (0x100001000, i.e.
;; 2^32 + 4096), passing metadata !2.
;; Addressing expected by the checks below:
;;  - gfx906, gfx942 (both selectors): 64-bit scalar add of 0x1000 / 1, then
;;    the saddr form with a zero VGPR offset and no immediate.
;;  - gfx1012, gfx1100 with SelectionDAG: 64-bit VALU add of 0x1000 / 1, load
;;    uses "off" with no immediate.
;;  - gfx1250 with SelectionDAG: 64-bit VALU add of 0 / 1 plus an immediate of
;;    4096.
;;  - gfx1012, gfx1100, gfx1250 with GlobalISel: 64-bit scalar add of
;;    0x1000 / 1, result copied to a VGPR pair, load uses "off".
;; Modifiers expected on the load: glc on gfx906 and gfx1100, sc1 on gfx942,
;; glc dlc on gfx1012, scope:SCOPE_DEV on gfx1250.
;; NOTE(review): !2 is defined outside this excerpt - confirm which scope it names.
define <4 x float> @global_load_saddr_i8_offset_0x100001000(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0x1000
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x1000
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x1000, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, 1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0x1000
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Address = %sbase + 4294971392 bytes; the <4 x i32> result is returned as <4 x float>.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negative 32-bit unsigned max offset
;; The uniform (inreg) base pointer is displaced by the constant -4294967295
;; (-0xFFFFFFFF) before the load.
;; In the SelectionDAG checks the constant is split: part is added to the base
;; with VALU adds and the remainder goes into the load's immediate offset field
;; (-4095, -2047 or -8388607 depending on the target).
;; In the GlobalISel checks the whole constant is added with a scalar add/add-
;; with-carry pair and the load uses no immediate offset.
;; NOTE(review): !3 is defined outside this section; the "sc0 sc1" and
;; SCOPE_SYS modifiers in the checks suggest system scope -- confirm against
;; the metadata definition.
define <4 x float> @global_load_saddr_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s16
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX942-SDAG-NEXT: s_nop 0
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800000, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 1
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; -4294967295 == -0xFFFFFFFF; as a 64-bit value its low half is 1 and its
;; high half is -1, which is what the scalar add pairs above use.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !3)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negative exceeds 32-bit offset
;; The uniform (inreg) base pointer is displaced by the constant -4294967296
;; (-0x100000000), whose low 32 bits are all zero.
;; In the SelectionDAG checks only the high half of the base is adjusted (a
;; single scalar add of -1) and the load keeps the scalar base with a zero
;; VGPR offset.
;; In the GlobalISel checks a full 64-bit scalar add (low half + 0, high half
;; + -1 with carry) is emitted, the result is copied to VGPRs and the load uses
;; the VGPR address form.
;; NOTE(review): !0 is defined outside this section; none of the checks carry
;; a cache or scope modifier for it -- confirm the intended scope against the
;; metadata definition.
define <4 x float> @global_load_saddr_i8_offset_neg0x100000000(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_add_i32 s17, s17, -1
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_add_i32 s1, s1, -1
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, -1
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_add_i32 s1, s1, -1
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, -1
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, 0
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; -4294967296 == -0x100000000: low half 0, high half -1.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !0)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; negative exceeds 32-bit offset + 1
;; The uniform (inreg) base pointer is displaced by the constant -4294967297
;; (-0x100000001), one byte further than the previous test.
;; In the SelectionDAG checks -0x100000000 is added to the base with a VALU
;; add/add-with-carry pair (low half + 0, high half + -1) and the remaining -1
;; goes into the load's immediate offset field.
;; In the GlobalISel checks the whole constant is added with scalar adds (low
;; half + -1, high half + -2 with carry) and the load uses no immediate offset.
;; NOTE(review): !1 is defined outside this section; the checks show sc0 on
;; gfx942, glc on gfx1012 and gfx1100, and no modifier on gfx906 and gfx1250 --
;; confirm the intended scope against the metadata definition.
define <4 x float> @global_load_saddr_i8_offset_neg0x100000001(ptr addrspace(1) inreg %sbase) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_add_u32 s4, s16, -1
; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -2
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_add_u32 s0, s0, -1
; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -2
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, -1
; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -2
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_add_u32 s0, s0, -1
; GFX1100-ISEL-NEXT: s_addc_u32 s1, s1, -2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; -4294967297 == -0x100000001: low half -1 (0xFFFFFFFF), high half -2.
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !1)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Basic addressing patterns
;;------------------------------------------------------------------------------
;; Basic pattern, no immediate offset.
;; The address is the uniform (inreg) base plus a 32-bit %voffset that is
;; zero-extended to 64 bits; no constant is added.
;; Every check block, for both selectors, expects a single load in the
;; "VGPR offset, SGPR-pair base" form with no separate address arithmetic.
;; NOTE(review): !2 is defined outside this section; the glc / sc1 / "glc dlc"
;; / SCOPE_DEV modifiers in the checks suggest agent (device) scope -- confirm
;; against the metadata definition.
define <4 x float> @global_load_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; zext(i32) + uniform base is the address shape under test.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9
;; Same address shape as the basic zext test (uniform inreg base plus
;; zero-extended %voffset), with a further constant of 4095 bytes added.
;; The gfx906, gfx942, gfx1100 and gfx1250 checks expect the constant to land
;; in the load's immediate offset field while keeping the "VGPR offset,
;; SGPR-pair base" form.
;; The gfx1012 checks do not fold it that way: SelectionDAG computes the
;; address in VGPRs, adds 0x800 and uses offset:2047; GlobalISel adds the full
;; 0xfff and uses no immediate offset.
;; NOTE(review): !3 is defined outside this section; the "sc0 sc1" and
;; SCOPE_SYS modifiers in the checks suggest system scope -- confirm against
;; the metadata definition.
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Two chained GEPs: the variable zero-extended offset first, then the
;; constant 4095 whose folding is being checked.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9 + 1
;; Same address shape as the 4095 test, but the constant is 4096, one byte
;; more.
;; Only the gfx1250 checks still fold the constant into the load (offset:4096
;; with the "VGPR offset, SGPR-pair base" form).
;; All other targets expect the address to be built in VGPRs: first base +
;; zero-extended offset, then + 0x1000, followed by a load with no immediate
;; offset. The gfx942 SelectionDAG checks use v_lshl_add_u64 for the first add.
;; NOTE(review): !0 is defined outside this section; none of the checks carry
;; a cache or scope modifier for it -- confirm the intended scope against the
;; metadata definition.
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Two chained GEPs: the variable zero-extended offset first, then the
;; constant 4096 (0x1000) whose folding is being checked.
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
;; The intrinsic yields <4 x i32>; the bitcast only adapts it to the
;; <4 x float> return type.
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Maximum negative offset on gfx9
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) - 4096, using the metadata !1
;; operand (defined elsewhere in this file). Per the checks above, the -4096
;; is folded into the instruction's immediate offset on gfx906, gfx942,
;; gfx1100 and gfx1250; gfx1012 instead materializes the full address with
;; VALU adds and loads with "off" addressing.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 -4096
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum negative offset on gfx9 - 1
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) - 4097, using the metadata !2
;; operand (defined elsewhere in this file). Per the checks above, only
;; gfx1250 folds -4097 into the immediate offset. On the other targets the
;; address is computed in VGPRs: SelectionDAG adds 0xfffff000 and keeps
;; offset:-1, while GlobalISel adds the whole 0xffffefff and uses no
;; immediate offset.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 -4097
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum positive offset on gfx10
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) + 2047, using the metadata !3
;; operand (defined elsewhere in this file). Per the checks above, every
;; tested target and both instruction selectors fold 2047 into the immediate
;; offset of the SGPR-base + VGPR-offset load form.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 2047
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum positive offset on gfx10 + 1
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) + 2048, using the metadata !0
;; operand (defined elsewhere in this file). Per the checks above, 2048 is
;; folded into the immediate offset on gfx906, gfx942, gfx1100 and gfx1250;
;; gfx1012 instead adds 0x800 to the VGPR address and loads with "off"
;; addressing and no immediate offset.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 2048
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum negative offset on gfx10
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) - 2048, using the metadata !1
;; operand (defined elsewhere in this file). Per the checks above, every
;; tested target and both instruction selectors fold -2048 into the immediate
;; offset of the SGPR-base + VGPR-offset load form.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 -2048
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum negative offset on gfx10 - 1
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) - 2049, using the metadata !2
;; operand (defined elsewhere in this file). Per the checks above, -2049 is
;; folded into the immediate offset on gfx906, gfx942, gfx1100 and gfx1250.
;; On gfx1012 the address is computed in VGPRs: SelectionDAG adds 0xfffff800
;; and keeps offset:-1, while GlobalISel adds the whole 0xfffff7ff and uses
;; no immediate offset.
  %voffset.i64 = zext i32 %voffset to i64
  %base.voff = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %voffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.voff, i64 -2049
  %data = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  %result = bitcast <4 x i32> %data to <4 x float>
  ret <4 x float> %result
}
;; Maximum positive offset on gfx12.
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; Loads 128 bits from %sbase + zext(%voffset) + 8388607 (0x7FFFFF), using
;; the metadata !3 operand (defined elsewhere in this file). Per the checks
;; above, only gfx1250 folds the whole constant into the immediate offset.
;; On the other targets the address is computed in VGPRs: SelectionDAG splits
;; the constant (0x7ff000 + offset:4095 on gfx906/gfx942/gfx1100, 0x7ff800 +
;; offset:2047 on gfx1012), while GlobalISel adds the whole 0x7fffff and uses
;; no immediate offset.
;;
;; The zext is kept on its own line with the rest of the body (not on the
;; "define" line) so the complete instruction sequence reads in one place,
;; matching the neighbouring tests.
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
  %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
  %cast.load = bitcast <4 x i32> %load to <4 x float>
  ret <4 x float> %cast.load
}
;; Minimum offset on gfx12.
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-SDAG-NEXT: s_nop 1
; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; The address is %sbase + zext(%voffset) - 8388608, built from two i8 GEPs
  ;; with the constant applied last. In the expectations above only gfx1250
  ;; folds the constant into the instruction's immediate offset; every other
  ;; target adds 0xff800000 (with -1 in the high half) using VALU adds and
  ;; loads through a 64-bit VGPR address.
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
  ;; Metadata operand !0 is defined elsewhere in this file; no cache-policy or
  ;; scope modifier appears on the load for any target checked here.
  %load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !0)
  %cast.load = bitcast <4 x i32> %load to <4 x float>
  ret <4 x float> %cast.load
}
;; Maximum positive offset on gfx9, and immediate needs to be moved lower.
define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; The constant 4095 is applied to the SGPR base *before* the zero-extended
  ;; VGPR offset. In the expectations above gfx906, gfx942, gfx1100 and
  ;; gfx1250 still emit the saddr form with offset:4095, while gfx1012 adds
  ;; the pieces with VALU instructions (0x800 + offset:2047 for SelectionDAG,
  ;; a full 0xfff add for GlobalISel).
  %voffset.i64 = zext i32 %voffset to i64
  %base.plus.imm = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.plus.imm, i64 %voffset.i64
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; pointer addressing done in integers
define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; The address is computed as an integer (ptrtoint base + zext offset) and
  ;; converted back with inttoptr instead of using a GEP. Every target checked
  ;; above still selects the SGPR-base + VGPR-offset form of the load.
  %voffset.i64 = zext i32 %voffset to i64
  %sbase.int = ptrtoint ptr addrspace(1) %sbase to i64
  ;; Base is the LHS of the add, zero-extended offset the RHS.
  %sum = add i64 %sbase.int, %voffset.i64
  %addr = inttoptr i64 %sum to ptr addrspace(1)
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; zext forced to LHS of addressing expression
define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; Integer address arithmetic with the operands of the add swapped: the
  ;; zero-extended VGPR offset is the LHS and the SGPR base the RHS. Every
  ;; target checked above still selects the SGPR-base + VGPR-offset load.
  %voffset.i64 = zext i32 %voffset to i64
  %sbase.int = ptrtoint ptr addrspace(1) %sbase to i64
  %sum = add i64 %voffset.i64, %sbase.int
  %addr = inttoptr i64 %sum to ptr addrspace(1)
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; zext forced to LHS of addressing expression, with immediate offset
define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; Integer address arithmetic: (zext(voffset) + base) + 128, with the
  ;; constant added last. Every target checked above folds the 128 into the
  ;; load's immediate offset and keeps the SGPR-base + VGPR-offset form.
  %voffset.i64 = zext i32 %voffset to i64
  %sbase.int = ptrtoint ptr addrspace(1) %sbase to i64
  %sum = add i64 %voffset.i64, %sbase.int
  %sum.imm = add i64 %sum, 128
  %addr = inttoptr i64 %sum.imm to ptr addrspace(1)
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !0)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position
define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) inreg %sbase, i32 %voffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; Integer address arithmetic: zext(voffset) + (base + 128). The constant is
  ;; attached to the base before the variable offset is added, yet every
  ;; target checked above still folds it into the load's immediate offset.
  %voffset.i64 = zext i32 %voffset to i64
  %sbase.int = ptrtoint ptr addrspace(1) %sbase to i64
  %base.plus.imm = add i64 %sbase.int, 128
  %sum = add i64 %voffset.i64, %base.plus.imm
  %addr = inttoptr i64 %sum to ptr addrspace(1)
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;;------------------------------------------------------------------------------
;; Uniformity edge cases
;;------------------------------------------------------------------------------
;; Both 64-bit base and 32-bit offset are scalar
define <4 x float> @global_load_saddr_i8_zext_uniform_offset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; Both the base pointer and the 32-bit offset are inreg arguments. In the
  ;; expectations above most configurations copy the offset SGPR into v0 and
  ;; use it as the VGPR offset; gfx1250 GlobalISel instead adds the offset to
  ;; the base with scalar adds and passes a zero VGPR offset.
  %soffset.i64 = zext i32 %soffset to i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %soffset.i64
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; Both 64-bit base and 32-bit offset are scalar, with immediate offset.
define <4 x float> @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
  ;; Both the base pointer and the 32-bit offset are inreg arguments, and a
  ;; constant -24 is applied by a second GEP. Every configuration checked
  ;; above folds the -24 into the load's immediate offset. gfx1250 GlobalISel
  ;; adds the variable offset to the base with scalar adds and passes a zero
  ;; VGPR offset; the others copy the offset SGPR into v0.
  %soffset.i64 = zext i32 %soffset to i64
  %base.plus.off = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %soffset.i64
  %addr = getelementptr inbounds i8, ptr addrspace(1) %base.plus.off, i64 -24
  %vec = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !3)
  %result = bitcast <4 x i32> %vec to <4 x float>
  ret <4 x float> %result
}
;; Both components uniform, zext forced to LHS of addressing expression.
;; Both arguments are inreg. The address is built with ptrtoint/add/inttoptr
;; rather than a GEP, and the zero-extended 32-bit offset is the first operand
;; of the add.
define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; address = zext(%soffset) + ptrtoint(%sbase); the zext is deliberately the
;; left-hand operand of the add.
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
%add = add i64 %zext.offset, %sbase.as.int
%dirty.gep = inttoptr i64 %add to ptr addrspace(1)
;; The metadata operand !0 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). None of the checked loads above carry a
;; trailing modifier such as glc, sc0 or a scope operand.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Both components uniform, zext forced to LHS of addressing expression, with immediate offset.
;; Same ptrtoint/add/inttoptr address construction as the variant without an
;; immediate, followed by a second add of the constant 128. Every checked load
;; below carries that constant as offset:128.
define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; address = (zext(%soffset) + ptrtoint(%sbase)) + 128; the zext stays on the
;; left of the first add and the constant is applied by a separate add.
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
%add = add i64 %zext.offset, %sbase.as.int
%add.immoffset = add i64 %add, 128
%dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
;; The metadata operand !1 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). With it the checked loads show sc0 on
;; gfx942, glc on gfx1012 and gfx1100, and no modifier on gfx906 and gfx1250.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %dirty.gep, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; divergent 64-bit base, 32-bit scalar offset.
;; Only the i32 offset is inreg; the base pointer is an ordinary argument.
;; In every checked output the 64-bit add is done with separate ALU
;; instructions and the load uses the "v[0:1], off" address form.
define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s17, 0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s1, 0
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s1, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; address = %vbase + zext(%soffset), expressed as a byte-granular (i8) GEP.
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
;; The metadata operand !2 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). With it the checked loads show glc on
;; gfx906 and gfx1100, sc1 on gfx942, glc dlc on gfx1012, and SCOPE_DEV on
;; gfx1250.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep0, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; divergent 64-bit base, 32-bit scalar offset, with imm offset
;; Same addressing as the variant without an immediate, plus a second i8 GEP of
;; 4095 bytes. The checked loads carry offset:4095 on gfx906, gfx942, gfx1100
;; and gfx1250. On gfx1012 the constant is not carried whole: the
;; -global-isel=0 output adds 0x800 and keeps offset:2047, while the
;; -global-isel=1 output adds 0xfff and uses no offset field.
define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 inreg %soffset) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s17, 0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s1, 0
; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-ISEL-NEXT: s_nop 1
; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s1, 0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; address = (%vbase + zext(%soffset)) + 4095, as two chained i8 GEPs so the
;; constant is a separate addressing step from the register offset.
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
;; The metadata operand !3 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). With it the checked loads show glc on
;; gfx906 and gfx1100, sc0 sc1 on gfx942, glc dlc on gfx1012, and SCOPE_SYS on
;; gfx1250.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Natural addressing shifts with restricted range
;;------------------------------------------------------------------------------
;; Cannot push the shift into 32-bits, and cannot match.
;; The base pointer is inreg; the 32-bit index is loaded from %voffset.ptr,
;; zero-extended to 64 bits and used in a float-typed GEP. Every checked output
;; performs a 64-bit shift-by-2 and add (v_lshlrev_b64 plus adds, or
;; v_lshl_add_u64) and then loads through the "v[0:1], off" address form.
define <4 x float> @global_load_saddr_f32_natural_addressing(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The index comes from memory rather than from an argument.
%voffset = load i32, ptr addrspace(1) %voffset.ptr
%zext.offset = zext i32 %voffset to i64
;; float-typed GEP: the zero-extended index is scaled by the element size,
;; which is where the shift by 2 in the checked output comes from.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
;; The metadata operand !0 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). None of the checked 128-bit loads above
;; carry a trailing modifier such as glc, sc0 or a scope operand.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Cannot push the shift into 32-bits, with an immediate offset.
;; NOTE(review): despite the name and the sentence above, both GEPs in this
;; function are i8-typed, so the IR contains no scaled index and the checked
;; output contains no shift. Every checked 128-bit load uses the
;; "v0, s[N:M] offset:128" form. If a float-typed GEP was intended, the IR and
;; the autogenerated assertions would both have to change -- confirm intent.
define <4 x float> @global_load_saddr_f32_natural_addressing_immoffset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The 32-bit offset comes from memory rather than from an argument.
%voffset = load i32, ptr addrspace(1) %voffset.ptr
%zext.offset = zext i32 %voffset to i64
;; address = (%sbase + zext(%voffset)) + 128, both steps byte-granular (i8).
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128
;; The metadata operand !1 is defined outside this excerpt (presumably it
;; selects the scope -- TODO confirm). With it the checked 128-bit loads show
;; sc0 on gfx942, glc on gfx1012 and gfx1100, and no modifier on gfx906 and
;; gfx1250.
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is sufficiently restricted to push the shift into 32-bits.
;; The offset is loaded with !range !4, zero-extended to i64 and used to index
;; %sbase as float (i.e. scaled by 4). The expected output for every run line
;; is a 32-bit v_lshlrev_b32 by 2 feeding a load that takes the vgpr offset
;; together with the sgpr base pair.
;; NOTE(review): !4 and !2 are defined outside this chunk. The cache-policy
;; modifiers in the expected loads (glc, sc1, glc dlc, scope:SCOPE_DEV)
;; presumably come from the scope named by !2 -- confirm against the metadata
;; definitions at the end of the file.
define <4 x float> @global_load_f32_saddr_zext_vgpr_range(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; 32-bit offset fetched from memory; its value is constrained by !range !4.
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
%zext.offset = zext i32 %voffset to i64
;; Index by float, so the byte offset is the zero-extended value times 4.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is sufficiently restricted to push the shift into 32-bits, with an imm offset
;; Same shape as the plain range test, plus a second GEP of 100 floats. The
;; expected output keeps the 32-bit v_lshlrev_b32 by 2 and shows the constant
;; part as offset:400 on the load (100 floats * 4 bytes).
;; NOTE(review): !4 and !3 are defined outside this chunk. The modifiers in the
;; expected loads (glc, sc0 sc1, glc dlc, scope:SCOPE_SYS) presumably come from
;; the scope named by !3 -- confirm against the metadata definitions.
define <4 x float> @global_load_f32_saddr_zext_vgpr_range_imm_offset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; 32-bit offset fetched from memory; its value is constrained by !range !4.
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
%zext.offset = zext i32 %voffset to i64
;; Variable part of the address: %sbase indexed by float.
%gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
;; Constant part: 100 floats, i.e. the 400-byte immediate in the expected loads.
%gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep1, metadata !3)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; Range is 1 beyond the limit where we can move the shift into 32-bits.
;; Negative counterpart of the range test: the IR is the same except that the
;; offset load carries !range !5. The expected output for every run line keeps
;; the scale as a 64-bit operation (v_lshlrev_b64 plus a carry add, or
;; v_lshl_add_u64) and performs the final load through a vgpr pair with "off"
;; instead of the vgpr-offset + sgpr-base form.
;; NOTE(review): !5 and !0 are defined outside this chunk. The expected loads
;; carry no cache-policy modifier -- confirm this matches the scope named by !0.
define <4 x float> @global_load_f32_saddr_zext_vgpr_range_too_large(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; 32-bit offset fetched from memory; its value is constrained by !range !5.
%voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
%zext.offset = zext i32 %voffset to i64
;; Index by float, so the byte offset is the zero-extended value times 4.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %gep, metadata !0)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; or-with-constant as add
;;------------------------------------------------------------------------------
;; Check add-as-or with split 64-bit or.
;; The address is built purely from integers: zext(%idx) | 16, converted with
;; inttoptr. %sbase is not referenced by the body. The expected output ORs 16
;; into the low dword, materializes 0 for the high dword, and loads through
;; the resulting vgpr pair with "off" and no immediate offset (on gfx1250 the
;; mov and the or are paired in one v_dual instruction).
;; NOTE(review): !1 is defined outside this chunk. The modifiers in the
;; expected loads (none on gfx906 and gfx1250, sc0 on gfx942, glc on gfx1012
;; and gfx1100) presumably come from the scope named by !1 -- confirm.
define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) inreg %sbase, i32 %idx) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The upper 32 bits of %zext.idx are zero, so the 64-bit or only changes the
;; low half; the expected output reflects this with a single 32-bit or.
%zext.idx = zext i32 %idx to i64
%or = or i64 %zext.idx, 16
%addr = inttoptr i64 %or to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !1)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;; or-as-add with offset exceeding gfx9 imm range
;; Same shape as the imm_offset_16 variant but with the constant 4160
;; (0x1040). %sbase is not referenced by the body. The expected output for
;; every run line ORs 0x1040 into the low dword, materializes 0 for the high
;; dword, and loads through the vgpr pair with "off" and no immediate offset.
;; NOTE(review): !2 is defined outside this chunk. The modifiers in the
;; expected loads (glc, sc1, glc dlc, scope:SCOPE_DEV) presumably come from the
;; scope named by !2 -- confirm against the metadata definitions.
define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) inreg %sbase, i32 %idx) {
; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX906-SDAG: ; %bb.0:
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX942-SDAG: ; %bb.0:
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1012-SDAG: ; %bb.0:
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX906-ISEL: ; %bb.0:
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX942-ISEL: ; %bb.0:
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1012-ISEL: ; %bb.0:
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1100-ISEL: ; %bb.0:
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
; GFX1250-ISEL: ; %bb.0:
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; The upper 32 bits of %zext.idx are zero, so the 64-bit or only changes the
;; low half; 4160 appears as the literal 0x1040 in the expected output.
%zext.idx = zext i32 %idx to i64
%or = or i64 %zext.idx, 4160
%addr = inttoptr i64 %or to ptr addrspace(1)
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %addr, metadata !2)
%cast.load = bitcast <4 x i32> %load to <4 x float>
ret <4 x float> %cast.load
}
;;------------------------------------------------------------------------------
;; Full 64-bit scalar add.
;;------------------------------------------------------------------------------
define <4 x float> @global_saddr_64bit_lsr_iv(ptr addrspace(1) inreg %arg) {
; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv:
; GFX906-SDAG: ; %bb.0: ; %bb
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
; GFX906-SDAG-NEXT: .LBB114_1: ; %bb3
; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv:
; GFX942-SDAG: ; %bb.0: ; %bb
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s2, -1
; GFX942-SDAG-NEXT: .LBB114_1: ; %bb3
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1
; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX942-SDAG-NEXT: s_mov_b32 s3, 0
; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv:
; GFX1012-SDAG: ; %bb.0: ; %bb
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
; GFX1012-SDAG-NEXT: .LBB114_1: ; %bb3
; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4
; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_saddr_64bit_lsr_iv:
; GFX1100-SDAG: ; %bb.0: ; %bb
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: s_mov_b32 s2, -1
; GFX1100-SDAG-NEXT: .LBB114_1: ; %bb3
; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_i32 s2, s2, 1
; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1100-SDAG-NEXT: s_mov_b32 s3, 0
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_u32 s0, s0, s2
; GFX1100-SDAG-NEXT: s_addc_u32 s1, s1, s3
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1
; GFX1250-SDAG-NEXT: .LBB114_1: ; %bb3
; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv:
; GFX906-ISEL: ; %bb.0: ; %bb
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: .LBB114_1: ; %bb3
; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB114_1
; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv:
; GFX942-ISEL: ; %bb.0: ; %bb
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s2, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: .LBB114_1: ; %bb3
; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB114_1
; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv:
; GFX1012-ISEL: ; %bb.0: ; %bb
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: .LBB114_1: ; %bb3
; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB114_1
; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_saddr_64bit_lsr_iv:
; GFX1100-ISEL: ; %bb.0: ; %bb
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s2, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: .LBB114_1: ; %bb3
; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB114_1
; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv:
; GFX1250-ISEL: ; %bb.0: ; %bb
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-ISEL-NEXT: .LBB114_1: ; %bb3
; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB114_1
; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
br label %bb3
bb2: ; preds = %bb3
ret <4 x float> %i6
bb3: ; preds = %bb3, %bb
%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !3)
%i6 = bitcast <4 x i32> %load to <4 x float>
%i8 = add nuw nsw i32 %i, 1
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}
;; Make sure we only have a single zero vaddr initialization.
;; 64-bit LSR induction variable with multiple loads
define <4 x float> @global_saddr_64bit_lsr_iv_multiload(ptr addrspace(1) inreg %arg, ptr addrspace(1) inreg %arg.1, i32 %x) {
; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX906-SDAG: ; %bb.0: ; %bb
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
; GFX906-SDAG-NEXT: .LBB115_1: ; %bb5
; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4
; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5
; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX942-SDAG: ; %bb.0: ; %bb
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s2, -1
; GFX942-SDAG-NEXT: .LBB115_1: ; %bb5
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1
; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX942-SDAG-NEXT: s_mov_b32 s3, 0
; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3
; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1012-SDAG: ; %bb.0: ; %bb
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
; GFX1012-SDAG-NEXT: .LBB115_1: ; %bb5
; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4
; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5
; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1100-SDAG: ; %bb.0: ; %bb
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: s_mov_b32 s2, -1
; GFX1100-SDAG-NEXT: .LBB115_1: ; %bb5
; GFX1100-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_i32 s2, s2, 1
; GFX1100-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
; GFX1100-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
; GFX1100-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1100-SDAG-NEXT: s_mov_b32 s3, 0
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT: s_add_u32 s0, s0, s2
; GFX1100-SDAG-NEXT: s_addc_u32 s1, s1, s3
; GFX1100-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1
; GFX1250-SDAG-NEXT: .LBB115_1: ; %bb5
; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX906-ISEL: ; %bb.0: ; %bb
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX906-ISEL-NEXT: .LBB115_1: ; %bb5
; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB115_1
; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX942-ISEL: ; %bb.0: ; %bb
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-ISEL-NEXT: s_mov_b32 s2, -1
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX942-ISEL-NEXT: .LBB115_1: ; %bb5
; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB115_1
; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1012-ISEL: ; %bb.0: ; %bb
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1012-ISEL-NEXT: .LBB115_1: ; %bb5
; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB115_1
; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1100-ISEL: ; %bb.0: ; %bb
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-ISEL-NEXT: s_mov_b32 s2, -1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1100-ISEL-NEXT: .LBB115_1: ; %bb5
; GFX1100-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1100-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1100-ISEL-NEXT: s_cbranch_vccz .LBB115_1
; GFX1100-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1100-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX1100-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX1100-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
; GFX1100-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1100-ISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
; GFX1250-ISEL: ; %bb.0: ; %bb
; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-ISEL-NEXT: .LBB115_1: ; %bb5
; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB115_1
; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
;; IR under test: a counted loop (%i from 0, exits once %i8 == 256) that issues
;; two llvm.amdgcn.av.load.b128.p1 calls per iteration, one with scope metadata
;; !0 and one with !1; after the loop, %x selects which bitcast result is
;; returned.
bb:
br label %bb5
;; Loop exit: branch on %x == 0 to choose the return block.
bb2:
%y = icmp eq i32 %x, 0
br i1 %y, label %bb3, label %bb4
bb3:
ret <4 x float> %i6
bb4:
ret <4 x float> %i6.1
;; Loop body: both addresses are indexed by the zero-extended 32-bit counter.
bb5:
%i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
%load = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !0)
%i6 = bitcast <4 x i32> %load to <4 x float>
%i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4
;; NOTE(review): the call below takes %i5 (derived from %arg) rather than
;; %i5.1, and %i6.1 is a bitcast of %load rather than %load.1. As written,
;; %i5.1 and %load.1 have no users, bb3 and bb4 return the same value, and
;; every check block above contains a single global load. Presumably %i5.1 and
;; %load.1 were intended here -- confirm, and if so fix both operands and
;; regenerate the assertions with utils/update_llc_test_checks.py.
%load.1 = call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) %i5, metadata !1)
%i6.1 = bitcast <4 x i32> %load to <4 x float>
%i8 = add nuw nsw i32 %i, 1
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb5
}
;; Scope-name metadata strings. !0, !1 and !3 are passed as the metadata
;; operand of the llvm.amdgcn.av.load.b128.p1 calls in the functions above.
!0 = !{!"wavefront"}
!1 = !{!"workgroup"}
!2 = !{!"agent"}
;; Empty scope name: the gfx1250 checks for the load that uses !3 show
;; scope:SCOPE_SYS (and glc / sc0 sc1 on the older targets).
!3 = !{!""}
;; NOTE(review): i32 pairs that look like !range-style [lo, hi) bounds; their
;; use sites are outside this part of the file -- confirm there.
!4 = !{i32 0, i32 1073741824} ; (1 << 30)
!5 = !{i32 0, i32 1073741825} ; (1 << 30) + 1