blob: 312d5b4e4c3bc1335b7622c3593fe792782e3d66 [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12 %s
; Loads an i8 through an addrspace(0) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one flat_load_i8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: sextload_P0_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_load_i8 v0, v[0:1]
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: flat_store_b32 v[2:3], v0
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(0) %ptra
  %wide = sext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(0) %out
  ret void
}
; Loads an i16 through an addrspace(0) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one flat_load_i16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: sextload_P0_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_load_i16 v0, v[0:1]
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: flat_store_b32 v[2:3], v0
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(0) %ptra
  %wide = sext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(0) %out
  ret void
}
; Loads an i8 through an addrspace(0) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one flat_load_u8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: zextload_P0_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: flat_store_b32 v[2:3], v0
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(0) %ptra
  %wide = zext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(0) %out
  ret void
}
; Loads an i16 through an addrspace(0) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one flat_load_u16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) {
; GFX12-LABEL: zextload_P0_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_load_u16 v0, v[0:1]
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: flat_store_b32 v[2:3], v0
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(0) %ptra
  %wide = zext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(0) %out
  ret void
}
; Loads an i8 through an addrspace(1) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one global_load_i8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P1_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_i8 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(1) %ptra
  %wide = sext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i16 through an addrspace(1) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one global_load_i16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P1_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_i16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(1) %ptra
  %wide = sext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i8 through an addrspace(1) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one global_load_u8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P1_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(1) %ptra
  %wide = zext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i16 through an addrspace(1) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one global_load_u16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P1_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_u16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(1) %ptra
  %wide = zext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i8 through an addrspace(3) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one ds_load_i8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: sextload_P3_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_load_i8 v0, v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(3) %ptra
  %wide = sext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(3) %out
  ret void
}
; Loads an i16 through an addrspace(3) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one ds_load_i16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: sextload_P3_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_load_i16 v0, v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(3) %ptra
  %wide = sext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(3) %out
  ret void
}
; Loads an i8 through an addrspace(3) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one ds_load_u8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: zextload_P3_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_load_u8 v0, v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(3) %ptra
  %wide = zext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(3) %out
  ret void
}
; Loads an i16 through an addrspace(3) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one ds_load_u16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) {
; GFX12-LABEL: zextload_P3_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_load_u16 v0, v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(3) %ptra
  %wide = zext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(3) %out
  ret void
}
; Loads an i8 through an addrspace(4) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through the addrspace(1) pointer
; %out. The checks below expect one global_load_i8 feeding the add, with no
; separate extend instruction.
define amdgpu_ps void @sextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P4_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_i8 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(4) %ptra
  %wide = sext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i16 through an addrspace(4) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through the addrspace(1) pointer
; %out. The checks below expect one global_load_i16 feeding the add, with no
; separate extend instruction.
define amdgpu_ps void @sextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: sextload_P4_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_i16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(4) %ptra
  %wide = sext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i8 through an addrspace(4) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through the addrspace(1) pointer
; %out. The checks below expect one global_load_u8 feeding the add, with no
; separate extend instruction.
define amdgpu_ps void @zextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P4_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(4) %ptra
  %wide = zext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i16 through an addrspace(4) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through the addrspace(1) pointer
; %out. The checks below expect one global_load_u16 feeding the add, with no
; separate extend instruction.
define amdgpu_ps void @zextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) {
; GFX12-LABEL: zextload_P4_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_u16 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(4) %ptra
  %wide = zext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(1) %out
  ret void
}
; Loads an i8 through an addrspace(5) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one scratch_load_i8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: sextload_P5_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: scratch_load_i8 v0, v0, off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: scratch_store_b32 v1, v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(5) %ptra
  %wide = sext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(5) %out
  ret void
}
; Loads an i16 through an addrspace(5) pointer, sign-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one scratch_load_i16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @sextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: sextload_P5_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: scratch_load_i16 v0, v0, off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: scratch_store_b32 v1, v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(5) %ptra
  %wide = sext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(5) %out
  ret void
}
; Loads an i8 through an addrspace(5) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one scratch_load_u8 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: zextload_P5_i8:
; GFX12: ; %bb.0:
; GFX12-NEXT: scratch_load_u8 v0, v0, off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: scratch_store_b32 v1, v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i8, ptr addrspace(5) %ptra
  %wide = zext i8 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(5) %out
  ret void
}
; Loads an i16 through an addrspace(5) pointer, zero-extends it to i32, adds the
; widened value to itself and stores the i32 through %out. The checks below
; expect one scratch_load_u16 feeding the add, with no separate extend instruction.
define amdgpu_ps void @zextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) {
; GFX12-LABEL: zextload_P5_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: scratch_load_u16 v0, v0, off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0
; GFX12-NEXT: scratch_store_b32 v1, v0, off
; GFX12-NEXT: s_endpgm
  %narrow = load i16, ptr addrspace(5) %ptra
  %wide = zext i16 %narrow to i32
  %twice = add i32 %wide, %wide
  store i32 %twice, ptr addrspace(5) %out
  ret void
}