| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12 %s |
| |
| ; Sign-extending i8 load through an addrspace(0) pointer; the i32 value is added to itself and stored to addrspace(0). |
| ; The checks expect a single flat_load_i8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: sextload_P0_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_i8 v0, v[0:1] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: flat_store_b32 v[2:3], v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(0) %ptra |
| %a32 = sext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; Sign-extending i16 load through an addrspace(0) pointer; the i32 value is added to itself and stored to addrspace(0). |
| ; The checks expect a single flat_load_i16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: sextload_P0_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_i16 v0, v[0:1] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: flat_store_b32 v[2:3], v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(0) %ptra |
| %a32 = sext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; Zero-extending i8 load through an addrspace(0) pointer; the i32 value is added to itself and stored to addrspace(0). |
| ; The checks expect a single flat_load_u8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P0_i8(ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: zextload_P0_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_u8 v0, v[0:1] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: flat_store_b32 v[2:3], v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(0) %ptra |
| %a32 = zext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; Zero-extending i16 load through an addrspace(0) pointer; the i32 value is added to itself and stored to addrspace(0). |
| ; The checks expect a single flat_load_u16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P0_i16(ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: zextload_P0_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_u16 v0, v[0:1] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: flat_store_b32 v[2:3], v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(0) %ptra |
| %a32 = zext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; Sign-extending i8 load through an addrspace(1) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect a single global_load_i8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P1_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_i8 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(1) %ptra |
| %a32 = sext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Sign-extending i16 load through an addrspace(1) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect a single global_load_i16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P1_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_i16 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(1) %ptra |
| %a32 = sext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Zero-extending i8 load through an addrspace(1) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect a single global_load_u8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P1_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P1_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(1) %ptra |
| %a32 = zext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Zero-extending i16 load through an addrspace(1) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect a single global_load_u16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P1_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P1_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(1) %ptra |
| %a32 = zext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Sign-extending i8 load through an addrspace(3) pointer; the i32 value is added to itself and stored to addrspace(3). |
| ; The checks expect a single ds_load_i8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: sextload_P3_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_i8 v0, v0 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: ds_store_b32 v1, v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(3) %ptra |
| %a32 = sext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; Sign-extending i16 load through an addrspace(3) pointer; the i32 value is added to itself and stored to addrspace(3). |
| ; The checks expect a single ds_load_i16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: sextload_P3_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_i16 v0, v0 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: ds_store_b32 v1, v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(3) %ptra |
| %a32 = sext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; Zero-extending i8 load through an addrspace(3) pointer; the i32 value is added to itself and stored to addrspace(3). |
| ; The checks expect a single ds_load_u8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P3_i8(ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: zextload_P3_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u8 v0, v0 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: ds_store_b32 v1, v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(3) %ptra |
| %a32 = zext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; Zero-extending i16 load through an addrspace(3) pointer; the i32 value is added to itself and stored to addrspace(3). |
| ; The checks expect a single ds_load_u16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P3_i16(ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: zextload_P3_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u16 v0, v0 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: ds_store_b32 v1, v0 |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(3) %ptra |
| %a32 = zext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; Sign-extending i8 load through an addrspace(4) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect the addrspace(4) load to be selected as a single global_load_i8, with no separate extend instruction. |
| define amdgpu_ps void @sextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P4_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_i8 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(4) %ptra |
| %a32 = sext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Sign-extending i16 load through an addrspace(4) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect the addrspace(4) load to be selected as a single global_load_i16, with no separate extend instruction. |
| define amdgpu_ps void @sextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P4_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_i16 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(4) %ptra |
| %a32 = sext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Zero-extending i8 load through an addrspace(4) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect the addrspace(4) load to be selected as a single global_load_u8, with no separate extend instruction. |
| define amdgpu_ps void @zextload_P4_i8(ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P4_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(4) %ptra |
| %a32 = zext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Zero-extending i16 load through an addrspace(4) pointer; the i32 value is added to itself and stored to addrspace(1). |
| ; The checks expect the addrspace(4) load to be selected as a single global_load_u16, with no separate extend instruction. |
| define amdgpu_ps void @zextload_P4_i16(ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P4_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(4) %ptra |
| %a32 = zext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Sign-extending i8 load through an addrspace(5) pointer; the i32 value is added to itself and stored to addrspace(5). |
| ; The checks expect a single scratch_load_i8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: sextload_P5_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_i8 v0, v0, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: scratch_store_b32 v1, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(5) %ptra |
| %a32 = sext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; Sign-extending i16 load through an addrspace(5) pointer; the i32 value is added to itself and stored to addrspace(5). |
| ; The checks expect a single scratch_load_i16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @sextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: sextload_P5_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_i16 v0, v0, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: scratch_store_b32 v1, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(5) %ptra |
| %a32 = sext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; Zero-extending i8 load through an addrspace(5) pointer; the i32 value is added to itself and stored to addrspace(5). |
| ; The checks expect a single scratch_load_u8 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P5_i8(ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: zextload_P5_i8: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_u8 v0, v0, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: scratch_store_b32 v1, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i8, ptr addrspace(5) %ptra |
| %a32 = zext i8 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; Zero-extending i16 load through an addrspace(5) pointer; the i32 value is added to itself and stored to addrspace(5). |
| ; The checks expect a single scratch_load_u16 followed directly by the wait and the add, i.e. no separate extend instruction. |
| define amdgpu_ps void @zextload_P5_i16(ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: zextload_P5_i16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_u16 v0, v0, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v0 |
| ; GFX12-NEXT: scratch_store_b32 v1, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %a = load i16, ptr addrspace(5) %ptra |
| %a32 = zext i16 %a to i32 |
| %res = add i32 %a32, %a32 |
| store i32 %res, ptr addrspace(5) %out |
| ret void |
| } |