| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12 %s |
| |
| ; i16 load through an addrspace(0) pointer, inserted into element 0 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be flat_load_d16_b16. |
| define amdgpu_ps void @load_P0_B16_D16(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: load_P0_B16_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_b16 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(0) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 0 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(0) pointer, inserted into element 1 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be flat_load_d16_hi_b16. |
| define amdgpu_ps void @load_P0_B16_D16_Hi(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: load_P0_B16_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(0) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 1 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(0) pointer, sign-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be flat_load_d16_i8. |
| define amdgpu_ps void @sextload_P0_i8_D16(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: sextload_P0_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_i8 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(0) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(0) pointer, sign-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be flat_load_d16_hi_i8. |
| define amdgpu_ps void @sextload_P0_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: sextload_P0_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_hi_i8 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(0) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(0) pointer, zero-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be flat_load_d16_u8. |
| define amdgpu_ps void @zextload_P0_i8_D16(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: zextload_P0_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_u8 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(0) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(0) pointer, zero-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be flat_load_d16_hi_u8. |
| define amdgpu_ps void @zextload_P0_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(0) %ptra, ptr addrspace(0) %out) { |
| ; GFX12-LABEL: zextload_P0_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: flat_load_d16_hi_u8 v0, v[1:2] |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: flat_store_b32 v[3:4], v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(0) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(0) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(1) pointer, inserted into element 0 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be global_load_d16_b16. |
| define amdgpu_ps void @load_P1_B16_D16(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: load_P1_B16_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_b16 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(1) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(1) pointer, inserted into element 1 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be global_load_d16_hi_b16. |
| define amdgpu_ps void @load_P1_B16_D16_Hi(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: load_P1_B16_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_b16 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(1) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(1) pointer, sign-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be global_load_d16_i8. |
| define amdgpu_ps void @sextload_P1_i8_D16(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P1_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_i8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(1) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(1) pointer, sign-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be global_load_d16_hi_i8. |
| define amdgpu_ps void @sextload_P1_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P1_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_i8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(1) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(1) pointer, zero-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be global_load_d16_u8. |
| define amdgpu_ps void @zextload_P1_i8_D16(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P1_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_u8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(1) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(1) pointer, zero-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be global_load_d16_hi_u8. |
| define amdgpu_ps void @zextload_P1_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(1) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P1_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_u8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(1) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(3) pointer, inserted into element 0 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be ds_load_u16_d16. |
| define amdgpu_ps void @load_P3_B16_D16(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: load_P3_B16_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u16_d16 v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(3) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 0 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(3) pointer, inserted into element 1 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be ds_load_u16_d16_hi. |
| define amdgpu_ps void @load_P3_B16_D16_Hi(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: load_P3_B16_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u16_d16_hi v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(3) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 1 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(3) pointer, sign-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be ds_load_i8_d16. |
| define amdgpu_ps void @sextload_P3_i8_D16(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: sextload_P3_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_i8_d16 v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(3) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(3) pointer, sign-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be ds_load_i8_d16_hi. |
| define amdgpu_ps void @sextload_P3_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: sextload_P3_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_i8_d16_hi v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(3) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(3) pointer, zero-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be ds_load_u8_d16. |
| define amdgpu_ps void @zextload_P3_i8_D16(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: zextload_P3_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u8_d16 v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(3) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(3) pointer, zero-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be ds_load_u8_d16_hi. |
| define amdgpu_ps void @zextload_P3_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(3) %ptra, ptr addrspace(3) %out) { |
| ; GFX12-LABEL: zextload_P3_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: ds_load_u8_d16_hi v0, v1 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: ds_store_b32 v2, v0 |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(3) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(3) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(4) pointer, inserted into element 0 of %vec and |
| ; stored to an addrspace(1) pointer. The checks expect global_load_d16_b16. |
| define amdgpu_ps void @load_P4_B16_D16(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: load_P4_B16_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_b16 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(4) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(4) pointer, inserted into element 1 of %vec and |
| ; stored to an addrspace(1) pointer. The checks expect global_load_d16_hi_b16. |
| define amdgpu_ps void @load_P4_B16_D16_Hi(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: load_P4_B16_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_b16 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(4) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(4) pointer, sign-extended to i16, inserted into |
| ; element 0 of %vec and stored to addrspace(1). The checks expect global_load_d16_i8. |
| define amdgpu_ps void @sextload_P4_i8_D16(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P4_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_i8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(4) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(4) pointer, sign-extended to i16, inserted into |
| ; element 1 of %vec and stored to addrspace(1). The checks expect global_load_d16_hi_i8. |
| define amdgpu_ps void @sextload_P4_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: sextload_P4_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_i8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(4) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(4) pointer, zero-extended to i16, inserted into |
| ; element 0 of %vec and stored to addrspace(1). The checks expect global_load_d16_u8. |
| define amdgpu_ps void @zextload_P4_i8_D16(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P4_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_u8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(4) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(4) pointer, zero-extended to i16, inserted into |
| ; element 1 of %vec and stored to addrspace(1). The checks expect global_load_d16_hi_u8. |
| define amdgpu_ps void @zextload_P4_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(4) %ptra, ptr addrspace(1) %out) { |
| ; GFX12-LABEL: zextload_P4_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_load_d16_hi_u8 v0, v[1:2], off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_store_b32 v[3:4], v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(4) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(5) pointer, inserted into element 0 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be scratch_load_d16_b16. |
| define amdgpu_ps void @load_P5_B16_D16(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: load_P5_B16_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_b16 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(5) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 0 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; i16 load through an addrspace(5) pointer, inserted into element 1 of %vec and |
| ; stored back as <2 x i16>. The checks expect the load to be scratch_load_d16_hi_b16. |
| define amdgpu_ps void @load_P5_B16_D16_Hi(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: load_P5_B16_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_hi_b16 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %half = load i16, ptr addrspace(5) %ptra |
| %merged = insertelement <2 x i16> %vec, i16 %half, i32 1 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(5) pointer, sign-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be scratch_load_d16_i8. |
| define amdgpu_ps void @sextload_P5_i8_D16(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: sextload_P5_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_i8 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(5) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(5) pointer, sign-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be scratch_load_d16_hi_i8. |
| define amdgpu_ps void @sextload_P5_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: sextload_P5_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_hi_i8 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(5) %ptra |
| %wide = sext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(5) pointer, zero-extended to i16 and inserted |
| ; into element 0 of %vec. The checks expect the load to be scratch_load_d16_u8. |
| define amdgpu_ps void @zextload_P5_i8_D16(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: zextload_P5_i8_D16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_u8 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(5) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 0 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |
| |
| ; i8 load through an addrspace(5) pointer, zero-extended to i16 and inserted |
| ; into element 1 of %vec. The checks expect the load to be scratch_load_d16_hi_u8. |
| define amdgpu_ps void @zextload_P5_i8_D16_Hi(<2 x i16> %vec, ptr addrspace(5) %ptra, ptr addrspace(5) %out) { |
| ; GFX12-LABEL: zextload_P5_i8_D16_Hi: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: scratch_load_d16_hi_u8 v0, v1, off |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: scratch_store_b32 v2, v0, off |
| ; GFX12-NEXT: s_endpgm |
| %byte = load i8, ptr addrspace(5) %ptra |
| %wide = zext i8 %byte to i16 |
| %merged = insertelement <2 x i16> %vec, i16 %wide, i32 1 |
| store <2 x i16> %merged, ptr addrspace(5) %out |
| ret void |
| } |