| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
| ; FIXME-TRUE16: enable the GlobalISel runs with +real-true16 (the XUN lines below are currently disabled). |
| ; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s |
| ; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s |
| |
| ; Samples a 1D image through the no-return ("nortn") intrinsic with dmask operand 15. |
| ; The checks expect s_wqm_b32 followed by an image_sample whose destination is "off". |
| define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; GFX10PLUS-LABEL: sample_1d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_1d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D sample with coordinates (s, t). |
| ; The gfx10/gfx11 checks expect the address as the range v[0:1]; the gfx12 checks expect |
| ; the address registers listed individually in brackets. |
| define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { |
| ; GFX10PLUS-LABEL: sample_2d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_2d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 3D sample with coordinates (s, t, r). |
| ; The checks expect a three-register address and dim:SQ_RSRC_IMG_3D. |
| define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { |
| ; GFX10PLUS-LABEL: sample_3d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_3d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return cube sample with operands (s, t, face). |
| ; The checks expect a three-register address and dim:SQ_RSRC_IMG_CUBE. |
| define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { |
| ; GFX10PLUS-LABEL: sample_cube_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_cube_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 1D-array sample with operands (s, slice). |
| ; The checks expect a two-register address and dim:SQ_RSRC_IMG_1D_ARRAY. |
| define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { |
| ; GFX10PLUS-LABEL: sample_1darray_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_1darray_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D-array sample with operands (s, t, slice). |
| ; The checks expect a three-register address and dim:SQ_RSRC_IMG_2D_ARRAY. |
| define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { |
| ; GFX10PLUS-LABEL: sample_2darray_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_2darray_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 1D sample with LOD bias (image_sample_b). |
| ; The first float operand of the sample.b intrinsic is the bias, not a depth-compare |
| ; value, so the parameter is named %bias. IR value names do not affect the generated |
| ; code, so the expected instructions are unchanged. |
| define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { |
| ; GFX10PLUS-LABEL: sample_b_1d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_b_1d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D sample with LOD bias (image_sample_b). |
| ; The first float operand of the sample.b intrinsic is the bias, not a depth-compare |
| ; value, so the parameter is named %bias. IR value names do not affect the generated |
| ; code, so the expected instructions are unchanged. |
| define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { |
| ; GFX10PLUS-LABEL: sample_b_2d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_b_2d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 1D sample with a leading %zcompare operand (image_sample_c). |
| ; The checks expect %zcompare and %s together as a two-register address. |
| define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { |
| ; GFX10PLUS-LABEL: sample_c_1d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_c_1d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D sample with a leading %zcompare operand (image_sample_c). |
| ; The checks expect %zcompare, %s and %t together as a three-register address. |
| define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { |
| ; GFX10PLUS-LABEL: sample_c_2d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_c_2d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 1D sample with explicit derivatives %dsdh and %dsdv (image_sample_d). |
| ; Unlike the plain sample tests in this file, the checks here expect no s_wqm_b32 |
| ; before the sample instruction. |
| define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { |
| ; GFX10PLUS-LABEL: sample_d_1d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_d_1d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D sample with explicit derivatives (image_sample_d), six address operands. |
| ; The gfx10/gfx11 checks expect the range v[0:5]; the gfx12 checks expect three single |
| ; registers followed by the range v[3:5]. No s_wqm_b32 is expected. |
| define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { |
| ; GFX10PLUS-LABEL: sample_d_2d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_d_2d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 1D sample with a trailing %lod operand (image_sample_l). |
| ; The checks expect no s_wqm_b32 before the sample instruction. |
| define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { |
| ; GFX10PLUS-LABEL: sample_l_1d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_l_1d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; No-return 2D sample with a trailing %lod operand (image_sample_l). |
| ; The checks expect no s_wqm_b32 before the sample instruction. |
| define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { |
| ; GFX10PLUS-LABEL: sample_l_2d_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_l_2d_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; Mixes a no-return sample with a returning sample of the same coordinate, no-return first. |
| ; The checks expect exec to be saved, s_wqm_b32 applied and exec restored with s_and_b32 |
| ; before both image_sample instructions, then one wait before the result is returned. |
| define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; GFX10PLUS-LABEL: sample_nortn_mix_1: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: sample_nortn_mix_1: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| main_body: |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret <4 x float> %v |
| } |
| |
| ; Mixes a returning sample with a no-return sample of the same coordinate, returning first. |
| ; The checks expect the coordinate to be copied to v4 and that copy to be used as the |
| ; address of the no-return sample issued after the returning one. |
| define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; GFX10PLUS-LABEL: sample_nortn_mix_2: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0 |
| ; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: sample_nortn_mix_2: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-NEXT: v_mov_b32_e32 v4, v0 |
| ; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret <4 x float> %v |
| } |
| |
| ; Dependent chain: element 0 of a returning sample is the coordinate of a second returning |
| ; sample, with a no-return sample of the original coordinate issued in between. |
| ; The checks expect a wait with count 1 before the dependent sample, i.e. after the |
| ; no-return sample has been issued. SelectionDAG and GlobalISel have separate checks |
| ; because the first sample's dmask and destination registers differ between them. |
| define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3: |
| ; GFX10PLUS-SDAG: ; %bb.0: ; %main_body |
| ; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3: |
| ; GFX10PLUS-GISEL: ; %bb.0: ; %main_body |
| ; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-SDAG-LABEL: sample_nortn_mix_3: |
| ; GFX12-SDAG: ; %bb.0: ; %main_body |
| ; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1 |
| ; GFX12-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: sample_nortn_mix_3: |
| ; GFX12-GISEL: ; %bb.0: ; %main_body |
| ; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1 |
| ; GFX12-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %v.0 = extractelement <4 x float> %v, i32 0 |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret <4 x float> %u |
| } |
| |
| ; Longer dependent chain: two returning samples interleaved with five no-return samples |
| ; whose coordinates come from %s, from the first result (%v) and from the second (%u). |
| ; The checks expect a wait with count 1 before the first use of %v and a wait with |
| ; count 2 before the no-return sample that uses %u. |
| ; NOTE(review): %v.0 through %v.3 all extract element 0 of %v even though their names |
| ; suggest elements 0 to 3. The generated checks (dmask:0x1 on SelectionDAG, v4 used for |
| ; every dependent address) match the IR as written, so changing the indices requires |
| ; regenerating the assertions. Confirm whether this is intentional. |
| define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4: |
| ; GFX10PLUS-SDAG: ; %bb.0: ; %main_body |
| ; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4: |
| ; GFX10PLUS-GISEL: ; %bb.0: ; %main_body |
| ; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-SDAG-LABEL: sample_nortn_mix_4: |
| ; GFX12-SDAG: ; %bb.0: ; %main_body |
| ; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1 |
| ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x2 |
| ; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: sample_nortn_mix_4: |
| ; GFX12-GISEL: ; %bb.0: ; %main_body |
| ; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo |
| ; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo |
| ; GFX12-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1 |
| ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12 |
| ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x2 |
| ; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %v.0 = extractelement <4 x float> %v, i32 0 |
| %v.1 = extractelement <4 x float> %v, i32 0 |
| %v.2 = extractelement <4 x float> %v, i32 0 |
| %v.3 = extractelement <4 x float> %v, i32 0 |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| %u.0 = extractelement <4 x float> %u, i32 0 |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret <4 x float> %u |
| } |
| |
| ; No-return 1D sample with half-typed derivatives and a float coordinate. |
| ; The checks expect the image_sample_d_g16 form with a three-register address and no |
| ; s_wqm_b32. |
| define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { |
| ; GFX10PLUS-LABEL: sample_d_1d_g16_nortn: |
| ; GFX10PLUS: ; %bb.0: ; %main_body |
| ; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX10PLUS-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: sample_d_1d_g16_nortn: |
| ; GFX12: ; %bb.0: ; %main_body |
| ; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
| ; GFX12-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| ret void |
| } |
| |
| ; Declarations of the intrinsics called by the tests above, grouped by sample variant. |
| ; Each declared name is spelled exactly as it is at its call site. |
| declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| ; The sample.d names carry two type suffixes after "nortn": the derivative type, then the |
| ; coordinate type (compare the f16.f32 declaration further down). |
| declare void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0 |
| |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind readonly } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX10: {{.*}} |
| ; GFX10-GISEL: {{.*}} |
| ; GFX10-SDAG: {{.*}} |
| ; GFX11: {{.*}} |
| ; GFX11-GISEL: {{.*}} |
| ; GFX11-GISEL-FAKE16: {{.*}} |
| ; GFX11-SDAG: {{.*}} |
| ; GFX11-SDAG-FAKE16: {{.*}} |
| ; GFX11-SDAG-TRUE16: {{.*}} |
| ; GFX12-GISEL-FAKE16: {{.*}} |
| ; GFX12-SDAG-FAKE16: {{.*}} |
| ; GFX12-SDAG-TRUE16: {{.*}} |