; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s

; No-return 1D sample: one float coordinate, dmask 15 (0xf in the expected
; assembly). The expected instruction has "off" in the destination slot.
define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D sample with two float coordinates (%s, %t). The gfx12 run lines
; expect the address as a register list ([v0, v1]); the gfx10/gfx11 run lines
; expect a contiguous range (v[0:1]).
define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX10PLUS-LABEL: sample_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 3D sample with three float coordinates (%s, %t, %r).
define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; GFX10PLUS-LABEL: sample_3d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_3d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return cube sample: two float coordinates plus a float %face operand.
define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX10PLUS-LABEL: sample_cube_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_cube_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 1D-array sample: one float coordinate plus a float %slice operand.
define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; GFX10PLUS-LABEL: sample_1darray_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_1darray_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D-array sample: two float coordinates plus a float %slice operand.
define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX10PLUS-LABEL: sample_2darray_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_2darray_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 1D sample_b: the operand ahead of the coordinate is the LOD bias
; (not a depth-compare reference, which belongs to the sample_c variants).
; IR value names do not appear in the emitted assembly, so the assertions
; below are independent of the operand's name.
define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; GFX10PLUS-LABEL: sample_b_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_b_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D sample_b: the operand ahead of the two coordinates is the LOD
; bias (not a depth-compare reference, which belongs to the sample_c variants).
; IR value names do not appear in the emitted assembly, so the assertions
; below are independent of the operand's name.
define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; GFX10PLUS-LABEL: sample_b_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_b_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 1D sample_c: %zcompare is passed ahead of the single coordinate.
define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_c_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_c_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D sample_c: %zcompare is passed ahead of the two coordinates.
define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_c_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_c_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 1D sample_d: two float derivatives (%dsdh, %dsdv) precede the
; coordinate. Unlike the plain/b/c variants in this file, the expected
; assembly contains no s_wqm_b32.
define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D sample_d: four float derivatives precede the two coordinates,
; six address operands in total. The gfx12 expectation packs the trailing
; three operands into a register range (v[3:5]) inside the address list.
define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; GFX10PLUS-LABEL: sample_d_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 1D sample_l: the coordinate is followed by a float %lod operand.
; The expected assembly contains no s_wqm_b32.
define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; GFX10PLUS-LABEL: sample_l_1d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_l_1d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; No-return 2D sample_l: the two coordinates are followed by a float %lod
; operand. The expected assembly contains no s_wqm_b32.
define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX10PLUS-LABEL: sample_l_2d_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_l_2d_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; Mix 1: a no-return sample followed by a returning sample of the same
; coordinate; the returning sample's <4 x float> is the shader result.
; The expected assembly saves exec, enters WQM, and restores exec with
; s_and_b32 before both samples; a single wait precedes the return.
define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_1:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_1:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Mix 2: the reverse order of mix 1 - a returning sample first, then a
; no-return sample of the same coordinate. Because the returning sample
; writes v[0:3], the expected assembly copies the coordinate to v4 so the
; following no-return sample can still read it.
define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_2:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_2:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_mov_b32_e32 v4, v0
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Mix 3: a returning sample whose lane 0 becomes the coordinate of a second
; returning sample, with a no-return sample of the original coordinate in
; between. In the expected assembly the first sample is issued before exec is
; restored, and the wait before the dependent sample allows one outstanding
; operation (the no-return sample). The two selectors differ only in the first
; sample: SelectionDAG narrows it to dmask 0x1 / a single VGPR, GlobalISel
; keeps dmask 0xf / four VGPRs.
define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_3:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_3:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v.0 = extractelement <4 x float> %v, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %u
}

; Mix 4: two returning samples interleaved with five no-return samples. The
; first returning sample feeds the coordinates of two no-return samples, the
; second returning sample, and two more no-return samples; lane 0 of the second
; returning sample feeds the final no-return sample. The expected assembly
; contains seven image_sample instructions with exec restored after the fourth.
;
; NOTE(review): %v.0, %v.1, %v.2 and %v.3 all extract lane 0 of %v, so they are
; the same value (the SelectionDAG expectation accordingly uses dmask 0x1 for
; the first sample). If distinct lanes were intended, the IR must be changed
; and the assertions regenerated together - confirm the intent before editing.
define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(2)
; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(2)
; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_4:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x2
; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_4:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x2
; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v.0 = extractelement <4 x float> %v, i32 0
  %v.1 = extractelement <4 x float> %v, i32 0
  %v.2 = extractelement <4 x float> %v, i32 0
  %v.3 = extractelement <4 x float> %v, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %u.0 = extractelement <4 x float> %u, i32 0
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %u
}

; No-return 1D sample_d with half-precision derivatives (%dsdh, %dsdv) and a
; float coordinate. The expected mnemonic is image_sample_d_g16 and the
; address still occupies three VGPRs.
define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: sample_d_1d_g16_nortn:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret void
}

; Intrinsic declarations. Each name must match the spelling used at the call
; sites above. No-return variants use attribute group #0; the returning sample
; uses #1.

; Plain sample, one declaration per image dimension.
declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

; sample_b: bias operand precedes the coordinates.
declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

; sample_c: zcompare operand precedes the coordinates.
declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

; sample_d: the type suffixes follow "nortn" (gradient type, then coordinate
; type), exactly as spelled by the calls in sample_d_1d_nortn / sample_d_2d_nortn.
declare void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

; sample_l: lod operand follows the coordinates.
declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

; Returning sample used by the sample_nortn_mix_* tests.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample_d with half gradients and a float coordinate.
declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}
; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-GISEL-FAKE16: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX11-SDAG-FAKE16: {{.*}}
; GFX11-SDAG-TRUE16: {{.*}}
; GFX12-GISEL-FAKE16: {{.*}}
; GFX12-SDAG-FAKE16: {{.*}}
; GFX12-SDAG-TRUE16: {{.*}}
