 ; test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll - llvm-project/llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -instcombine -S < %s | FileCheck %s

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.rcp
 ; --------------------------------------------------------------------

 declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
 declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone

 ; rcp.f32 of an undef operand: expected output is the NaN constant 0x7FF8000000000000.
 define float @test_constant_fold_rcp_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
 ; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
   ret float %val
 }

 ; rcp.f32 of the constant 1.0: expected to fold to 1.0.
 define float @test_constant_fold_rcp_f32_1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_1(
 ; CHECK-NEXT:    ret float 1.000000e+00
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
   ret float %val
 }

 ; rcp.f64 of the constant 1.0: expected to fold to 1.0.
 define double @test_constant_fold_rcp_f64_1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f64_1(
 ; CHECK-NEXT:    ret double 1.000000e+00
 ;
   %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
   ret double %val
 }

 ; rcp.f32 of the constant 0.5: expected to fold to 2.0.
 define float @test_constant_fold_rcp_f32_half() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_half(
 ; CHECK-NEXT:    ret float 2.000000e+00
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
   ret float %val
 }

 ; rcp.f64 of the constant 0.5: expected to fold to 2.0.
 define double @test_constant_fold_rcp_f64_half() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f64_half(
 ; CHECK-NEXT:    ret double 2.000000e+00
 ;
   %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
   ret double %val
 }

 ; rcp.f32 of the constant 43.0: expected to fold to the float constant 0x3F97D05F40000000.
 define float @test_constant_fold_rcp_f32_43() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
 ; CHECK-NEXT:    ret float 0x3F97D05F40000000
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
   ret float %val
 }

 ; rcp.f64 of the constant 43.0: expected to fold to the double constant 0x3F97D05F417D05F4.
 define double @test_constant_fold_rcp_f64_43() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
 ; CHECK-NEXT:    ret double 0x3F97D05F417D05F4
 ;
   %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
   ret double %val
 }

 ; Negative test: with strictfp on both the function and the call site, the
 ; rcp.f32 call on the constant 43.0 is expected to stay in the output unfolded.
 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
 ; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) [[ATTR11:#.*]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
   ret float %val
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.rsq
 ; --------------------------------------------------------------------

 declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone

 ; rsq.f32 of an undef operand: expected output is the NaN constant 0x7FF8000000000000.
 define float @test_constant_fold_rsq_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
 ; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
   ret float %val
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.frexp.mant
 ; --------------------------------------------------------------------

 declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
 declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone


 ; frexp.mant.f32 of undef: expected to fold to undef.
 define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
 ; CHECK-NEXT:    ret float undef
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
   ret float %val
 }

 ; frexp.mant.f64 of undef: expected to fold to undef.
 define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
 ; CHECK-NEXT:    ret double undef
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
   ret double %val
 }

 ; frexp.mant.f32 of +0.0: expected to fold to +0.0.
 define float @test_constant_fold_frexp_mant_f32_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
   ret float %val
 }

 ; frexp.mant.f64 of +0.0: expected to fold to +0.0.
 define double @test_constant_fold_frexp_mant_f64_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
 ; CHECK-NEXT:    ret double 0.000000e+00
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
   ret double %val
 }

 ; frexp.mant.f32 of -0.0: expected to fold to -0.0 (sign kept).
 define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
 ; CHECK-NEXT:    ret float -0.000000e+00
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
   ret float %val
 }

 ; frexp.mant.f64 of -0.0: expected to fold to -0.0 (sign kept).
 define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
 ; CHECK-NEXT:    ret double -0.000000e+00
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
   ret double %val
 }

 ; frexp.mant.f32 of 1.0: expected to fold to 0.5.
 define float @test_constant_fold_frexp_mant_f32_1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
   ret float %val
 }

 ; frexp.mant.f64 of 1.0: expected to fold to 0.5.
 define double @test_constant_fold_frexp_mant_f64_1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
 ; CHECK-NEXT:    ret double 5.000000e-01
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
   ret double %val
 }

 ; frexp.mant.f32 of -1.0: expected to fold to -0.5.
 define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
 ; CHECK-NEXT:    ret float -5.000000e-01
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
   ret float %val
 }

 ; frexp.mant.f64 of -1.0: expected to fold to -0.5.
 define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
 ; CHECK-NEXT:    ret double -5.000000e-01
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
   ret double %val
 }

 ; frexp.mant.f32 of the NaN constant 0x7FF8000000000000: expected to fold to the same constant.
 define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
 ; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
   ret float %val
 }

 ; frexp.mant.f64 of the NaN constant 0x7FF8000000000000: expected to fold to the same constant.
 define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
 ; CHECK-NEXT:    ret double 0x7FF8000000000000
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
   ret double %val
 }

 ; frexp.mant.f32 of +infinity (0x7FF0000000000000): expected to fold to the same constant.
 define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
 ; CHECK-NEXT:    ret float 0x7FF0000000000000
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
   ret float %val
 }

 ; frexp.mant.f64 of +infinity (0x7FF0000000000000): expected to fold to the same constant.
 define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
 ; CHECK-NEXT:    ret double 0x7FF0000000000000
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
   ret double %val
 }

 ; frexp.mant.f32 of -infinity (0xFFF0000000000000): expected to fold to the same constant.
 define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
 ; CHECK-NEXT:    ret float 0xFFF0000000000000
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
   ret float %val
 }

 ; frexp.mant.f64 of -infinity (0xFFF0000000000000): expected to fold to the same constant.
 define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
 ; CHECK-NEXT:    ret double 0xFFF0000000000000
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
   ret double %val
 }

 ; frexp.mant.f32 of the "max_num" input 0x47EFFFFFE0000000: expected to fold to 0x3FEFFFFFE0000000.
 define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
 ; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
   ret float %val
 }

 ; frexp.mant.f64 of the "max_num" input 0x7FEFFFFFFFFFFFFF: expected to fold to 0x3FEFFFFFFFFFFFFF.
 define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
 ; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
   ret double %val
 }

 ; frexp.mant.f32 of the "min_num" input 0x36A0000000000000: expected to fold to 0.5.
 define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
   ret float %val
 }

 ; frexp.mant.f64 of the "min_num" input 4.940656e-324: expected to fold to 0.5.
 define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
 ; CHECK-NEXT:    ret double 5.000000e-01
 ;
   %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
   ret double %val
 }


 ; --------------------------------------------------------------------
 ; llvm.amdgcn.frexp.exp
 ; --------------------------------------------------------------------

 declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
 declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone

 ; frexp.exp.f32 of undef: expected to fold to undef.
 define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
 ; CHECK-NEXT:    ret i32 undef
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
   ret i32 %val
 }

 ; frexp.exp.f64 of undef: expected to fold to undef.
 define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
 ; CHECK-NEXT:    ret i32 undef
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
   ret i32 %val
 }

 ; frexp.exp.f32 of +0.0: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
   ret i32 %val
 }

 ; frexp.exp.f64 of +0.0: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
   ret i32 %val
 }

 ; frexp.exp.f32 of -0.0: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
   ret i32 %val
 }

 ; frexp.exp.f64 of -0.0: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
   ret i32 %val
 }

 ; frexp.exp.f32 of 1024.0: expected to fold to 11.
 define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
 ; CHECK-NEXT:    ret i32 11
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
   ret i32 %val
 }

 ; frexp.exp.f64 of 1024.0: expected to fold to 11.
 define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
 ; CHECK-NEXT:    ret i32 11
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
   ret i32 %val
 }

 ; frexp.exp.f32 of -1024.0: expected to fold to 11 (same as for +1024.0).
 define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
 ; CHECK-NEXT:    ret i32 11
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
   ret i32 %val
 }

 ; frexp.exp.f64 of -1024.0: expected to fold to 11 (same as for +1024.0).
 define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
 ; CHECK-NEXT:    ret i32 11
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
   ret i32 %val
 }

 ; frexp.exp.f32 of 0.0009765625 (1/1024): expected to fold to -9.
 define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
 ; CHECK-NEXT:    ret i32 -9
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
   ret i32 %val
 }

 ; frexp.exp.f64 of 0.0009765625 (1/1024): expected to fold to -9.
 define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
 ; CHECK-NEXT:    ret i32 -9
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
   ret i32 %val
 }

 ; frexp.exp.f32 of the NaN constant 0x7FF8000000000000: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
   ret i32 %val
 }

 ; frexp.exp.f64 of the NaN constant 0x7FF8000000000000: expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
   ret i32 %val
 }

 ; frexp.exp.f32 of +infinity (0x7FF0000000000000): expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
   ret i32 %val
 }

 ; frexp.exp.f64 of +infinity (0x7FF0000000000000): expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
   ret i32 %val
 }

 ; frexp.exp.f32 of -infinity (0xFFF0000000000000): expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
   ret i32 %val
 }

 ; frexp.exp.f64 of -infinity (0xFFF0000000000000): expected to fold to 0.
 define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
 ; CHECK-NEXT:    ret i32 0
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
   ret i32 %val
 }

 ; frexp.exp.f32 of the "max_num" input 0x47EFFFFFE0000000: expected to fold to 128.
 define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
 ; CHECK-NEXT:    ret i32 128
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
   ret i32 %val
 }

 ; frexp.exp.f64 of the "max_num" input 0x7FEFFFFFFFFFFFFF: expected to fold to 1024.
 define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
 ; CHECK-NEXT:    ret i32 1024
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
   ret i32 %val
 }

 ; frexp.exp.f32 of the "min_num" input 0x36A0000000000000: expected to fold to -148.
 define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
 ; CHECK-NEXT:    ret i32 -148
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
   ret i32 %val
 }

 ; frexp.exp.f64 of the "min_num" input 4.940656e-324: expected to fold to -1073.
 define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
 ; CHECK-NEXT:    ret i32 -1073
 ;
   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
   ret i32 %val
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.class
 ; --------------------------------------------------------------------

 declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
 declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone

 ; class.f32 with a variable value and an undef mask: expected to fold to false.
 define i1 @test_class_undef_mask_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_undef_mask_f32(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
   ret i1 %val
 }

 ; class.f32 with mask 1025: the call is expected to be kept with the mask
 ; constant rewritten to 1.
 define i1 @test_class_over_max_mask_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_over_max_mask_f32(
 ; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 1)
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
   ret i1 %val
 }

 ; class.f32 with a variable value and mask 0: expected to fold to false.
 define i1 @test_class_no_mask_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_no_mask_f32(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
   ret i1 %val
 }

 ; class.f32 with a variable value and mask 1023: expected to fold to true.
 define i1 @test_class_full_mask_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_full_mask_f32(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
   ret i1 %val
 }

 ; class.f32 with an undef value and mask 0: expected to fold to false.
 define i1 @test_class_undef_no_mask_f32() nounwind {
 ; CHECK-LABEL: @test_class_undef_no_mask_f32(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
   ret i1 %val
 }

 ; class.f32 with an undef value and mask 1023: expected to fold to true.
 define i1 @test_class_undef_full_mask_f32() nounwind {
 ; CHECK-LABEL: @test_class_undef_full_mask_f32(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
   ret i1 %val
 }

 ; class.f32 with an undef value and mask 4: expected to fold to undef.
 define i1 @test_class_undef_val_f32() nounwind {
 ; CHECK-LABEL: @test_class_undef_val_f32(
 ; CHECK-NEXT:    ret i1 undef
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
   ret i1 %val
 }

 ; class.f32 with both the value and the mask undef: expected to fold to undef.
 define i1 @test_class_undef_undef_f32() nounwind {
 ; CHECK-LABEL: @test_class_undef_undef_f32(
 ; CHECK-NEXT:    ret i1 undef
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
   ret i1 %val
 }

 ; Negative test: class.f32 with a variable value and a variable mask is
 ; expected to remain in the output unchanged.
 define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
 ; CHECK-LABEL: @test_class_var_mask_f32(
 ; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
   ret i1 %val
 }

 ; class.f32 with mask 3: expected to be replaced by 'fcmp uno x, 0.0'.
 define i1 @test_class_isnan_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_isnan_f32(
 ; CHECK-NEXT:    [[VAL:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
   ret i1 %val
 }

 ; class.f32 with mask 96: expected to be replaced by 'fcmp oeq x, 0.0'.
 define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
 ; CHECK-LABEL: @test_class_is_p0_n0_f32(
 ; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    ret i1 [[VAL]]
 ;
   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96)
   ret i1 %val
 }

 ; class.f64 of the "snan" constant 0x7FF0000000000001 with mask 1: expected to fold to true.
 define i1 @test_constant_class_snan_test_snan_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
   ret i1 %val
 }

 ; class.f64 of the "qnan" constant 0x7FF8000000000000 with mask 2: expected to fold to true.
 define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
   ret i1 %val
 }

 ; class.f64 of the "qnan" constant 0x7FF8000000000000 with mask 1: expected to fold to false.
 define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
   ret i1 %val
 }

 ; class.f64 of -infinity (0xFFF0000000000000) with mask 4: expected to fold to true.
 define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
   ret i1 %val
 }

 ; class.f64 of +infinity (0x7FF0000000000000) with mask 4: expected to fold to false.
 define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
   ret i1 %val
 }

 ; class.f64 of the "qnan" constant 0x7FF8000000000000 with mask 4: expected to fold to false.
 define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
   ret i1 %val
 }

 ; class.f64 of the "snan" constant 0x7FF0000000000001 with mask 4: expected to fold to false.
 define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
   ret i1 %val
 }

 ; class.f64 of -1.0 with mask 8: expected to fold to true.
 define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
   ret i1 %val
 }

 ; class.f64 of +1.0 with mask 8: expected to fold to false.
 define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
   ret i1 %val
 }

 ; class.f64 of the "nsubnormal" constant 0x800fffffffffffff with mask 16: expected to fold to true.
 define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
   ret i1 %val
 }

 ; class.f64 of the "psubnormal" constant 0x000fffffffffffff with mask 16: expected to fold to false.
 define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
   ret i1 %val
 }

 ; class.f64 of -0.0 with mask 32: expected to fold to true.
 define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
   ret i1 %val
 }

 ; class.f64 of +0.0 with mask 32: expected to fold to false.
 define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
   ret i1 %val
 }

 ; class.f64 of +0.0 with mask 64: expected to fold to true.
 define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
   ret i1 %val
 }

 ; class.f64 of -0.0 with mask 64: expected to fold to false.
 define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
   ret i1 %val
 }

 ; class.f64 of the "psubnormal" constant 0x000fffffffffffff with mask 128: expected to fold to true.
 define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
   ret i1 %val
 }

 ; class.f64 of the "nsubnormal" constant 0x800fffffffffffff with mask 128: expected to fold to false.
 define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
   ret i1 %val
 }

 ; class.f64 of +1.0 with mask 256: expected to fold to true.
 define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
   ret i1 %val
 }

 ; class.f64 of -1.0 with mask 256: expected to fold to false.
 define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
   ret i1 %val
 }

 ; class.f64 of +infinity (0x7FF0000000000000) with mask 512: expected to fold to true.
 define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
 ; CHECK-NEXT:    ret i1 true
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
   ret i1 %val
 }

 ; class.f64 of -infinity (0xFFF0000000000000) with mask 512: expected to fold to false.
 define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
   ret i1 %val
 }

 ; class.f64 of the "qnan" constant 0x7FF8000000000000 with mask 512: expected to fold to false.
 define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
   ret i1 %val
 }

 ; class.f64 of the "snan" constant 0x7FF0000000000001 with mask 512: expected to fold to false.
 define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
 ; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
 ; CHECK-NEXT:    ret i1 false
 ;
   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
   ret i1 %val
 }

 ; class.f32 with mask 1 on the result of an nnan fadd: expected to fold to
 ; false, with the fadd no longer present in the output.
 define i1 @test_class_is_snan_nnan_src(float %x) {
 ; CHECK-LABEL: @test_class_is_snan_nnan_src(
 ; CHECK-NEXT:    ret i1 false
 ;
   %nnan = fadd nnan float %x, 1.0
   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1)
   ret i1 %class
 }

 ; class.f32 with mask 2 on the result of an nnan fadd: expected to fold to
 ; false, with the fadd no longer present in the output.
 define i1 @test_class_is_qnan_nnan_src(float %x) {
 ; CHECK-LABEL: @test_class_is_qnan_nnan_src(
 ; CHECK-NEXT:    ret i1 false
 ;
   %nnan = fadd nnan float %x, 1.0
   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
   ret i1 %class
 }

 ; class.f32 with mask 3 on the result of an nnan fadd: expected to fold to
 ; false, with the fadd no longer present in the output.
 define i1 @test_class_is_nan_nnan_src(float %x) {
 ; CHECK-LABEL: @test_class_is_nan_nnan_src(
 ; CHECK-NEXT:    ret i1 false
 ;
   %nnan = fadd nnan float %x, 1.0
   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
   ret i1 %class
 }

 ; class.f32 with mask 267 on the result of an nnan fadd: the fadd and the
 ; call are expected to be kept, with the mask constant rewritten to 264.
 define i1 @test_class_is_nan_other_nnan_src(float %x) {
 ; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
 ; CHECK-NEXT:    [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
 ; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[NNAN]], i32 264)
 ; CHECK-NEXT:    ret i1 [[CLASS]]
 ;
   %nnan = fadd nnan float %x, 1.0
   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
   ret i1 %class
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cos
 ; --------------------------------------------------------------------
 declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
 declare float @llvm.fabs.f32(float) nounwind readnone

 ; cos.f32 of a negation written as 'fsub -0.0, x': the negation is expected
 ; to be dropped so the call takes x directly.
 define float @cos_fneg_f32(float %x) {
 ; CHECK-LABEL: @cos_fneg_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[COS]]
 ;
   %x.fneg = fsub float -0.0, %x
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
   ret float %cos
 }

 ; cos.f32 of a unary 'fneg x': the negation is expected to be dropped so the
 ; call takes x directly.
 define float @cos_unary_fneg_f32(float %x) {
 ; CHECK-LABEL: @cos_unary_fneg_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[COS]]
 ;
   %x.fneg = fneg float %x
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
   ret float %cos
 }

 ; cos.f32 of llvm.fabs(x): the fabs call is expected to be dropped so the
 ; cos call takes x directly.
 define float @cos_fabs_f32(float %x) {
 ; CHECK-LABEL: @cos_fabs_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[COS]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
   ret float %cos
 }

 ; cos.f32 of 'fsub -0.0, fabs(x)': both the fabs and the negation are
 ; expected to be dropped so the cos call takes x directly.
 define float @cos_fabs_fneg_f32(float %x) {
 ; CHECK-LABEL: @cos_fabs_fneg_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[COS]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
   %x.fabs.fneg = fsub float -0.0, %x.fabs
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
   ret float %cos
 }

 ; cos.f32 of 'fneg fabs(x)' (unary fneg): both the fabs and the negation are
 ; expected to be dropped so the cos call takes x directly.
 define float @cos_fabs_unary_fneg_f32(float %x) {
 ; CHECK-LABEL: @cos_fabs_unary_fneg_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[COS]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
   %x.fabs.fneg = fneg float %x.fabs
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
   ret float %cos
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pkrtz
 ; --------------------------------------------------------------------

 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone

 ; Negative test: cvt.pkrtz with two variable operands is expected to remain unchanged.
 define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
 ; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
   ret <2 x half> %cvt
 }

 ; Negative test: cvt.pkrtz with a constant 0.0 first operand and a variable
 ; second operand is expected to remain unchanged.
 define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
 ; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
   ret <2 x half> %cvt
 }

 ; Negative test: cvt.pkrtz with a variable first operand and a constant 0.0
 ; second operand is expected to remain unchanged.
 define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
 ; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float 0.000000e+00)
 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
   ret <2 x half> %cvt
 }

 ; Negative test: cvt.pkrtz with only the first operand undef is expected to remain unchanged.
 define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
 ; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
   ret <2 x half> %cvt
 }

 ; Negative test: cvt.pkrtz with only the second operand undef is expected to remain unchanged.
 define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
 ; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float undef)
 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
   ret <2 x half> %cvt
 }

 ; cvt.pkrtz with both operands undef: expected to fold to an undef vector.
 define <2 x half> @undef_cvt_pkrtz() {
 ; CHECK-LABEL: @undef_cvt_pkrtz(
 ; CHECK-NEXT:    ret <2 x half> undef
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
   ret <2 x half> %cvt
 }

 ; cvt.pkrtz of (0.0, 0.0): expected to fold to zeroinitializer.
 define <2 x half> @constant_splat0_cvt_pkrtz() {
 ; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
 ; CHECK-NEXT:    ret <2 x half> zeroinitializer
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
   ret <2 x half> %cvt
 }

 ; cvt.pkrtz of (2.0, 4.0): expected to fold to <half 0xH4000, half 0xH4400>.
 define <2 x half> @constant_cvt_pkrtz() {
 ; CHECK-LABEL: @constant_cvt_pkrtz(
 ; CHECK-NEXT:    ret <2 x half> <half 0xH4000, half 0xH4400>
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
   ret <2 x half> %cvt
 }

 ; Test constant values where rtz changes the result: 65535.0 in both operands
 ; is expected to fold to half 0xH7BFF in both lanes.
 define <2 x half> @constant_rtz_pkrtz() {
 ; CHECK-LABEL: @constant_rtz_pkrtz(
 ; CHECK-NEXT:    ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
   ret <2 x half> %cvt
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pknorm.i16
 ; --------------------------------------------------------------------

 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone

 ; Negative test: cvt.pknorm.i16 with only the first operand undef is expected to remain unchanged.
 define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
   ret <2 x i16> %cvt
 }

 ; Negative test: cvt.pknorm.i16 with only the second operand undef is expected to remain unchanged.
 define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float [[X:%.*]], float undef)
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
   ret <2 x i16> %cvt
 }

 ; cvt.pknorm.i16 with both operands undef: expected to fold to an undef vector.
 define <2 x i16> @undef_cvt_pknorm_i16() {
 ; CHECK-LABEL: @undef_cvt_pknorm_i16(
 ; CHECK-NEXT:    ret <2 x i16> undef
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
   ret <2 x i16> %cvt
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pknorm.u16
 ; --------------------------------------------------------------------

 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone

 ; Negative test: cvt.pknorm.u16 with only the first operand undef is expected to remain unchanged.
 define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
   ret <2 x i16> %cvt
 }

 ; Negative test: cvt.pknorm.u16 with only the second operand undef is expected to remain unchanged.
 define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float [[X:%.*]], float undef)
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
   ret <2 x i16> %cvt
 }

 ; cvt.pknorm.u16 with both operands undef: expected to fold to an undef vector.
 define <2 x i16> @undef_cvt_pknorm_u16() {
 ; CHECK-LABEL: @undef_cvt_pknorm_u16(
 ; CHECK-NEXT:    ret <2 x i16> undef
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
   ret <2 x i16> %cvt
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pk.i16
 ; --------------------------------------------------------------------

 declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone

 ; Negative test: cvt.pk.i16 with only the first operand undef is expected to remain unchanged.
 define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
 ; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
   ret <2 x i16> %cvt
 }

 ; Only the second operand is undef: the CHECK lines expect the call to be
 ; kept exactly as written.
 define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
 ; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 [[X:%.*]], i32 undef)
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
   ret <2 x i16> %cvt
 }

 ; Both operands are undef: the CHECK lines expect the call to fold to an
 ; undef <2 x i16> result.
 define <2 x i16> @undef_cvt_pk_i16() {
 ; CHECK-LABEL: @undef_cvt_pk_i16(
 ; CHECK-NEXT:    ret <2 x i16> undef
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
   ret <2 x i16> %cvt
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pk.u16
 ; --------------------------------------------------------------------

 declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone

 ; Only the first operand is undef: the CHECK lines expect the call to be
 ; kept exactly as written.
 define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
 ; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
   ret <2 x i16> %cvt
 }

 ; Only the second operand is undef: the CHECK lines expect the call to be
 ; kept exactly as written.
 define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
 ; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 [[X:%.*]], i32 undef)
 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
   ret <2 x i16> %cvt
 }

 ; Both operands are undef: the CHECK lines expect the call to fold to an
 ; undef <2 x i16> result.
 define <2 x i16> @undef_cvt_pk_u16() {
 ; CHECK-LABEL: @undef_cvt_pk_u16(
 ; CHECK-NEXT:    ret <2 x i16> undef
 ;
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
   ret <2 x i16> %cvt
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.ubfe
 ; --------------------------------------------------------------------

 declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
 declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone

 ; Source, offset and width are all variable: the CHECK lines expect the
 ; call to be left unchanged.
 define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
 ; CHECK-LABEL: @ubfe_var_i32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
   ret i32 %bfe
 }

 ; Constant offset 133 with a variable width: the CHECK lines expect the
 ; offset operand to be rewritten to 5 (133 mod 32).
 define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 5, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
   ret i32 %bfe
 }

 ; Constant width 133 with a variable offset: the CHECK lines expect the
 ; width operand to be rewritten to 5 (133 mod 32).
 define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 5)
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
   ret i32 %bfe
 }

 ; Constant width 0 with a variable offset: the CHECK lines expect the
 ; whole call to fold to the constant 0.
 define i32 @ubfe_width_0(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_width_0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
   ret i32 %bfe
 }

 ; Constant width 31 with a variable offset: the CHECK lines expect the
 ; call to be left unchanged.
 define i32 @ubfe_width_31(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_width_31(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 31)
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
   ret i32 %bfe
 }

 ; Constant width 32 with a variable offset: the CHECK lines expect the
 ; same result as width 0, i.e. the constant 0.
 define i32 @ubfe_width_32(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_width_32(
 ; CHECK-NEXT:    ret i32 0
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
   ret i32 %bfe
 }

 ; Constant width 33 with a variable offset: the CHECK lines expect the
 ; width operand to be rewritten to 1.
 define i32 @ubfe_width_33(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_width_33(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 1)
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
   ret i32 %bfe
 }

 ; Constant offset 33 with a variable width: the CHECK lines expect the
 ; offset operand to be rewritten to 1.
 define i32 @ubfe_offset_33(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_offset_33(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 1, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
   ret i32 %bfe
 }

 ; Constant offset 0 with a variable width: the CHECK lines expect the
 ; call to be left unchanged.
 define i32 @ubfe_offset_0(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_offset_0(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
   ret i32 %bfe
 }

 ; Constant offset 32 with a variable width: the CHECK lines expect the
 ; offset operand to be rewritten to 0.
 define i32 @ubfe_offset_32(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_offset_32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
   ret i32 %bfe
 }

 ; Constant offset 31 with a variable width: the CHECK lines expect the
 ; call to be left unchanged.
 define i32 @ubfe_offset_31(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_offset_31(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
   ret i32 %bfe
 }

 ; Constant offset 0 and width 0 on a variable source: the CHECK lines
 ; expect the constant 0.
 define i32 @ubfe_offset_0_width_0(i32 %src) {
 ; CHECK-LABEL: @ubfe_offset_0_width_0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
   ret i32 %bfe
 }

 ; Constant offset 0 and width 3: the CHECK lines expect the intrinsic to
 ; be replaced by a single `and` with the mask 7.
 define i32 @ubfe_offset_0_width_3(i32 %src) {
 ; CHECK-LABEL: @ubfe_offset_0_width_3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[SRC:%.*]], 7
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
   ret i32 %bfe
 }

 ; Constant offset 3 and width 1: the CHECK lines expect an `lshr` by 3
 ; followed by an `and` with the mask 1.
 define i32 @ubfe_offset_3_width_1(i32 %src) {
 ; CHECK-LABEL: @ubfe_offset_3_width_1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
 ; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
   ret i32 %bfe
 }

 ; Constant offset 3 and width 4: the CHECK lines expect an `lshr` by 3
 ; followed by an `and` with the mask 15.
 define i32 @ubfe_offset_3_width_4(i32 %src) {
 ; CHECK-LABEL: @ubfe_offset_3_width_4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
 ; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 15
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
   ret i32 %bfe
 }

 ; All three operands are the constant 0: the CHECK lines expect the
 ; constant 0.
 define i32 @ubfe_0_0_0() {
 ; CHECK-LABEL: @ubfe_0_0_0(
 ; CHECK-NEXT:    ret i32 0
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
   ret i32 %bfe
 }

 ; Fully constant call (source -1, offset 5, width 7): the CHECK lines
 ; expect the constant 127.
 define i32 @ubfe_neg1_5_7() {
 ; CHECK-LABEL: @ubfe_neg1_5_7(
 ; CHECK-NEXT:    ret i32 127
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
   ret i32 %bfe
 }

 ; Undef source with variable offset and width: the CHECK lines expect
 ; the call to fold to undef.
 define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
 ; CHECK-LABEL: @ubfe_undef_src_i32(
 ; CHECK-NEXT:    ret i32 undef
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
   ret i32 %bfe
 }

 ; Undef offset with variable source and width: the CHECK lines expect
 ; the call to be left unchanged.
 define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_undef_offset_i32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 undef, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
   ret i32 %bfe
 }

 ; Undef width with variable source and offset: the CHECK lines expect
 ; the call to be left unchanged.
 define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
 ; CHECK-LABEL: @ubfe_undef_width_i32(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 undef)
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
   ret i32 %bfe
 }

 ; i64 variant with constant offset 33 and width 4: the CHECK lines expect
 ; an `lshr` by the full offset 33 followed by an `and` with the mask 15.
 define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
 ; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SRC:%.*]], 33
 ; CHECK-NEXT:    [[BFE:%.*]] = and i64 [[TMP1]], 15
 ; CHECK-NEXT:    ret i64 [[BFE]]
 ;
   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
   ret i64 %bfe
 }

 ; i64 variant with constant offset 0 and a variable width: the CHECK
 ; lines expect the call to be left unchanged.
 define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
 ; CHECK-LABEL: @ubfe_offset_0_i64(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i64 @llvm.amdgcn.ubfe.i64(i64 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i64 [[BFE]]
 ;
   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
   ret i64 %bfe
 }

 ; i64 variant with constant offset 32 and width 32: the CHECK lines
 ; expect a single `lshr` by 32 with no masking step.
 define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
 ; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
 ; CHECK-NEXT:    [[BFE:%.*]] = lshr i64 [[SRC:%.*]], 32
 ; CHECK-NEXT:    ret i64 [[BFE]]
 ;
   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
   ret i64 %bfe
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.sbfe
 ; --------------------------------------------------------------------

 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
 declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone

 ; Signed variant with constant offset 31 and a variable width: the CHECK
 ; lines expect the call to be left unchanged.
 define i32 @sbfe_offset_31(i32 %src, i32 %width) {
 ; CHECK-LABEL: @sbfe_offset_31(
 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.sbfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
 ; CHECK-NEXT:    ret i32 [[BFE]]
 ;
   %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
   ret i32 %bfe
 }

 ; Fully constant signed call (source -1, offset 5, width 7): the CHECK
 ; lines expect the constant -1 (the unsigned twin above expects 127).
 define i32 @sbfe_neg1_5_7() {
 ; CHECK-LABEL: @sbfe_neg1_5_7(
 ; CHECK-NEXT:    ret i32 -1
 ;
   %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
   ret i32 %bfe
 }

 ; Signed i64 variant with constant offset 32 and width 32: the CHECK
 ; lines expect a single `ashr` by 32.
 define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
 ; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
 ; CHECK-NEXT:    [[BFE:%.*]] = ashr i64 [[SRC:%.*]], 32
 ; CHECK-NEXT:    ret i64 [[BFE]]
 ;
   %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
   ret i64 %bfe
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.exp
 ; --------------------------------------------------------------------

 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly


 ; Issues llvm.amdgcn.exp.f32 with the second immarg set to 1, 2, 4, 8, 0,
 ; 3, 5, 9 and 15, using both constant and variable float sources. The CHECK
 ; lines expect float source N to be kept only when bit N of that immarg is
 ; set and to be replaced by undef otherwise; every other operand, and the
 ; order of the calls, is expected to stay as written.
 define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
   ; enable src0..src3 constants
 ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float [[X:%.*]], float undef, float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float [[Y:%.*]], float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float [[Z:%.*]], float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float [[W:%.*]], i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)

   ; enable src0..src3 variables
   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)

   ; enable none
   call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)

   ; enable different source combinations
   call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)

   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.exp.compr
 ; --------------------------------------------------------------------

 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly


 ; Issues llvm.amdgcn.exp.compr.v2f16 with the second immarg set to 0, 1, 2,
 ; 3, 12 and 15, using constant and variable <2 x half> sources. Per the
 ; CHECK lines, the first vector is kept for 1, 2, 3 and 15 and the second
 ; vector for 12 and 15; in every other case the vector is expected to be
 ; replaced by undef.
 define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
 ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> [[XY:%.*]], <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> [[ZW:%.*]], i1 true, i1 false)
 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[XY]], <2 x half> [[ZW]], i1 true, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)

   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)

   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.fmed3
 ; --------------------------------------------------------------------

 declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone

 ; All three operands are variable: the CHECK lines expect the call to be
 ; left unchanged.
 define float @fmed3_f32(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fmed3_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
   ret float %med3
 }

 ; Operand order (x, 0.0, 1.0) already has the variable first: the CHECK
 ; lines expect the call to be left unchanged.
 define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
 ; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
   ret float %med3
 }

 ; Operand order (0.0, x, 1.0): the CHECK lines expect the operands to be
 ; reordered to (x, 0.0, 1.0).
 define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
 ; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
   ret float %med3
 }

 ; Operand order (0.0, 1.0, x): the CHECK lines expect the operands to be
 ; reordered to (x, 0.0, 1.0).
 define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
 ; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
   ret float %med3
 }

 ; Operand order (x, y, 1.0) already has the constant last: the CHECK
 ; lines expect the call to be left unchanged.
 define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
   ret float %med3
 }

 ; Operand order (x, 1.0, y): the CHECK lines expect the operands to be
 ; reordered to (x, y, 1.0).
 define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
   ret float %med3
 }

 ; Operand order (1.0, x, y): the CHECK lines expect the operands to be
 ; reordered to (x, y, 1.0).
 define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
   ret float %med3
 }

 ; Undef in the first operand: the CHECK lines expect the call to become
 ; llvm.minnum.f32 of the two remaining operands.
 define float @fmed3_undef_x_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_undef_x_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
   ret float %med3
 }

 ; Same as the undef-first-operand case but with the `nnan` flag on the
 ; call: the CHECK lines expect the flag to be carried over to llvm.minnum.
 define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
   ret float %med3
 }

 ; Undef in the second operand: the CHECK lines expect the call to become
 ; llvm.minnum.f32 of the two remaining operands.
 define float @fmed3_x_undef_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_x_undef_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
   ret float %med3
 }

 ; Undef in the third operand: the CHECK lines expect llvm.maxnum.f32 (not
 ; minnum) of the two remaining operands.
 define float @fmed3_x_y_undef_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_x_y_undef_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
   ret float %med3
 }

 ; NaN constant 0x7FF8000000000000 in the first operand: the CHECK lines
 ; expect llvm.minnum.f32 of the two remaining operands.
 define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
   ret float %med3
 }

 ; NaN constant 0x7FF8000000000000 in the second operand: the CHECK lines
 ; expect llvm.minnum.f32 of the two remaining operands.
 define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
   ret float %med3
 }

 ; NaN constant 0x7FF8000000000000 in the third operand: the CHECK lines
 ; expect llvm.maxnum.f32 (not minnum) of the two remaining operands.
 define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
   ret float %med3
 }

 ; A different NaN constant (0x7FF8000100000000) in the first operand: the
 ; CHECK lines expect the same llvm.minnum.f32 result as for qnan0.
 define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
   ret float %med3
 }

 ; All three operands are distinct NaN constants. This can return any of the
 ; qnans; the CHECK lines pin the value of the third operand. The %x and %y
 ; parameters are not used by the body.
 define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
 ; CHECK-NEXT:    ret float 0x7FF8030000000000
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
   ret float %med3
 }

 ; Constant operands (0.5, -1.0, 4.0): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src0_0_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src0_0_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
   ret float %med3
 }

 ; Constant operands (0.5, 4.0, -1.0): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src0_1_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src0_1_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
   ret float %med3
 }

 ; Constant operands (-1.0, 0.5, 4.0): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src1_0_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src1_0_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
   ret float %med3
 }

 ; Constant operands (4.0, 0.5, -1.0): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src1_1_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src1_1_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
   ret float %med3
 }

 ; Constant operands (-1.0, 4.0, 0.5): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src2_0_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src2_0_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
   ret float %med3
 }

 ; Constant operands (4.0, -1.0, 0.5): the CHECK lines expect the constant
 ; 0.5. The %x and %y parameters are not used by the body.
 define float @fmed3_constant_src2_1_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmed3_constant_src2_1_f32(
 ; CHECK-NEXT:    ret float 5.000000e-01
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
   ret float %med3
 }

 ; Variable first operand with NaN constants in the other two: the CHECK
 ; lines expect the variable operand to be returned directly.
 define float @fmed3_x_qnan0_qnan1_f32(float %x) {
 ; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
 ; CHECK-NEXT:    ret float [[X:%.*]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
   ret float %med3
 }

 ; Variable second operand with NaN constants in the other two: the CHECK
 ; lines expect the variable operand to be returned directly.
 define float @fmed3_qnan0_x_qnan1_f32(float %x) {
 ; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
 ; CHECK-NEXT:    ret float [[X:%.*]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
   ret float %med3
 }

 ; Variable third operand with NaN constants in the other two: the CHECK
 ; lines expect the variable operand to be returned directly.
 define float @fmed3_qnan0_qnan1_x_f32(float %x) {
 ; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
 ; CHECK-NEXT:    ret float [[X:%.*]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
   ret float %med3
 }

 ; Constant operands (NaN, 0.0, 1.0): the CHECK lines expect the constant
 ; 0.0, the smaller of the two non-NaN operands.
 define float @fmed3_nan_0_1_f32() {
 ; CHECK-LABEL: @fmed3_nan_0_1_f32(
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0)
   ret float %med3
 }

 ; Constant operands (0.0, NaN, 1.0): the CHECK lines expect the constant
 ; 0.0, the smaller of the two non-NaN operands.
 define float @fmed3_0_nan_1_f32() {
 ; CHECK-LABEL: @fmed3_0_nan_1_f32(
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0)
   ret float %med
 }

 ; Constant operands (0.0, 1.0, NaN): the CHECK lines expect the constant
 ; 1.0, the larger of the two non-NaN operands.
 define float @fmed3_0_1_nan_f32() {
 ; CHECK-LABEL: @fmed3_0_1_nan_f32(
 ; CHECK-NEXT:    ret float 1.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
   ret float %med
 }

 ; Operands (undef, 0.0, 1.0): the CHECK lines expect the constant 0.0, the
 ; same result as the NaN-first-operand constant case.
 define float @fmed3_undef_0_1_f32() {
 ; CHECK-LABEL: @fmed3_undef_0_1_f32(
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0)
   ret float %med3
 }

 ; Operands (0.0, undef, 1.0): the CHECK lines expect the constant 0.0, the
 ; same result as the NaN-second-operand constant case.
 define float @fmed3_0_undef_1_f32() {
 ; CHECK-LABEL: @fmed3_0_undef_1_f32(
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0)
   ret float %med
 }

 ; Operands (0.0, 1.0, undef): the CHECK lines expect the constant 1.0, the
 ; same result as the NaN-third-operand constant case.
 define float @fmed3_0_1_undef_f32() {
 ; CHECK-LABEL: @fmed3_0_1_undef_f32(
 ; CHECK-NEXT:    ret float 1.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
   ret float %med
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.icmp
 ; --------------------------------------------------------------------

 declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent
 declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent
 declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent

 ; Uses the predicate codes 31 and 42, which per the test name are invalid
 ; icmp codes: the CHECK lines expect both calls and the `or` to be left
 ; unchanged.
 define i64 @invalid_icmp_code(i32 %a, i32 %b) {
 ; CHECK-LABEL: @invalid_icmp_code(
 ; CHECK-NEXT:    [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31)
 ; CHECK-NEXT:    [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42)
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
   %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31)
   %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42)
   %or = or i64 %under, %over
   ret i64 %or
 }

 ; Constant operands 9 and 8 with predicate code 32: the CHECK lines
 ; expect the call to fold to the constant 0.
 define i64 @icmp_constant_inputs_false() {
 ; CHECK-LABEL: @icmp_constant_inputs_false(
 ; CHECK-NEXT:    ret i64 0
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)
   ret i64 %result
 }

 ; Constant operands 9 and 8 with predicate code 34: the CHECK lines
 ; expect the call to be replaced by llvm.read_register.i64 on metadata !0.
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12:#.*]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
   ret i64 %result
 }

 ; Constant 9 in the first operand with predicate code 40: the CHECK lines
 ; expect the operands to be swapped (constant second) and the code to
 ; change to 38.
 define i64 @icmp_constant_to_rhs_slt(i32 %x) {
 ; CHECK-LABEL: @icmp_constant_to_rhs_slt(
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38)
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40)
   ret i64 %result
 }

 ; zext(icmp eq a, b) compared against 0 with code 33: the CHECK lines
 ; expect one icmp intrinsic applied directly to a and b with code 32.
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(icmp ne a, b) compared against 0 with code 33: the CHECK lines
 ; expect one icmp intrinsic applied directly to a and b with code 33.
 define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ne i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(icmp sle a, b) compared against 0 with code 33: the CHECK lines
 ; expect one icmp intrinsic applied directly to a and b with code 41.
 define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp sle i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(icmp ugt a, b) on i64 operands compared against 0 with code 33: the
 ; CHECK lines expect the i32 intrinsic to be replaced by the .i64.i64
 ; variant applied directly to a and b with code 34.
 define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Same as the ugt i64 case but with the constant 0 as the first operand of
 ; the outer intrinsic: the CHECK lines expect the identical folded call
 ; (a, b, code 34).
 ; NOTE(review): the function name says "ult" but the inner compare is
 ; `icmp ugt`; confirm whether the name or the predicate is the intended one.
 define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33)
   ret i64 %mask
 }

 ; zext(fcmp oeq a, b) compared against 0 with code 33: the CHECK lines
 ; expect an llvm.amdgcn.fcmp.i64.f32 call on a and b with code 1.
 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq float %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(fcmp une a, b) compared against 0 with code 33: the CHECK lines
 ; expect an llvm.amdgcn.fcmp.i64.f32 call on a and b with code 14.
 define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp une float %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(fcmp olt a, b) on doubles compared against 0 with code 33: the
 ; CHECK lines expect an llvm.amdgcn.fcmp.i64.f64 call on a and b with
 ; code 4.
 define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp olt double %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Like the zext(icmp eq) case but using sext: the CHECK lines expect the
 ; same folded icmp intrinsic on a and b with code 32.
 define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %sext.cmp = sext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; zext(icmp eq a, b) compared against 0 with code 32: the CHECK lines
 ; expect one icmp intrinsic on a and b with code 33, i.e. the inner
 ; predicate is expected to be inverted.
 define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(icmp slt i32 %a, %b), 0) with predicate code 32 (eq).
 ; Expected: the inverted inner predicate, i.e. llvm.amdgcn.icmp.i64.i32 on %a, %b
 ; with code 39 (sge).
 define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(fcmp oeq float %a, %b), 0) with predicate code 32 (eq).
 ; Expected: the inverted floating-point predicate, i.e. llvm.amdgcn.fcmp.i64.f32
 ; on %a, %b with code 14 (une).
 define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq float %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(fcmp ule float %a, %b), 0) with predicate code 32 (eq).
 ; Expected: the inverted floating-point predicate, i.e. llvm.amdgcn.fcmp.i64.f32
 ; on %a, %b with code 2 (ogt).
 define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp ule float %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(fcmp ogt float %a, %b), 0) with predicate code 32 (eq).
 ; Expected: the inverted floating-point predicate, i.e. llvm.amdgcn.fcmp.i64.f32
 ; on %a, %b with code 13 (ule).
 define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp ogt float %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(icmp eq i32 %a, %b), 1) with predicate code 32 (eq).
 ; Expected: llvm.amdgcn.icmp.i64.i32 directly on %a, %b with code 32 (eq).
 define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(i1 %cond), 1) with predicate code 32 (eq), where the
 ; i1 is a plain argument rather than a compare.
 ; Expected: the zext is kept and the call is rewritten to compare against 0
 ; with code 33 (ne).
 define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
 ; CHECK-NEXT:    [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %zext.cond = zext i1 %cond to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(zext(i1 %cond), -1) with predicate code 32 (eq).
 ; Expected: no change; the output matches the input instruction for instruction.
 define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
 ; CHECK-NEXT:    [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %zext.cond = zext i1 %cond to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(sext(i1 %cond), 1) with predicate code 32 (eq).
 ; Expected: no change; the output matches the input instruction for instruction.
 define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %sext.cond = sext i1 %cond to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(sext(i1 %cond), -1) with predicate code 32 (eq).
 ; Expected: the sext is kept and the call is rewritten to compare against 0
 ; with code 33 (ne).
 define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %sext.cond = sext i1 %cond to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32)
   ret i64 %mask
 }

 ; 64-bit variant: llvm.amdgcn.icmp.i64.i64(sext(i1 %cond) to i64, -1) with code 32 (eq).
 ; Expected: the sext is kept and the call is rewritten to compare against 0
 ; with code 33 (ne).
 define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %sext.cond = sext i1 %cond to i64
   %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32)
   ret i64 %mask
 }

 ; TODO: Should be able to fold to false
 ; Input: llvm.amdgcn.icmp(sext(icmp eq i32 %a, %b), 1) with predicate code 32 (eq).
 ; A sign-extended i1 is either 0 or -1, so it can never equal 1; the expected
 ; output below nevertheless records the current behavior, which leaves the
 ; input unchanged.
 define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %sext.cmp = sext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(sext(icmp eq i32 %a, %b), -1) with predicate code 32 (eq).
 ; Expected: llvm.amdgcn.icmp.i64.i32 directly on %a, %b with code 32 (eq).
 define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %sext.cmp = sext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
   ret i64 %mask
 }

 ; Input: llvm.amdgcn.icmp(sext(icmp sge i32 %a, %b), -1) with predicate code 32 (eq).
 ; Expected: llvm.amdgcn.icmp.i64.i32 directly on %a, %b with code 39 (sge).
 define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp sge i32 %a, %b
   %sext.cmp = sext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
   ret i64 %mask
 }

 ; Input: the icmp sle result is negated with xor true before the zext and the
 ; llvm.amdgcn.icmp against 0 with code 33 (ne).
 ; Expected: the negation is absorbed into the predicate, giving
 ; llvm.amdgcn.icmp.i64.i32 on %a, %b with code 38 (sgt).
 define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp sle i32 %a, %b
   %not = xor i1 %cmp, true
   %zext.cmp = zext i1 %not to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Narrow-type variant: the inner compare is icmp eq on i4.
 ; Expected: both operands are zero-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 32 (eq).
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i4 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Narrow-type variant: the inner compare is icmp eq on i8.
 ; Expected: both operands are zero-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 32 (eq).
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i8 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; The inner compare is icmp eq on i16.
 ; Expected: llvm.amdgcn.icmp.i64.i16 directly on %a, %b with code 32 (eq),
 ; with no extension of the operands.
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i16 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Odd-width variant: the inner compare is icmp eq on i36.
 ; Expected: both operands are zero-extended to i64 and compared with
 ; llvm.amdgcn.icmp.i64.i64 using code 32 (eq).
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i36 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Negative test: the inner compare is icmp eq on i128.
 ; Expected: no fold; the compare, the zext and the llvm.amdgcn.icmp call all
 ; remain as written.
 define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i128 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; The inner compare is fcmp oeq on half.
 ; Expected: a single llvm.amdgcn.fcmp.i64.f16 on %a, %b with code 1 (oeq).
 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq half %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Negative test: the inner compare is fcmp oeq on fp128.
 ; Expected: no fold; the compare, the zext and the llvm.amdgcn.icmp call all
 ; remain as written.
 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq fp128 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Signed narrow-type variant: the inner compare is icmp slt on i4.
 ; Expected: both operands are sign-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 40 (slt).
 define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i4 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Signed narrow-type variant: the inner compare is icmp slt on i8.
 ; Expected: both operands are sign-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 40 (slt).
 define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i8 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; The inner compare is icmp slt on i16.
 ; Expected: llvm.amdgcn.icmp.i64.i16 directly on %a, %b with code 40 (slt),
 ; with no extension of the operands.
 define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i16 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Unsigned narrow-type variant: the inner compare is icmp ult on i4.
 ; Expected: both operands are zero-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 36 (ult).
 define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i4 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; Unsigned narrow-type variant: the inner compare is icmp ult on i8.
 ; Expected: both operands are zero-extended to i16 and compared with
 ; llvm.amdgcn.icmp.i64.i16 using code 36 (ult).
 define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i8 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; The inner compare is icmp ult on i16.
 ; Expected: llvm.amdgcn.icmp.i64.i16 directly on %a, %b with code 36 (ult),
 ; with no extension of the operands.
 define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i16 %a, %b
   %zext.cmp = zext i1 %cmp to i32
   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 ; 1-bit NE comparisons

 ; The i1 result of icmp eq i32 is passed straight to llvm.amdgcn.icmp.i64.i1
 ; (compared against false with code 33, ne), without any zext/sext.
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp ne i32 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ne i32 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp sle i32 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp sle i32 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp ugt i64 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; Swapped-operand variant: the input passes the constant false as the first
 ; operand of llvm.amdgcn.icmp.i64.i1 and %cmp as the second.
 ; Expected: the constant is moved to the right-hand side (%cmp, false) with the
 ; code left at 33 (ne); the inner compare is kept.
 ; NOTE(review): the function name says "ult" but the body compares with ugt --
 ; confirm which is intended.
 define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33)
   ret i64 %mask
 }

 ; The i1 result of fcmp oeq float is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq float %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of fcmp une float is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp une float %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of fcmp olt double is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp olt double %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp eq i4 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i4 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp eq i8 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i8 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp eq i16 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i16 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp eq i36 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i36 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp eq i128 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp eq i128 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of fcmp oeq half is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq half %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of fcmp oeq fp128 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq fp128 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp slt i4 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i4 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp slt i8 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i8 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp slt i16 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp slt i16 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp ult i4 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i4 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp ult i8 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i8 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; The i1 result of icmp ult i16 is compared against false with
 ; llvm.amdgcn.icmp.i64.i1, code 33 (ne).
 ; Expected: no change; both the compare and the intrinsic call remain.
 define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT:    ret i64 [[MASK]]
 ;
   %cmp = icmp ult i16 %a, %b
   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.fcmp
 ; --------------------------------------------------------------------

 declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent

 ; Calls llvm.amdgcn.fcmp with the out-of-range predicate codes -1 and 16 and
 ; ORs the two results.
 ; Expected: both calls and the or are left untouched.
 define i64 @invalid_fcmp_code(float %a, float %b) {
 ; CHECK-LABEL: @invalid_fcmp_code(
 ; CHECK-NEXT:    [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
 ; CHECK-NEXT:    [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
   %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
   %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
   %or = or i64 %under, %over
   ret i64 %or
 }

 ; llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 1 (oeq), which is false.
 ; Expected: the call folds to the constant i64 0.
 define i64 @fcmp_constant_inputs_false() {
 ; CHECK-LABEL: @fcmp_constant_inputs_false(
 ; CHECK-NEXT:    ret i64 0
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
   ret i64 %result
 }

 ; llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 4 (olt), which is true.
 ; Expected: the call is replaced by llvm.read_register.i64 on metadata !0
 ; (the metadata node is defined outside this part of the file).
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
   ret i64 %result
 }

 ; llvm.amdgcn.fcmp with the constant 4.0 on the left-hand side and code 4 (olt).
 ; Expected: the operands are swapped so the constant is on the right, and the
 ; code becomes 2 (ogt) to keep the comparison equivalent.
 define i64 @fcmp_constant_to_rhs_olt(float %x) {
 ; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
   ret i64 %result
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.ballot
 ; --------------------------------------------------------------------

 declare i64 @llvm.amdgcn.ballot.i64(i1) nounwind readnone convergent
 declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent

 ; llvm.amdgcn.ballot.i64 on a non-constant i1 argument.
 ; Expected: the call is left unchanged.
 define i64 @ballot_nocombine_64(i1 %i) {
 ; CHECK-LABEL: @ballot_nocombine_64(
 ; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
   ret i64 %b
 }

 ; llvm.amdgcn.ballot.i64 on the constant i1 0.
 ; Expected: the call folds to the constant i64 0.
 define i64 @ballot_zero_64() {
 ; CHECK-LABEL: @ballot_zero_64(
 ; CHECK-NEXT:    ret i64 0
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 0)
   ret i64 %b
 }

 ; llvm.amdgcn.ballot.i64 on the constant i1 1.
 ; Expected: the call is replaced by llvm.read_register.i64 on metadata !0
 ; (the metadata node is defined outside this part of the file).
 define i64 @ballot_one_64() {
 ; CHECK-LABEL: @ballot_one_64(
 ; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
   ret i64 %b
 }

 ; llvm.amdgcn.ballot.i32 on a non-constant i1 argument.
 ; Expected: the call is left unchanged.
 define i32 @ballot_nocombine_32(i1 %i) {
 ; CHECK-LABEL: @ballot_nocombine_32(
 ; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
   ret i32 %b
 }

 ; llvm.amdgcn.ballot.i32 on the constant i1 0.
 ; Expected: the call folds to the constant i32 0.
 define i32 @ballot_zero_32() {
 ; CHECK-LABEL: @ballot_zero_32(
 ; CHECK-NEXT:    ret i32 0
 ;
   %b = call i32 @llvm.amdgcn.ballot.i32(i1 0)
   ret i32 %b
 }

 ; llvm.amdgcn.ballot.i32 on the constant i1 1.
 ; Expected: the call is replaced by llvm.read_register.i32 on metadata !1
 ; (the metadata node is defined outside this part of the file).
 define i32 @ballot_one_32() {
 ; CHECK-LABEL: @ballot_one_32(
 ; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata !1) [[ATTR12]]
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
   ret i32 %b
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.wqm.vote
 ; --------------------------------------------------------------------

 declare i1 @llvm.amdgcn.wqm.vote(i1)

 ; llvm.amdgcn.wqm.vote on the constant true feeds a select between 1.0 and 0.0.
 ; Expected: the call and the select both fold away, returning 1.0.
 define float @wqm_vote_true() {
 ; CHECK-LABEL: @wqm_vote_true(
 ; CHECK-NEXT:  main_body:
 ; CHECK-NEXT:    ret float 1.000000e+00
 ;
 main_body:
   %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
   %r = select i1 %w, float 1.0, float 0.0
   ret float %r
 }

 ; llvm.amdgcn.wqm.vote on the constant false feeds a select between 1.0 and 0.0.
 ; Expected: the call and the select both fold away, returning 0.0.
 define float @wqm_vote_false() {
 ; CHECK-LABEL: @wqm_vote_false(
 ; CHECK-NEXT:  main_body:
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
 main_body:
   %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
   %r = select i1 %w, float 1.0, float 0.0
   ret float %r
 }

 ; llvm.amdgcn.wqm.vote on undef feeds a select between 1.0 and 0.0.
 ; Expected: the call and the select both fold away, returning 0.0.
 define float @wqm_vote_undef() {
 ; CHECK-LABEL: @wqm_vote_undef(
 ; CHECK-NEXT:  main_body:
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
 main_body:
   %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef)
   %r = select i1 %w, float 1.0, float 0.0
   ret float %r
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.kill
 ; --------------------------------------------------------------------

 declare void @llvm.amdgcn.kill(i1)

 ; llvm.amdgcn.kill called with the constant true.
 ; Expected: the call is removed, leaving only the return.
 define void @kill_true() {
 ; CHECK-LABEL: @kill_true(
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.amdgcn.kill(i1 true)
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.readfirstlane
 ; --------------------------------------------------------------------

 declare i32 @llvm.amdgcn.readfirstlane(i32)

 @gv = constant i32 0

 ; Applies llvm.amdgcn.readfirstlane to a function argument, to the constants 0
 ; and 123, to a constant expression (ptrtoint of @gv) and to undef, then stores
 ; each result with a volatile store so none of them is dropped.
 ; Expected: only the call on %arg survives; every constant or undef operand is
 ; stored directly in place of its call.
 define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_constant(
 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
   %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
   %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
   %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
   %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
   store volatile i32 %var, i32* undef
   store volatile i32 %zero, i32* undef
   store volatile i32 %imm, i32* undef
   store volatile i32 %constexpr, i32* undef
   store volatile i32 %undef, i32* undef
   ret void
 }

 ; A chain of three llvm.amdgcn.readfirstlane calls, each fed by the previous
 ; one, all in the same basic block.
 ; Expected: the chain collapses to the first call only.
 define i32 @readfirstlane_idempotent(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_idempotent(
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    ret i32 [[READ0]]
 ;
   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
   %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
   ret i32 %read2
 }

 ; llvm.amdgcn.readlane (lane 0) applied to the result of
 ; llvm.amdgcn.readfirstlane in the same basic block.
 ; Expected: the readlane is removed and the readfirstlane result is returned.
 define i32 @readfirstlane_readlane(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_readlane(
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    ret i32 [[READ0]]
 ;
   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
   ret i32 %read1
 }

 ; Two chained llvm.amdgcn.readfirstlane calls placed in different basic blocks
 ; (bb0 and bb1).
 ; Expected: no fold across the block boundary; both calls remain.
 define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
 ; CHECK-NEXT:    ret i32 [[READ1]]
 ;
 bb0:
   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   br label %bb1

 bb1:
   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
   ret i32 %read1
 }

 ; llvm.amdgcn.readlane (lane 0) in bb0 feeds llvm.amdgcn.readfirstlane in bb1.
 ; Expected: no fold across the block boundary; both calls remain.
 define i32 @readfirstlane_readlane_different_block(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_readlane_different_block(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
 ; CHECK-NEXT:    ret i32 [[READ1]]
 ;
 bb0:
   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
   br label %bb1

 bb1:
   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
   ret i32 %read1
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.readlane
 ; --------------------------------------------------------------------

 declare i32 @llvm.amdgcn.readlane(i32, i32)

 ; Applies llvm.amdgcn.readlane to a function argument (with constant lane 7)
 ; and, with the variable lane %lane, to the constants 0 and 123, a constant
 ; expression (ptrtoint of @gv) and undef; each result is stored with a
 ; volatile store so none of them is dropped.
 ; Expected: only the call on %arg survives; every constant or undef value
 ; operand is stored directly in place of its call.
 define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
 ; CHECK-LABEL: @readlane_constant(
 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 7)
 ; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
 ; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
   %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
   %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
   %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
   %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
   %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
   store volatile i32 %var, i32* undef
   store volatile i32 %zero, i32* undef
   store volatile i32 %imm, i32* undef
   store volatile i32 %constexpr, i32* undef
   store volatile i32 %undef, i32* undef
   ret void
 }

 ; Two chained llvm.amdgcn.readlane calls using the same %lane operand, in the
 ; same basic block.
 ; Expected: the second call is removed and the first result is returned.
 define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
 ; CHECK-LABEL: @readlane_idempotent(
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
 ; CHECK-NEXT:    ret i32 [[READ0]]
 ;
   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
   ret i32 %read1
 }

 ; Two chained llvm.amdgcn.readlane calls that use different lane operands
 ; (%lane0 and %lane1).
 ; Expected: no fold; both calls remain.
 define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
 ; CHECK-LABEL: @readlane_idempotent_different_lanes(
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
 ; CHECK-NEXT:    ret i32 [[READ1]]
 ;
   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
   ret i32 %read1
 }

 ; A readlane of lane 0 applied to a readfirstlane result in the same basic
 ; block is expected to fold away, leaving only the readfirstlane call.
 define i32 @readlane_readfirstlane(i32 %arg) {
 ; CHECK-LABEL: @readlane_readfirstlane(
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    ret i32 [[READ0]]
 ;
   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
   ret i32 %read1
 }

 ; Negative case: same-lane readlane of a readlane result, but the two calls
 ; sit in different basic blocks (bb0 and bb1); both are expected to remain.
 define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
 ; CHECK-LABEL: @readlane_idempotent_different_block(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
 ; CHECK-NEXT:    ret i32 [[READ1]]
 ;
 bb0:
   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
   br label %bb1

 bb1:
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
   ret i32 %read1
 }


 ; Negative case: readlane of lane 0 applied to a readfirstlane result that
 ; was computed in a different basic block; both calls are expected to remain.
 define i32 @readlane_readfirstlane_different_block(i32 %arg) {
 ; CHECK-LABEL: @readlane_readfirstlane_different_block(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
 ; CHECK-NEXT:    ret i32 [[READ1]]
 ;
 bb0:
   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   br label %bb1

 bb1:
   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
   ret i32 %read1
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.update.dpp.i32
 ; --------------------------------------------------------------------

 ; Six operands; the tests below pass two i32 values followed by three i32
 ; constants and one i1 constant.
 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)

 ; Negative case: with constant operands 1, 1, 1 and the final i1 false, the
 ; update.dpp call is expected to be left as is (first operand stays %in1).
 define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
 ; CHECK-LABEL: @update_dpp_no_combine(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false)
 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
 }

 ; With constant operands 3, 15, 15 and the final i1 true, the first operand
 ; (%in1; presumably the "old" value, going by the test name) is expected to
 ; be replaced by undef while the rest of the call is kept.
 define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
 ; CHECK-LABEL: @update_dpp_drop_old(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true)
 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
 }

 ; The first operand is already undef on input; the call is expected to come
 ; out unchanged.
 define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
 ; CHECK-LABEL: @update_dpp_undef_old(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true)
 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
 }


 ; --------------------------------------------------------------------
 ; llvm.amdgcn.permlane16
 ; --------------------------------------------------------------------

 ; Four i32 operands followed by two i1 operands marked immarg; the tests
 ; below always pass literal true/false for the i1 operands.
 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg)

 ; Negative case: with both i1 operands false, the permlane16 call is expected
 ; to be left unchanged, including the constant 12345 first operand.
 define amdgpu_kernel void @permlane16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; With the last i1 operand true (presumably bound_ctrl, going by the test
 ; name) and the other i1 false, the constant first operand 12345 is expected
 ; to be replaced by undef.
 define amdgpu_kernel void @permlane16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; With both i1 operands true, the constant first operand 12345 is expected to
 ; be replaced by undef; the i1 operands themselves are kept.
 define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.permlanex16
 ; --------------------------------------------------------------------

 ; Four i32 operands followed by two i1 operands marked immarg; the tests
 ; below always pass literal true/false for the i1 operands.
 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg)

 ; Negative case: with both i1 operands false, the permlanex16 call is
 ; expected to be left unchanged, including the constant 12345 first operand.
 define amdgpu_kernel void @permlanex16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; With the last i1 operand true (presumably bound_ctrl, going by the test
 ; name) and the other i1 false, the constant first operand 12345 is expected
 ; to be replaced by undef.
 define amdgpu_kernel void @permlanex16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; With both i1 operands true, the constant first operand 12345 is expected to
 ; be replaced by undef; the i1 operands themselves are kept.
 define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
   store i32 %res, i32 addrspace(1)* %out
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.image.sample a16
 ; --------------------------------------------------------------------

 ; f32-typed image.sample declarations. The tests that follow call these with
 ; operands fpext'ed from half and expect an f16-typed call in the output.
 ; Attribute group #1 is defined elsewhere in the file.
 ;
 ; Plain sample: 1d, 2d, 3d, cube, 1darray, 2darray.
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .c, .cl and .c.cl variants (1d and 2d).
 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .b variants. These names carry two trailing type suffixes (.f32.f32).
 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .d variants, including .c.d, .d.cl and .c.d.cl.
 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .cd variants, including .c.cd, .cd.cl and .c.cd.cl.
 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .l and .c.l variants.
 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .lz and .c.lz variants.
 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; .c.d.o.2darray with scalar float and <2 x float> results; these take a
 ; second leading i32 operand.
 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

 ; sample.1d: %s is a half that is fpext'ed to float before the call. The
 ; expected output drops the fpext and calls the .f16 form with the half.
 define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.2d: %s and %t are halves fpext'ed to float before the call. The
 ; expected output drops both fpexts and calls the .f16 form with the halves.
 define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.3d: %s, %t and %r are halves fpext'ed to float before the call. The
 ; expected output drops the fpexts and calls the .f16 form with the halves.
 define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %r32 = fpext half %r to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cube: %s, %t and %face are halves fpext'ed to float before the call.
 ; The expected output drops the fpexts and calls the .f16 form directly.
 define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ;
 ; CHECK-LABEL: @image_sample_a16_cube(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %face32 = fpext half %face to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.1darray: %s and %slice are halves fpext'ed to float before the call.
 ; The expected output drops the fpexts and calls the .f16 form directly.
 define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.2darray: %s, %t and %slice are halves fpext'ed to float before the
 ; call. The expected output drops the fpexts and calls the .f16 form directly.
 define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.1d: %zcompare is a float argument and is expected to stay float;
 ; only %s comes from an fpext of half. The expected output calls the .f16
 ; form with %s as half.
 define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.2d: %zcompare is a float argument and is expected to stay float;
 ; %s and %t come from fpexts of half. The expected output calls the .f16
 ; form with %s and %t as half.
 define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cl.1d: %s and %clamp are halves fpext'ed to float before the call.
 ; The expected output drops the fpexts and calls the .f16 form directly.
 define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cl.2d: %s, %t and %clamp are halves fpext'ed to float before the
 ; call. The expected output drops the fpexts and calls the .f16 form directly.
 define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cl.1d: %zcompare is a float argument and is expected to stay
 ; float; %s and %clamp come from fpexts of half. The expected output calls
 ; the .f16 form with %s and %clamp as half.
 define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cl.2d: %zcompare is a float argument and is expected to stay
 ; float; %s, %t and %clamp come from fpexts of half. The expected output
 ; calls the .f16 form with those three as half.
 define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.b.1d: %bias is a float argument and is expected to stay float; only
 ; %s comes from an fpext of half. In the expected output the type suffix
 ; changes from .f32.f32 to .f32.f16 and %s is passed as half.
 define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
 ; CHECK-LABEL: @image_sample_a16_b_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.b.2d: %bias is a float argument and is expected to stay float; %s
 ; and %t come from fpexts of half. In the expected output the type suffix
 ; changes from .f32.f32 to .f32.f16 and %s/%t are passed as half.
 define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_b_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.b.1d: %bias and %zcompare are float arguments and are expected to
 ; stay float; only %s comes from an fpext of half. In the expected output
 ; the type suffix changes from .f32.f32 to .f32.f16 and %s is passed as half.
 define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_b_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.b.2d: %bias and %zcompare are float arguments and are expected to
 ; stay float; %s and %t come from fpexts of half. In the expected output the
 ; type suffix changes from .f32.f32 to .f32.f16 and %s/%t are passed as half.
 define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_b_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.b.cl.1d: %bias is a float argument and is expected to stay float;
 ; %s and %clamp come from fpexts of half. In the expected output the type
 ; suffix changes from .f32.f32 to .f32.f16 and %s/%clamp are passed as half.
 define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.b.cl.2d: %bias is a float argument and is expected to stay float;
 ; %s, %t and %clamp come from fpexts of half. In the expected output the
 ; type suffix changes from .f32.f32 to .f32.f16 and those three are half.
 define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.b.cl.1d: %bias and %zcompare are float arguments and are expected
 ; to stay float; %s and %clamp come from fpexts of half. In the expected
 ; output the type suffix changes from .f32.f32 to .f32.f16.
 define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.b.cl.2d: %bias and %zcompare are float arguments and are expected
 ; to stay float; %s, %t and %clamp come from fpexts of half. In the expected
 ; output the type suffix changes from .f32.f32 to .f32.f16.
 define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.d.1d: %dsdh, %dsdv and %s are all half and are widened with fpext
 ; before the .f32.f32 call. The expected output calls the .f16.f16 overload on the half
 ; arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_d_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.d.2d: the four half derivative arguments and the half %s/%t are widened
 ; with fpext before the .f32.f32 call. The expected output calls the .f16.f16 overload on the
 ; half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_d_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.d.3d: the six half derivative arguments and the half %s/%t/%r are
 ; widened with fpext before the .f32.f32 call. The expected output calls the .f16.f16
 ; overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_d_3d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %drdh32 = fpext half %drdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %drdv32 = fpext half %drdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %r32 = fpext half %r to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.1d: %dsdh, %dsdv and %s are half and widened with fpext; %zcompare
 ; is a float argument passed as-is. The expected output calls the .f16.f16 overload with
 ; %zcompare still float and the other three operands as half, with no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_d_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.2d: the four half derivative arguments and the half %s/%t are
 ; widened with fpext; %zcompare is a float argument passed as-is. The expected output calls
 ; the .f16.f16 overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_d_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.d.cl.1d: %dsdh, %dsdv, %s and %clamp are all half and widened with
 ; fpext before the .f32.f32 call. The expected output calls the .f16.f16 overload on the
 ; half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_d_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.d.cl.2d: the four half derivative arguments plus the half %s, %t and
 ; %clamp are widened with fpext before the .f32.f32 call. The expected output calls the
 ; .f16.f16 overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_d_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.cl.1d: %dsdh, %dsdv, %s and %clamp are half and widened with fpext;
 ; %zcompare is a float argument passed as-is. The expected output calls the .f16.f16
 ; overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.cl.2d: the four half derivative arguments plus the half %s, %t and
 ; %clamp are widened with fpext; %zcompare is a float argument passed as-is. The expected
 ; output calls the .f16.f16 overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.cd.1d: %dsdh, %dsdv and %s are all half and widened with fpext before
 ; the .f32.f32 call. The expected output calls the .f16.f16 overload on the half arguments
 ; directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_cd_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.cd.2d: the four half derivative arguments and the half %s/%t are
 ; widened with fpext before the .f32.f32 call. The expected output calls the .f16.f16
 ; overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_cd_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.cd.1d: %dsdh, %dsdv and %s are half and widened with fpext;
 ; %zcompare is a float argument passed as-is. The expected output calls the .f16.f16
 ; overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.cd.2d: the four half derivative arguments and the half %s/%t are
 ; widened with fpext; %zcompare is a float argument passed as-is. The expected output calls
 ; the .f16.f16 overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.cd.cl.1d: %dsdh, %dsdv, %s and %clamp are all half and widened with
 ; fpext before the .f32.f32 call. The expected output calls the .f16.f16 overload on the
 ; half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cd_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.cd.cl.2d: the four half derivative arguments plus the half %s, %t and
 ; %clamp are widened with fpext before the .f32.f32 call. The expected output calls the
 ; .f16.f16 overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cd_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.cd.cl.1d: %dsdh, %dsdv, %s and %clamp are half and widened with
 ; fpext; %zcompare is a float argument passed as-is. The expected output calls the .f16.f16
 ; overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.cd.cl.2d: the four half derivative arguments plus the half %s, %t
 ; and %clamp are widened with fpext; %zcompare is a float argument passed as-is. The
 ; expected output calls the .f16.f16 overload with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.l.1d: %s and %lod are half and widened with fpext before the .f32
 ; call. The expected output calls the .f16 overload on the half arguments directly and
 ; keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_l_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %lod32 = fpext half %lod to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.l.2d: %s, %t and %lod are half and widened with fpext before the .f32
 ; call. The expected output calls the .f16 overload on the half arguments directly and
 ; keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_l_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %lod32 = fpext half %lod to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.l.1d: %s and %lod are half and widened with fpext; %zcompare is a
 ; float argument passed as-is. The expected output calls the .f16 overload with %zcompare
 ; still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_c_l_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %lod32 = fpext half %lod to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.l.2d: %s, %t and %lod are half and widened with fpext; %zcompare is
 ; a float argument passed as-is. The expected output calls the .f16 overload with %zcompare
 ; still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_c_l_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %lod32 = fpext half %lod to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.lz.1d: the single half argument %s is widened with fpext before the
 ; .f32 call. The expected output calls the .f16 overload on %s directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_lz_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.lz.2d: the half arguments %s and %t are widened with fpext before the
 ; .f32 call. The expected output calls the .f16 overload on them directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_lz_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.lz.1d: the half argument %s is widened with fpext; %zcompare is a
 ; float argument passed as-is. The expected output calls the .f16 overload with %zcompare
 ; still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_lz_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.lz.2d: the half arguments %s and %t are widened with fpext;
 ; %zcompare is a float argument passed as-is. The expected output calls the .f16 overload
 ; with %zcompare still float and no fpext left.
 define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_lz_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.o.2darray returning a scalar float (leading i32 operand is 4).
 ; The i32 %offset and float %zcompare are passed as-is; the four derivative arguments and
 ; %s, %t, %slice are half and widened with fpext. The expected output calls the
 ; .f32.f16.f16 overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %slice32 = fpext half %slice to float
   %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store float %res, float addrspace(1)* %out
   ret void
 }

 ; a16 case for sample.c.d.o.2darray returning <2 x float> (leading i32 operand is 6).
 ; The i32 %offset and float %zcompare are passed as-is; the four derivative arguments and
 ; %s, %t, %slice are half and widened with fpext. The expected output calls the
 ; .v2f32.f16.f16 overload on the half arguments directly and keeps no fpext.
 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %slice32 = fpext half %slice to float
   %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <2 x float> %res, <2 x float> addrspace(1)* %out
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.image.sample g16
 ; --------------------------------------------------------------------

 ; g16 case for sample.d.1d: only %dsdh and %dsdv are half and widened with fpext; %s is a
 ; float argument passed as-is. The expected output calls the .f16.f32 overload with the two
 ; derivative operands as half, %s still float, and no fpext left.
 define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_d_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dsdv32 = fpext half %dsdv to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; g16 case for sample.d.2d: only the four derivative arguments are half and widened with
 ; fpext; %s and %t are float arguments passed as-is. The expected output calls the .f16.f32
 ; overload with the derivatives as half, %s/%t still float, and no fpext left.
 define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_d_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; g16 case for sample.d.3d: only the six derivative arguments are half and widened with
 ; fpext; %s, %t and %r are float arguments passed as-is. The expected output calls the
 ; .f16.f32 overload with the derivatives as half, %s/%t/%r still float, and no fpext left.
 define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
 ; CHECK-LABEL: @image_sample_g16_d_3d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
   %dtdh32 = fpext half %dtdh to float
   %drdh32 = fpext half %drdh to float
   %dsdv32 = fpext half %dsdv to float
   %dtdv32 = fpext half %dtdv to float
   %drdv32 = fpext half %drdv to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   store <4 x float> %res, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.d.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload taking them as half, while
 ; %zcompare and %s are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_c_d_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dsdv.ext, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.d.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s and %t are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_c_d_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.d.cl.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_d_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dsdv.ext, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.d.cl.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s, %t and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_d_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.d.cl.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dsdv.ext, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.d.cl.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s, %t and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cd.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s is still passed as float.
 define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_cd_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dsdv.ext, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cd.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s and %t are still passed as float.
 define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_cd_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cd.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare and %s are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dsdv.ext, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cd.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s and %t are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cd.cl.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_cd_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dsdv.ext, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cd.cl.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %s, %t and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_cd_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cd.cl.1d: the two half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dsdv.ext = fpext half %dsdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dsdv.ext, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.cd.cl.2d: four half derivatives are widened by fpext.
 ; The assertions expect the .f16.f32 overload with half derivatives;
 ; %zcompare, %s, %t and %clamp are still passed as float.
 define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.c.d.o.2darray returning a scalar float (first i32 operand is 4).
 ; Four half derivatives are widened by fpext; the assertions expect the
 ; .f32.f16.f32 overload with half derivatives, while %offset, %zcompare, %s,
 ; %t and %slice keep their original types.
 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store float %sampled, float addrspace(1)* %out
   ret void
 }

 ; sample.c.d.o.2darray returning <2 x float> (first i32 operand is 6).
 ; Four half derivatives are widened by fpext; the assertions expect the
 ; .v2f32.f16.f32 overload with half derivatives, while %offset, %zcompare, %s,
 ; %t and %slice keep their original types.
 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %dsdh.ext = fpext half %dsdh to float
   %dtdh.ext = fpext half %dtdh to float
   %dsdv.ext = fpext half %dsdv to float
   %dtdv.ext = fpext half %dtdv to float
   %sampled = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh.ext, float %dtdh.ext, float %dsdv.ext, float %dtdv.ext, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <2 x float> %sampled, <2 x float> addrspace(1)* %out
   ret void
 }

 ; --------------------------------------------------------------------
 ; llvm.amdgcn.image.sample a16 preserve fast-math flags
 ; --------------------------------------------------------------------

 ; sample.1d called with the nnan flag on a coordinate widened from half.
 ; The assertions expect the .f16 overload taking %s directly, with nnan
 ; still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.1d called with nnan, ninf and nsz on a coordinate widened from half.
 ; The assertions expect the .f16 overload taking %s directly, with all three
 ; flags still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %sampled = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.1d called with the `fast` flag on a coordinate widened from half.
 ; The assertions expect the .f16 overload taking %s directly, with `fast`
 ; still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %sampled = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.2d called with nnan; both coordinates are widened from half.
 ; The assertions expect the .f16 overload taking %s and %t directly, with
 ; nnan still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %t.ext = fpext half %t to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s.ext, float %t.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.3d called with nnan; all three coordinates are widened from half.
 ; The assertions expect the .f16 overload taking %s, %t and %r directly, with
 ; nnan still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %t.ext = fpext half %t to float
   %r.ext = fpext half %r to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s.ext, float %t.ext, float %r.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.cube called with nnan; %s, %t and %face are widened from half.
 ; The assertions expect the .f16 overload taking the three half values
 ; directly, with nnan still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ; CHECK-LABEL: @image_sample_a16_cube_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %t.ext = fpext half %t to float
   %face.ext = fpext half %face to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s.ext, float %t.ext, float %face.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.1darray called with nnan; %s and %slice are widened from half.
 ; The assertions expect the .f16 overload taking both half values directly,
 ; with nnan still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %slice.ext = fpext half %slice to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s.ext, float %slice.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }

 ; sample.2darray called with nnan; %s, %t and %slice are widened from half.
 ; The assertions expect the .f16 overload taking the three half values
 ; directly, with nnan still present on the rewritten call.
 define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray_nnan(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s.ext = fpext half %s to float
   %t.ext = fpext half %t to float
   %slice.ext = fpext half %slice to float
   %sampled = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s.ext, float %t.ext, float %slice.ext, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   store <4 x float> %sampled, <4 x float> addrspace(1)* %out
   ret void
 }