; llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=hawaii %s -o - | FileCheck %s

 ; For all these tests we disable optimizations through function attributes
 ; because the code we are exercising here needs phis and we want to keep the
 ; IR small.

 ; This code used to crash in SDISel because f16 was promoted to f32 through
 ; a `f32 = vector_extract_elt <1 x f16>, i32 0`, which is illegal.
 ; The invalid SDNode, and thus the crash, was only exposed by constant
 ; folding.
 define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
 ; CHECK-LABEL: phi_vec1half_to_f32_with_const_folding:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  ; %bb.1: ; %bb
 ; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
 ; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
 ; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; Stores a <2 x half> to %dst whose element 1 is a constant-zero <1 x half>
 ; that reaches %bb only through a phi; element 0 is poison.
 entry:
   ; Jump to a separate block so the constant has to be carried across the
   ; block boundary by the phi below.
   br label %bb

 bb:
   ; Single-incoming phi of a constant <1 x half> zero vector.
   %phi = phi <1 x half> [ zeroinitializer, %entry ]
   ; The mask indexes the concatenation of both operands: index 0 selects the
   ; poison element, index 1 selects element 0 of %phi.
   %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
   store <2 x half> %res, ptr addrspace(1) %dst
   ret void
 }

 ; Same as phi_vec1half_to_f32_with_const_folding but without the folding.
 ; This test exercises the same invalid SDNode, but it happened to work by
 ; accident before. Here we make sure the fix also works as expected in the
 ; non-constant folding case.
 define void @phi_vec1half_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
 ; CHECK-LABEL: phi_vec1half_to_f32:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_cvt_f32_f16_e64 v0, v0
 ; CHECK-NEXT:  ; %bb.1: ; %bb
 ; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, v0
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
 ; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, s4
 ; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; Loads a <1 x half> from %src, passes it through a phi into %bb, and stores
 ; it to %dst as element 1 of a <2 x half> whose element 0 is poison.
 entry:
   ; The phi input is loaded from memory rather than being a constant, so
   ; there is no constant for the backend to fold.
   %input = load <1 x half>, ptr addrspace(1) %src
   br label %bb

 bb:
   ; Single-incoming phi carrying the loaded <1 x half> across the block
   ; boundary.
   %phi = phi <1 x half> [ %input, %entry ]
   ; The mask indexes the concatenation of both operands: index 0 selects the
   ; poison element, index 1 selects element 0 of %phi.
   %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
   store <2 x half> %res, ptr addrspace(1) %dst
   ret void
 }

 ; Same as phi_vec1half_to_f32 but with bfloat instead of half.
 define void @phi_vec1bf16_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
 ; CHECK-LABEL: phi_vec1bf16_to_f32:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
 ; CHECK-NEXT:    s_mov_b32 s4, 16
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_lshlrev_b32_e64 v0, s4, v0
 ; CHECK-NEXT:  ; %bb.1: ; %bb
 ; CHECK-NEXT:    v_mul_f32_e64 v0, 1.0, v0
 ; CHECK-NEXT:    s_mov_b32 s4, 16
 ; CHECK-NEXT:    v_lshrrev_b32_e64 v0, s4, v0
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; Loads a <1 x bfloat> from %src, passes it through a phi into %bb, and
 ; stores it to %dst as element 1 of a <2 x bfloat> whose element 0 is poison.
 entry:
   ; The phi input is loaded from memory rather than being a constant, so
   ; there is no constant for the backend to fold.
   %input = load <1 x bfloat>, ptr addrspace(1) %src
   br label %bb

 bb:
   ; Single-incoming phi carrying the loaded <1 x bfloat> across the block
   ; boundary.
   %phi = phi <1 x bfloat> [ %input, %entry ]
   ; The mask indexes the concatenation of both operands: index 0 selects the
   ; poison element, index 1 selects element 0 of %phi.
   %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
   store <2 x bfloat> %res, ptr addrspace(1) %dst
   ret void
 }

 ; Same as phi_vec1half_to_f32_with_const_folding but with bfloat instead of half.
 define void @phi_vec1bf16_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
 ; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0
 ; CHECK-NEXT:  ; %bb.1: ; %bb
 ; CHECK-NEXT:    v_mul_f32_e64 v2, 1.0, s4
 ; CHECK-NEXT:    s_mov_b32 s4, 16
 ; CHECK-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
 ; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; Stores a <2 x bfloat> to %dst whose element 1 is a constant-zero
 ; <1 x bfloat> that reaches %bb only through a phi; element 0 is poison.
 entry:
   ; Jump to a separate block so the constant has to be carried across the
   ; block boundary by the phi below.
   br label %bb

 bb:
   ; Single-incoming phi of a constant <1 x bfloat> zero vector.
   %phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
   ; The mask indexes the concatenation of both operands: index 0 selects the
   ; poison element, index 1 selects element 0 of %phi.
   %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
   store <2 x bfloat> %res, ptr addrspace(1) %dst
   ret void
 }

 ; optnone keeps the optimizer from simplifying away the phis the tests above
 ; rely on; LLVM requires noinline on every function marked optnone.
 attributes #0 = { noinline optnone }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
	; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=hawaii %s -o - | FileCheck %s

	; For all these tests we disable optimizations through function attributes
	; because the code we are exercising here needs phis and we want to keep the
	; IR small.

	; This code used to crash in SDISel because f16 was promoted to f32 through
	; a `f32 = vector_extract_elt <1 x f16>, i32 0`, which is illegal.
	; The invalid SDNode, and thus the crash, was only exposed by constant
	; folding.
	define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
	; CHECK-LABEL: phi_vec1half_to_f32_with_const_folding:
	; CHECK: ; %bb.0: ; %entry
	; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; CHECK-NEXT: s_mov_b32 s4, 0
	; CHECK-NEXT: ; %bb.1: ; %bb
	; CHECK-NEXT: v_cvt_f16_f32_e64 v2, s4
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
	; CHECK-NEXT: v_cvt_f16_f32_e64 v2, s4
	; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: s_setpc_b64 s[30:31]
	; Stores a <2 x half> to %dst whose element 1 is a constant-zero <1 x half>
	; that reaches %bb only through a phi; element 0 is poison.
	entry:
	; Jump to a separate block so the constant has to be carried across the
	; block boundary by the phi below.
	br label %bb

	bb:
	; Single-incoming phi of a constant <1 x half> zero vector.
	%phi = phi <1 x half> [ zeroinitializer, %entry ]
	; The mask indexes the concatenation of both operands: index 0 selects the
	; poison element, index 1 selects element 0 of %phi.
	%res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
	store <2 x half> %res, ptr addrspace(1) %dst
	ret void
	}

	; Same as phi_vec1half_to_f32_with_const_folding but without the folding.
	; This test exercises the same invalid SDNode, but it happened to work by
	; accident before. Here we make sure the fix also works as expected in the
	; non-constant folding case.
	define void @phi_vec1half_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
	; CHECK-LABEL: phi_vec1half_to_f32:
	; CHECK: ; %bb.0: ; %entry
	; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: v_cvt_f32_f16_e64 v0, v0
	; CHECK-NEXT: ; %bb.1: ; %bb
	; CHECK-NEXT: v_cvt_f16_f32_e64 v0, v0
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
	; CHECK-NEXT: v_cvt_f16_f32_e64 v0, s4
	; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: s_setpc_b64 s[30:31]
	; Loads a <1 x half> from %src, passes it through a phi into %bb, and stores
	; it to %dst as element 1 of a <2 x half> whose element 0 is poison.
	entry:
	; The phi input is loaded from memory rather than being a constant, so
	; there is no constant for the backend to fold.
	%input = load <1 x half>, ptr addrspace(1) %src
	br label %bb

	bb:
	; Single-incoming phi carrying the loaded <1 x half> across the block
	; boundary.
	%phi = phi <1 x half> [ %input, %entry ]
	; The mask indexes the concatenation of both operands: index 0 selects the
	; poison element, index 1 selects element 0 of %phi.
	%res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
	store <2 x half> %res, ptr addrspace(1) %dst
	ret void
	}

	; Same as phi_vec1half_to_f32 but with bfloat instead of half.
	define void @phi_vec1bf16_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
	; CHECK-LABEL: phi_vec1bf16_to_f32:
	; CHECK: ; %bb.0: ; %entry
	; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
	; CHECK-NEXT: s_mov_b32 s4, 16
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: v_lshlrev_b32_e64 v0, s4, v0
	; CHECK-NEXT: ; %bb.1: ; %bb
	; CHECK-NEXT: v_mul_f32_e64 v0, 1.0, v0
	; CHECK-NEXT: s_mov_b32 s4, 16
	; CHECK-NEXT: v_lshrrev_b32_e64 v0, s4, v0
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: s_setpc_b64 s[30:31]
	; Loads a <1 x bfloat> from %src, passes it through a phi into %bb, and
	; stores it to %dst as element 1 of a <2 x bfloat> whose element 0 is poison.
	entry:
	; The phi input is loaded from memory rather than being a constant, so
	; there is no constant for the backend to fold.
	%input = load <1 x bfloat>, ptr addrspace(1) %src
	br label %bb

	bb:
	; Single-incoming phi carrying the loaded <1 x bfloat> across the block
	; boundary.
	%phi = phi <1 x bfloat> [ %input, %entry ]
	; The mask indexes the concatenation of both operands: index 0 selects the
	; poison element, index 1 selects element 0 of %phi.
	%res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
	store <2 x bfloat> %res, ptr addrspace(1) %dst
	ret void
	}

	; Same as phi_vec1half_to_f32_with_const_folding but with bfloat instead of half.
	define void @phi_vec1bf16_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
	; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
	; CHECK: ; %bb.0: ; %entry
	; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; CHECK-NEXT: s_mov_b32 s4, 0
	; CHECK-NEXT: ; %bb.1: ; %bb
	; CHECK-NEXT: v_mul_f32_e64 v2, 1.0, s4
	; CHECK-NEXT: s_mov_b32 s4, 16
	; CHECK-NEXT: v_lshrrev_b32_e32 v2, s4, v2
	; CHECK-NEXT: s_mov_b32 s7, 0xf000
	; CHECK-NEXT: s_mov_b32 s6, 0
	; CHECK-NEXT: s_mov_b32 s4, s6
	; CHECK-NEXT: s_mov_b32 s5, s6
	; CHECK-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
	; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: s_setpc_b64 s[30:31]
	; Stores a <2 x bfloat> to %dst whose element 1 is a constant-zero
	; <1 x bfloat> that reaches %bb only through a phi; element 0 is poison.
	entry:
	; Jump to a separate block so the constant has to be carried across the
	; block boundary by the phi below.
	br label %bb

	bb:
	; Single-incoming phi of a constant <1 x bfloat> zero vector.
	%phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
	; The mask indexes the concatenation of both operands: index 0 selects the
	; poison element, index 1 selects element 0 of %phi.
	%res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
	store <2 x bfloat> %res, ptr addrspace(1) %dst
	ret void
	}

	; optnone keeps the optimizer from simplifying away the phis the tests above
	; rely on; LLVM requires noinline on every function marked optnone.
	attributes #0 = { noinline optnone }