; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -stop-after=amdgpu-isel < %s | FileCheck %s
; External array of four floats in addrspace(3); both tests load from it, and
; the expected MIR reads it with DS_READ2_B32_gfx9 (addrspace 3).
@lds = external local_unnamed_addr addrspace(3) global [4 x float], align 4
; Intrinsics under test. Both take (float, float, i32, i1) and return i32.
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float, float, i32, i1)
; Expect that no sgpr_32/sreg_32 virtual register is accessed through a lo16/hi16 subregister in the selected MIR.
; Feeds llvm.amdgcn.cvt.pk.fp8.f32 a third operand derived from a load of @lds
; (load -> fptrunc to half -> bitcast to i16 -> zext -> add 1 -> mul 5) and
; branches on whether the intrinsic's result equals 7.
; In the expected MIR the arithmetic is selected to scalar instructions
; (S_CVT_F16_F32, S_AND_B32, S_MUL_I32, S_ADD_I32). The scalar result is copied
; into a vgpr_32; its lo16 half feeds V_CVT_PK_FP8_F32_t16_e64 and its hi16
; half is recombined with the instruction's 16-bit result via REG_SEQUENCE.
define i32 @test_cvt_pk_fp8_f32(float %x, float %y) {
; CHECK-LABEL: name: test_cvt_pk_fp8_f32
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec
; CHECK-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64_align2 = DS_READ2_B32_gfx9 killed [[V_MOV_B32_e32_]], 0, 1, 0, implicit $exec :: (dereferenceable load (s64) from @lds, align 4, addrspace 3)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[DS_READ2_B32_gfx9_]].sub0
; CHECK-NEXT: [[S_CVT_F16_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_F16_F32 killed [[COPY2]], implicit $mode
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_MOV_B32_]], killed [[S_CVT_F16_F32_]], implicit-def dead $scc
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
; CHECK-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[S_AND_B32_]], [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_MUL_I32_]], [[S_MOV_B32_1]], implicit-def dead $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[S_ADD_I32_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].lo16
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_16 = COPY [[COPY4]]
; CHECK-NEXT: [[V_CVT_PK_FP8_F32_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_PK_FP8_F32_t16_e64 0, [[COPY1]], 0, [[COPY]], [[COPY5]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].hi16
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_CVT_PK_FP8_F32_t16_e64_]], %subreg.lo16, killed [[COPY6]], %subreg.hi16
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 killed [[REG_SEQUENCE]], killed [[S_MOV_B32_2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.a2:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.UnifiedReturnBlock:
; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.0, [[V_MOV_B32_e32_2]], %bb.1
; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[PHI]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
; Only element 0 of the loaded vector is used below.
%ptr = load <4 x float>, ptr addrspace(3) @lds, align 4
%f = extractelement <4 x float> %ptr, i32 0
; Reinterpret the half result as a 16-bit integer and widen it to i32.
%half = fptrunc float %f to half
%i16 = bitcast half %half to i16
%i32 = zext i16 %i16 to i32
; (%i32 + 1) * 5 is the value passed as the intrinsic's third operand.
%add = add i32 %i32, 1
%mul = mul i32 %add, 5
; NOTE(review): the trailing `i1 false` is presumably the word-select flag
; (result written to the low 16 bits) - confirm against the intrinsic definition.
%pk = call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %mul, i1 false)
; Return 1 when the intrinsic result equals 7, otherwise 2.
%icmp = icmp eq i32 %pk, 7
br i1 %icmp, label %a1, label %a2
a1:
ret i32 1
a2:
ret i32 2
}
; Same shape as the plain fp8 test, but calls llvm.amdgcn.cvt.pk.fp8.f32.e5m3.
; The third operand is again derived from a load of @lds
; (load -> fptrunc to half -> bitcast to i16 -> zext -> add 1 -> mul 5) and the
; function branches on whether the intrinsic's result equals 7.
; The expected MIR selects V_CVT_PK_FP8_F32_gfx1250_t16_e64, which carries an
; extra immediate operand (-1) ahead of the lo16 input. The hi16 half of the
; vgpr_32 copy is recombined with the 16-bit result via REG_SEQUENCE.
; NOTE(review): the -1 immediate is presumably the clamp bit used to select the
; E5M3 format - confirm against the instruction definition.
define i32 @test_cvt_pk_fp8_f32_e5m3(float %x, float %y) {
; CHECK-LABEL: name: test_cvt_pk_fp8_f32_e5m3
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec
; CHECK-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64_align2 = DS_READ2_B32_gfx9 killed [[V_MOV_B32_e32_]], 0, 1, 0, implicit $exec :: (dereferenceable load (s64) from @lds, align 4, addrspace 3)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[DS_READ2_B32_gfx9_]].sub0
; CHECK-NEXT: [[S_CVT_F16_F32_:%[0-9]+]]:sreg_32 = nofpexcept S_CVT_F16_F32 killed [[COPY2]], implicit $mode
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_MOV_B32_]], killed [[S_CVT_F16_F32_]], implicit-def dead $scc
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
; CHECK-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[S_AND_B32_]], [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_MUL_I32_]], [[S_MOV_B32_1]], implicit-def dead $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[S_ADD_I32_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].lo16
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_16 = COPY [[COPY4]]
; CHECK-NEXT: [[V_CVT_PK_FP8_F32_gfx1250_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_PK_FP8_F32_gfx1250_t16_e64 0, [[COPY1]], 0, [[COPY]], -1, [[COPY5]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].hi16
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_CVT_PK_FP8_F32_gfx1250_t16_e64_]], %subreg.lo16, killed [[COPY6]], %subreg.hi16
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 7
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 killed [[REG_SEQUENCE]], killed [[S_MOV_B32_2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.a2:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.UnifiedReturnBlock:
; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.0, [[V_MOV_B32_e32_2]], %bb.1
; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[PHI]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
; Only element 0 of the loaded vector is used below.
%ptr = load <4 x float>, ptr addrspace(3) @lds, align 4
%f = extractelement <4 x float> %ptr, i32 0
; Reinterpret the half result as a 16-bit integer and widen it to i32.
%half = fptrunc float %f to half
%i16 = bitcast half %half to i16
%i32 = zext i16 %i16 to i32
; (%i32 + 1) * 5 is the value passed as the intrinsic's third operand.
%add = add i32 %i32, 1
%mul = mul i32 %add, 5
; NOTE(review): the trailing `i1 false` is presumably the word-select flag
; (result written to the low 16 bits) - confirm against the intrinsic definition.
%pk = call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float %x, float %y, i32 %mul, i1 false)
; Return 1 when the intrinsic result equals 7, otherwise 2.
%icmp = icmp eq i32 %pk, 7
br i1 %icmp, label %a1, label %a2
a1:
ret i32 1
a2:
ret i32 2
}