; blob: e7bb8825fec05940867f3770207316c72c452d34 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
; Test expansion of the llvm.convert.from.arbitrary.fp intrinsic.
; Overloads of the intrinsic exercised in this file. The mangled suffix is
; <result type>.<source integer type>; the integer operand carries the raw
; bits of the source value and the metadata operand is a string naming the
; source format (e.g. !"Float8E5M2" at the call sites in this file).
;   f32 results from i8, i6 and i4 sources:
declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
;   vector form, <4 x i4> -> <4 x float>:
declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
;   f16 and f64 results from an i8 source:
declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
; Float8E5M2
; Layout: sign(1) exp(5) mant(2), bias=15
; Supports: Inf, NaN, signed zero, denormals
; Float8E5M2 normal: 0_01111_00 = 1.0
; Input i8 60 (0x3C): sign 0, biased exponent 15 (unbiased 0), mantissa 0.
; The checks expect the constant input to collapse to a single move of the
; inline constant 1.0.
define float @from_f8e5m2_normal() {
; CHECK-LABEL: from_f8e5m2_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 zero: 0_00000_00 = +0.0
; Input i8 0: every field is zero. The expected result is the all-zero f32
; bit pattern (+0.0), materialized with a single move of 0.
define float @from_f8e5m2_zero() {
; CHECK-LABEL: from_f8e5m2_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 negative zero: 1_00000_00 = -0.0
; Input i8 -128 (0x80): only the sign bit is set, so the sign must survive the
; conversion. The expected v_bfrev_b32 of 1 (bit-reverse) produces 0x80000000,
; which is the f32 encoding of -0.0.
define float @from_f8e5m2_neg_zero() {
; CHECK-LABEL: from_f8e5m2_neg_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_bfrev_b32_e32 v0, 1
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
; Input i8 1: exponent field 0 with mantissa 01, i.e. (1/4) * 2^(1-15).
; The source denormal becomes a normal f32: 0x37800000 has biased exponent
; 111 (= 127 - 16) and a zero mantissa.
define float @from_f8e5m2_denorm() {
; CHECK-LABEL: from_f8e5m2_denorm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x37800000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 +Inf: 0_11111_00
; Input i8 124 (0x7C): exponent all ones with a zero mantissa.
; The expected result 0x7f800000 is the f32 encoding of +Inf.
define float @from_f8e5m2_inf() {
; CHECK-LABEL: from_f8e5m2_inf:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7f800000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 NaN: 0_11111_01
; Input i8 125 (0x7D): exponent all ones with a non-zero mantissa.
; The expected result is 0x7fc00000 (f32 NaN with only the top mantissa bit
; set); the source mantissa bits do not appear in the expected constant.
define float @from_f8e5m2_nan() {
; CHECK-LABEL: from_f8e5m2_nan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 max: 0_11110_11 = 57344
; Input i8 123 (0x7B): biased exponent 30 (unbiased 15), mantissa 11, i.e.
; 1.75 * 2^15. This is the largest exponent below the all-ones Inf/NaN
; encodings. 0x47600000 is the f32 encoding of 57344.0.
define float @from_f8e5m2_max() {
; CHECK-LABEL: from_f8e5m2_max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x47600000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 negative: 1_01111_00 = -1.0
; Input i8 -68 (0xBC): sign 1, biased exponent 15 (unbiased 0), mantissa 0.
; Same magnitude as from_f8e5m2_normal with the sign bit set; the checks
; expect the inline constant -1.0.
define float @from_f8e5m2_neg() {
; CHECK-LABEL: from_f8e5m2_neg:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
ret float %r
}
; Float8E5M2 runtime arg test
; With a non-constant input the conversion is expanded inline into integer
; bit manipulation. The checked sequence:
;   * masks the incoming register to its low 16 bits, then splits it into
;     mantissa (x & 3), biased exponent (bits [6:2], v_bfe_u32 2, 5) and sign
;     (bit 7 moved to bit 31 via << 24 and & 0x80000000);
;   * normal path: (exp << 23) | sign | (mant << 21), plus 0x38000000, which
;     is (127 - 15) << 23 and rebiases the exponent for f32;
;   * denormal path (exp == 0 and mant != 0): v_ffbh_u32 counts the leading
;     zeros (clz) of the mantissa, the leading one is cleared with an xor,
;     the remainder is shifted left by clz - 8 to sit below bit 23, and the
;     f32 biased exponent is 0x8e - clz;
;   * zero (exp | mant == 0) selects just the sign bit;
;   * exp == 31 with mant == 0 selects 0x7f800000 | sign (Inf), and
;     exp == 31 with mant != 0 selects 0x7fc00000 (NaN) as the final select.
define float @from_f8e5m2_dynamic(i8 %x) {
; CHECK-LABEL: from_f8e5m2_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 5
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
; CHECK-NEXT: v_sub_u32_e32 v4, 0x8e, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; CHECK-NEXT: v_add_u32_e32 v2, 0x38000000, v2
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 31, v0
; CHECK-NEXT: v_or_b32_e32 v0, 0x7f800000, v3
; CHECK-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; CHECK-NEXT: s_and_b64 vcc, s[6:7], vcc
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
ret float %r
}
; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
; Float8E4M3FN normal: 0_0111_000 = 1.0
; Input i8 56 (0x38): sign 0, biased exponent 7 (unbiased 0), mantissa 0.
; The checks expect a single move of the inline constant 1.0.
define float @from_f8e4m3fn_normal() {
; CHECK-LABEL: from_f8e4m3fn_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN NaN: 0_1111_111
; Input i8 127 (0x7F): exponent and mantissa fields are both all ones, which
; is the NaN encoding for this format. The expected result is the f32 NaN
; 0x7fc00000.
define float @from_f8e4m3fn_nan() {
; CHECK-LABEL: from_f8e4m3fn_nan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN not-NaN: 0_1111_110 = 448
; Despite exp=all-ones, this is a valid finite number (max value)
; Input i8 126 (0x7E): biased exponent 15 (unbiased 8), mantissa 110, i.e.
; 1.75 * 2^8. 0x43e00000 is the f32 encoding of 448.0.
define float @from_f8e4m3fn_max() {
; CHECK-LABEL: from_f8e4m3fn_max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x43e00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN not-NaN: 0_1111_101 = 416
; exp=all-ones but mant!=all-ones so this is finite
; Input i8 125 (0x7D): biased exponent 15 (unbiased 8), mantissa 101, i.e.
; 1.625 * 2^8. 0x43d00000 is the f32 encoding of 416.0. The same i8 value is
; expected to produce NaN under Float8E5M2 (see from_f8e5m2_nan).
define float @from_f8e4m3fn_not_nan() {
; CHECK-LABEL: from_f8e4m3fn_not_nan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x43d00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN zero: 0_0000_000 = +0.0
; Input i8 0: every field is zero. The expected result is the all-zero f32
; bit pattern (+0.0).
define float @from_f8e4m3fn_zero() {
; CHECK-LABEL: from_f8e4m3fn_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
; Input i8 1: exponent field 0 with mantissa 001, i.e. (1/8) * 2^(1-7).
; 0x3b000000 has biased exponent 118 (= 127 - 9) and a zero mantissa, so the
; result is a normal f32.
define float @from_f8e4m3fn_denorm() {
; CHECK-LABEL: from_f8e4m3fn_denorm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3b000000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
ret float %r
}
; Float8E4M3FN runtime arg test
; Inline expansion for a non-constant input. The checked sequence:
;   * splits the input into mantissa (x & 7), biased exponent (bits [6:3],
;     v_bfe_u32 3, 4) and sign (bit 7 moved to bit 31);
;   * normal path: (exp << 23) | sign | (mant << 20), plus 0x3c000000, which
;     is (127 - 7) << 23;
;   * denormal path (exp == 0 and mant != 0): normalizes the mantissa using
;     its leading-zero count (v_ffbh_u32) and uses 0x95 - clz as the f32
;     biased exponent;
;   * zero (exp | mant == 0) selects just the sign bit;
;   * NaN (0x7fc00000) is selected only when exp == 15 and mant == 7; there
;     is no Inf select, and other exp == 15 inputs take the normal path.
define float @from_f8e4m3fn_dynamic(i8 %x) {
; CHECK-LABEL: from_f8e4m3fn_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 4
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
; CHECK-NEXT: v_sub_u32_e32 v4, 0x95, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; CHECK-NEXT: v_add_u32_e32 v2, 0x3c000000, v2
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 15, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
ret float %r
}
; Float6E3M2FN (FiniteOnly)
; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
; No Inf, no NaN. All bit patterns are finite.
; Float6E3M2FN normal: 0_011_00 = 1.0
; Input i6 12 (0b001100): sign 0, biased exponent 3 (unbiased 0), mantissa 0.
; The checks expect a single move of the inline constant 1.0.
define float @from_f6e3m2fn_normal() {
; CHECK-LABEL: from_f6e3m2fn_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E3M2FN max: 0_111_11 = 28.0
; Input i6 31 (0b011111): biased exponent 7 (unbiased 4), mantissa 11, i.e.
; 1.75 * 2^4. The all-ones exponent is an ordinary finite value in this
; format. 0x41e00000 is the f32 encoding of 28.0.
define float @from_f6e3m2fn_max() {
; CHECK-LABEL: from_f6e3m2fn_max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x41e00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E3M2FN denorm: 0_000_01 = 0.0625
; Input i6 1: exponent field 0 with mantissa 01, i.e. (1/4) * 2^(1-3) = 2^-4.
; 0x3d800000 has biased exponent 123 (= 127 - 4) and a zero mantissa.
define float @from_f6e3m2fn_denorm() {
; CHECK-LABEL: from_f6e3m2fn_denorm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3d800000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E3M2FN zero: 0_000_00 = +0.0
; Input i6 0: every field is zero. The expected result is the all-zero f32
; bit pattern (+0.0).
define float @from_f6e3m2fn_zero() {
; CHECK-LABEL: from_f6e3m2fn_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E3M2FN negative: 1_011_00 = -1.0
; Input i6 -20 (0b101100): sign 1 (bit 5), biased exponent 3 (unbiased 0),
; mantissa 0. The checks expect the inline constant -1.0.
define float @from_f6e3m2fn_neg() {
; CHECK-LABEL: from_f6e3m2fn_neg:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E3M2FN runtime arg test
; Inline expansion for a non-constant i6 input. The checked sequence:
;   * splits the input into mantissa (x & 3), biased exponent (bits [4:2],
;     v_bfe_u32 2, 3) and sign (bit 5 moved to bit 31 via << 26);
;   * normal path: (exp << 23) | sign | (mant << 21), plus 0x3e000000, which
;     is (127 - 3) << 23;
;   * denormal path (exp == 0 and mant != 0): normalizes the mantissa using
;     its leading-zero count (v_ffbh_u32) and uses 0x9a - clz as the f32
;     biased exponent;
;   * zero (exp | mant == 0) selects just the sign bit.
; No Inf or NaN selects appear in the expected output for this format.
define float @from_f6e3m2fn_dynamic(i6 %x) {
; CHECK-LABEL: from_f6e3m2fn_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 3
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
; CHECK-NEXT: v_sub_u32_e32 v4, 0x9a, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; CHECK-NEXT: v_add_u32_e32 v2, 0x3e000000, v2
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
ret float %r
}
; Float6E2M3FN (FiniteOnly)
; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
; No Inf, no NaN. All bit patterns are finite.
; Float6E2M3FN normal: 0_01_000 = 1.0
; Input i6 8 (0b001000): sign 0, biased exponent 1 (unbiased 0), mantissa 0.
; The checks expect a single move of the inline constant 1.0.
define float @from_f6e2m3fn_normal() {
; CHECK-LABEL: from_f6e2m3fn_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
ret float %r
}
; Float6E2M3FN max: 0_11_111 = 7.5
; Input i6 31 (0b011111): biased exponent 3 (unbiased 2), mantissa 111, i.e.
; 1.875 * 2^2. The same i6 value decodes to 28.0 under Float6E3M2FN (see
; from_f6e3m2fn_max). 0x40f00000 is the f32 encoding of 7.5.
define float @from_f6e2m3fn_max() {
; CHECK-LABEL: from_f6e2m3fn_max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x40f00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
ret float %r
}
; Float6E2M3FN denorm: 0_00_001 = 0.125
; Input i6 1: exponent field 0 with mantissa 001, i.e. (1/8) * 2^(1-1) = 2^-3.
; 0x3e000000 has biased exponent 124 (= 127 - 3) and a zero mantissa.
define float @from_f6e2m3fn_denorm() {
; CHECK-LABEL: from_f6e2m3fn_denorm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3e000000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
ret float %r
}
; Float6E2M3FN zero: 0_00_000 = +0.0
; Input i6 0: every field is zero. The expected result is the all-zero f32
; bit pattern (+0.0).
define float @from_f6e2m3fn_zero() {
; CHECK-LABEL: from_f6e2m3fn_zero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
ret float %r
}
; Float6E2M3FN runtime arg test
; Inline expansion for a non-constant i6 input. The checked sequence:
;   * splits the input into mantissa (x & 7), biased exponent (bits [4:3],
;     v_bfe_u32 3, 2) and sign (bit 5 moved to bit 31 via << 26);
;   * normal path: (exp << 23) | sign | (mant << 20), then an integer add of
;     the inline constant 0.5. Its bit pattern is 0x3f000000, i.e.
;     (127 - 1) << 23, so this is the exponent rebias and not an FP add;
;   * denormal path (exp == 0 and mant != 0): normalizes the mantissa using
;     its leading-zero count (v_ffbh_u32) and uses 0x9b - clz as the f32
;     biased exponent;
;   * zero (exp | mant == 0) selects just the sign bit.
; No Inf or NaN selects appear in the expected output for this format.
define float @from_f6e2m3fn_dynamic(i6 %x) {
; CHECK-LABEL: from_f6e2m3fn_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
; CHECK-NEXT: v_sub_u32_e32 v4, 0x9b, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; CHECK-NEXT: v_add_u32_e32 v2, 0.5, v2
; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
ret float %r
}
; Float4E2M1FN (FiniteOnly)
; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
; No Inf, no NaN.
; Float4E2M1FN normal: 0_01_0 = 1.0
; Input i4 2 (0b0010): sign 0, biased exponent 1 (unbiased 0), mantissa 0.
; The checks expect a single move of the inline constant 1.0.
define float @from_f4e2m1fn_normal() {
; CHECK-LABEL: from_f4e2m1fn_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
ret float %r
}
; Float4E2M1FN denorm: 0_00_1 = 0.5
; Input i4 1 (0b0001): exponent field 0 with mantissa 1, i.e.
; (1/2) * 2^(1-1). This is the only non-zero positive denormal of the format;
; the checks expect the inline constant 0.5.
define float @from_f4e2m1fn_denorm() {
; CHECK-LABEL: from_f4e2m1fn_denorm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0.5
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
ret float %r
}
; Float4E2M1FN max: 0_11_1 = 6.0
; Input i4 7 (0b0111): biased exponent 3 (unbiased 2), mantissa 1, i.e.
; 1.5 * 2^2. 0x40c00000 is the f32 encoding of 6.0.
define float @from_f4e2m1fn_max() {
; CHECK-LABEL: from_f4e2m1fn_max:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x40c00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
ret float %r
}
; Float4E2M1FN runtime arg test
; Inline expansion for a non-constant i4 input. The checked sequence:
;   * splits the input into mantissa (x & 1), biased exponent (bits [2:1],
;     v_bfe_u32 1, 2) and sign (bit 3 moved to bit 31 via << 28);
;   * normal path: (exp << 23) | sign | (mant << 22), then an integer add of
;     the inline constant 0.5 (bit pattern 0x3f000000 = (127 - 1) << 23) to
;     rebias the exponent;
;   * denormal path: taken when exp == 0 and bit 0 of the input is 1 (the
;     mantissa test shows up as a compare of (x & 1) against 1); uses
;     0x9d - clz(mant) as the f32 biased exponent;
;   * zero (exp | mant == 0) selects just the sign bit.
; No Inf or NaN selects appear in the expected output for this format.
define float @from_f4e2m1fn_dynamic(i4 %x) {
; CHECK-LABEL: from_f4e2m1fn_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v1
; CHECK-NEXT: v_bfe_u32 v1, v1, 1, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 22, v2
; CHECK-NEXT: v_and_b32_e32 v4, 0x80000000, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v1
; CHECK-NEXT: v_or3_b32 v3, v5, v4, v3
; CHECK-NEXT: v_ffbh_u32_e32 v5, v2
; CHECK-NEXT: v_sub_u32_e32 v6, 31, v5
; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, 1
; CHECK-NEXT: v_xor_b32_e32 v6, v2, v6
; CHECK-NEXT: v_add_u32_e32 v7, -8, v5
; CHECK-NEXT: v_sub_u32_e32 v5, 0x9d, v5
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v6, v7, v6
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v5
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; CHECK-NEXT: v_add_u32_e32 v3, 0.5, v3
; CHECK-NEXT: v_or3_b32 v5, v4, v5, v6
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
ret float %r
}
; Float8E5M2 to f16: 1.0
; Same input as from_f8e5m2_normal (i8 60 = 0_01111_00) but with a half
; result type. 0x3c00 is the f16 encoding of 1.0, expected in v0.
define half @from_f8e5m2_to_f16() {
; CHECK-LABEL: from_f8e5m2_to_f16:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
ret half %r
}
; Float8E5M2 to f64: 1.0
; Same input as from_f8e5m2_normal (i8 60 = 0_01111_00) but with a double
; result type. The f64 encoding of 1.0 is 0x3ff0000000000000, expected as
; two 32-bit halves: v0 = 0 (low) and v1 = 0x3ff00000 (high).
define double @from_f8e5m2_to_f64() {
; CHECK-LABEL: from_f8e5m2_to_f64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
ret double %r
}
; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
; The expected output handles the four lanes one after another: the
; instruction sequence of from_f4e2m1fn_dynamic (field extraction, rebias by
; the inline constant 0.5, denormal normalization with 0x9d - clz, zero
; select) is repeated once per element. Element i is read from register vi
; and its f32 result is written back to the same vi (v0..v3); v4..v10 are
; reused as temporaries by every lane.
define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
; CHECK-LABEL: fp4_to_f32_vec:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v0
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v2
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v8, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v3
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v3
; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
ret <4 x float> %r
}