AMDGPU/GlobalISel: RegBankLegalize rules for cvt fp8 e5m3 intrinsics (#196369)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 18d4ab3..5cc0d07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1749,13 +1749,15 @@
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
- amdgcn_cvt_pk_bf8_f32, amdgcn_cvt_pk_fp8_f32},
+ amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
+ amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
Standard)
.Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
- addRulesForIOpcs(
- {amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8, amdgcn_cvt_f32_fp8}, Standard)
+ addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
+ amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
+ Standard)
.Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll
index 4b59f9f7..c125c26 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float, i32, i32, i32)
@@ -232,3 +232,58 @@
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 3)
ret float %ret
}
+
+define amdgpu_ps i32 @test_cvt_pk_fp8_f32_e5m3_word0_sss(float inreg %x, float inreg %y, i32 inreg %old) {
+; GFX1250-TRUE16-LABEL: test_cvt_pk_fp8_f32_e5m3_word0_sss:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
+; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-TRUE16-NEXT: v_cvt_pk_fp8_f32 v0.l, s0, s1 clamp
+; GFX1250-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: test_cvt_pk_fp8_f32_e5m3_word0_sss:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_cvt_pk_fp8_f32 v0, s0, s1 clamp
+; GFX1250-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: test_cvt_pk_fp8_f32_e5m3_word0_sss:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_pk_fp8_f32 v0, s0, s1 clamp
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250-GISEL-NEXT: ; return to shader part epilog
+ %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float %x, float %y, i32 %old, i1 false)
+ ret i32 %ret
+}
+
+define amdgpu_ps i32 @test_cvt_sr_fp8_f32_e5m3_byte0_sss(float inreg %x, i32 inreg %r, i32 inreg %old) {
+; GFX1250-LABEL: test_cvt_sr_fp8_f32_e5m3_byte0_sss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_sr_fp8_f32 v0, s0, s1 clamp
+; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250-NEXT: ; return to shader part epilog
+ %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %x, i32 %r, i32 %old, i32 0)
+ ret i32 %ret
+}
+
+define amdgpu_ps float @test_cvt_f32_fp8_e5m3_byte0_s(i32 inreg %a) {
+; GFX1250-LABEL: test_cvt_f32_fp8_e5m3_byte0_s:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, s0 clamp
+; GFX1250-NEXT: ; return to shader part epilog
+ %ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 0)
+ ret float %ret
+}