[AArch64][GlobalISel] SIMD fpcvt codegen for rounding nodes (#171446)
This is a follow-up to
https://github.com/llvm/llvm-project/pull/165546, which allowed SIMD
fpcvt instructions to be generated from l/llround and l/llrint nodes.
This patch extends that work to enable SIMD fpcvt instruction generation
with GlobalISel.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 835fd05..c22929f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6787,14 +6787,14 @@
// For global-isel we can use register classes to determine
// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
- def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
- (!cast<Instruction>(INST # SHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
- (!cast<Instruction>(INST # DHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
- (!cast<Instruction>(INST # DSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
- (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
}
def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))),
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
@@ -6802,14 +6802,14 @@
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
let Predicates = [HasFPRCVT] in {
- def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
- (!cast<Instruction>(INST # SHr) f16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
- (!cast<Instruction>(INST # DHr) f16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
- (!cast<Instruction>(INST # DSr) f32:$Rn)>;
- def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
- (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
@@ -6826,19 +6826,42 @@
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+// For global-isel we can use register classes to determine
+// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
- def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))),
- (FCVTASSHr f16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (any_lround f16:$Rn)))),
- (FCVTASDHr f16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (any_llround f16:$Rn)))),
- (FCVTASDHr f16:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (any_lround f32:$Rn)))),
- (FCVTASDSr f32:$Rn)>;
- def : Pat<(f32 (bitconvert (i32 (any_lround f64:$Rn)))),
- (FCVTASSDr f64:$Rn)>;
- def : Pat<(f64 (bitconvert (i64 (any_llround f32:$Rn)))),
- (FCVTASDSr f32:$Rn)>;
+def : Pat<(i32 (lround f16:$Rn)),
+ (FCVTASSHr f16:$Rn)>;
+def : Pat<(i32 (lround f64:$Rn)),
+ (FCVTASSDr f64:$Rn)>;
+def : Pat<(i64 (lround f16:$Rn)),
+ (FCVTASDHr f16:$Rn)>;
+def : Pat<(i64 (llround f16:$Rn)),
+ (FCVTASDHr f16:$Rn)>;
+def : Pat<(i64 (lround f32:$Rn)),
+ (FCVTASDSr f32:$Rn)>;
+def : Pat<(i64 (llround f32:$Rn)),
+ (FCVTASDSr f32:$Rn)>;
+}
+def : Pat<(i32 (lround f32:$Rn)),
+ (FCVTASv1i32 f32:$Rn)>;
+def : Pat<(i64 (lround f64:$Rn)),
+ (FCVTASv1i64 f64:$Rn)>;
+def : Pat<(i64 (llround f64:$Rn)),
+ (FCVTASv1i64 f64:$Rn)>;
+
+let Predicates = [HasFPRCVT] in {
+def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))),
+ (FCVTASSHr f16:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_lround f16:$Rn)))),
+ (FCVTASDHr f16:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_llround f16:$Rn)))),
+ (FCVTASDHr f16:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_lround f32:$Rn)))),
+ (FCVTASDSr f32:$Rn)>;
+def : Pat<(f32 (bitconvert (i32 (any_lround f64:$Rn)))),
+ (FCVTASSDr f64:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 (any_llround f32:$Rn)))),
+ (FCVTASDSr f32:$Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (any_lround f32:$Rn)))),
(FCVTASv1i32 f32:$Rn)>;
@@ -6847,19 +6870,42 @@
def : Pat<(f64 (bitconvert (i64 (any_llround f64:$Rn)))),
(FCVTASv1i64 f64:$Rn)>;
+// For global-isel we can use register classes to determine
+// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
- def : Pat<(f32 (bitconvert (i32 (any_lrint f16:$Rn)))),
- (FCVTZSSHr (FRINTXHr f16:$Rn))>;
- def : Pat<(f64 (bitconvert (i64 (any_lrint f16:$Rn)))),
- (FCVTZSDHr (FRINTXHr f16:$Rn))>;
- def : Pat<(f64 (bitconvert (i64 (any_llrint f16:$Rn)))),
- (FCVTZSDHr (FRINTXHr f16:$Rn))>;
- def : Pat<(f64 (bitconvert (i64 (any_lrint f32:$Rn)))),
- (FCVTZSDSr (FRINTXSr f32:$Rn))>;
- def : Pat<(f32 (bitconvert (i32 (any_lrint f64:$Rn)))),
- (FCVTZSSDr (FRINTXDr f64:$Rn))>;
- def : Pat<(f64 (bitconvert (i64 (any_llrint f32:$Rn)))),
- (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+def : Pat<(i32 (lrint f16:$Rn)),
+ (FCVTZSSHr (FRINTXHr f16:$Rn))>;
+def : Pat<(i32 (lrint f64:$Rn)),
+ (FCVTZSSDr (FRINTXDr f64:$Rn))>;
+def : Pat<(i64 (lrint f16:$Rn)),
+ (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(i64 (llrint f16:$Rn)),
+ (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(i64 (lrint f32:$Rn)),
+ (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+def : Pat<(i64 (llrint f32:$Rn)),
+ (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+}
+def : Pat<(i32 (lrint f32:$Rn)),
+ (FCVTZSv1i32 (FRINTXSr f32:$Rn))>;
+def : Pat<(i64 (lrint f64:$Rn)),
+ (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+def : Pat<(i64 (llrint f64:$Rn)),
+ (FCVTZSv1i64 (FRINTXDr f64:$Rn))>;
+
+let Predicates = [HasFPRCVT] in {
+def : Pat<(f32 (bitconvert (i32 (any_lrint f16:$Rn)))),
+ (FCVTZSSHr (FRINTXHr f16:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_lrint f16:$Rn)))),
+ (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_llrint f16:$Rn)))),
+ (FCVTZSDHr (FRINTXHr f16:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_lrint f32:$Rn)))),
+ (FCVTZSDSr (FRINTXSr f32:$Rn))>;
+def : Pat<(f32 (bitconvert (i32 (any_lrint f64:$Rn)))),
+ (FCVTZSSDr (FRINTXDr f64:$Rn))>;
+def : Pat<(f64 (bitconvert (i64 (any_llrint f32:$Rn)))),
+ (FCVTZSDSr (FRINTXSr f32:$Rn))>;
}
def : Pat<(f32 (bitconvert (i32 (any_lrint f32:$Rn)))),
(FCVTZSv1i32 (FRINTXSr f32:$Rn))>;
@@ -6871,8 +6917,8 @@
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
- def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
- def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>;
+def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
+def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>;
}
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4d3d081..d646ef8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -867,7 +867,11 @@
case TargetOpcode::G_FPTOSI_SAT:
case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ case TargetOpcode::G_INTRINSIC_LLRINT:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND: {
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
if (DstType.isVector())
break;
@@ -888,12 +892,6 @@
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
- case TargetOpcode::G_INTRINSIC_LRINT:
- case TargetOpcode::G_INTRINSIC_LLRINT:
- if (MRI.getType(MI.getOperand(0).getReg()).isVector())
- break;
- OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
- break;
case TargetOpcode::G_FCMP: {
// If the result is a vector, it must use a FPR.
AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
@@ -1233,12 +1231,6 @@
}
break;
}
- case TargetOpcode::G_LROUND:
- case TargetOpcode::G_LLROUND: {
- // Source is always floating point and destination is always integer.
- OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
- break;
- }
}
// Finally construct the computed mapping.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
index 420c7cf..16100f0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
@@ -14,7 +14,7 @@
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
- ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND %fpr(s64)
+ ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND %fpr(s64)
; CHECK-NEXT: $d0 = COPY %llround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
- ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND [[COPY]](s64)
+ ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND [[COPY]](s64)
; CHECK-NEXT: $d0 = COPY %llround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%gpr:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
index 775c6ca..5cb93f7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
@@ -14,7 +14,7 @@
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
- ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND %fpr(s64)
+ ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND %fpr(s64)
; CHECK-NEXT: $d0 = COPY %lround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
- ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND [[COPY]](s64)
+ ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND [[COPY]](s64)
; CHECK-NEXT: $d0 = COPY %lround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%gpr:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll
index 8717952..8da3bfa 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll
@@ -1,6 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FPRCVT
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFPRCVT
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FPRCVT
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFPRCVT
+
;
; Lround
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 4e86832..ae7617d 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -806,12 +806,18 @@
declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
-; CHECK-LABEL: llrint_v1i64_v1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: llrint_v1i64_v1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: frintx d0, d0
+; CHECK-SD-NEXT: fcvtzs x8, d0
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: llrint_v1i64_v1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintx d0, d0
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
}
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 6abed63..9eaad68 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -11,13 +11,13 @@
; RUN: FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI
define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
-; CHECK-i32-LABEL: lrint_v1f16:
-; CHECK-i32: // %bb.0:
-; CHECK-i32-NEXT: fcvt s0, h0
-; CHECK-i32-NEXT: frintx s0, s0
-; CHECK-i32-NEXT: fcvtzs w8, s0
-; CHECK-i32-NEXT: fmov s0, w8
-; CHECK-i32-NEXT: ret
+; CHECK-i32-SD-LABEL: lrint_v1f16:
+; CHECK-i32-SD: // %bb.0:
+; CHECK-i32-SD-NEXT: fcvt s0, h0
+; CHECK-i32-SD-NEXT: frintx s0, s0
+; CHECK-i32-SD-NEXT: fcvtzs w8, s0
+; CHECK-i32-SD-NEXT: fmov s0, w8
+; CHECK-i32-SD-NEXT: ret
;
; CHECK-i64-LABEL: lrint_v1f16:
; CHECK-i64: // %bb.0:
@@ -26,6 +26,13 @@
; CHECK-i64-NEXT: fcvtzs x8, s0
; CHECK-i64-NEXT: fmov d0, x8
; CHECK-i64-NEXT: ret
+;
+; CHECK-i32-GI-LABEL: lrint_v1f16:
+; CHECK-i32-GI: // %bb.0:
+; CHECK-i32-GI-NEXT: fcvt s0, h0
+; CHECK-i32-GI-NEXT: frintx s0, s0
+; CHECK-i32-GI-NEXT: fcvtzs s0, s0
+; CHECK-i32-GI-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
ret <1 x iXLen> %a
}
@@ -985,8 +992,7 @@
; CHECK-i32-GI-LABEL: lrint_v1f32:
; CHECK-i32-GI: // %bb.0:
; CHECK-i32-GI-NEXT: frintx s0, s0
-; CHECK-i32-GI-NEXT: fcvtzs w8, s0
-; CHECK-i32-GI-NEXT: fmov s0, w8
+; CHECK-i32-GI-NEXT: fcvtzs s0, s0
; CHECK-i32-GI-NEXT: ret
;
; CHECK-i64-GI-LABEL: lrint_v1f32:
@@ -1320,12 +1326,18 @@
; CHECK-i32-NEXT: fmov s0, w8
; CHECK-i32-NEXT: ret
;
-; CHECK-i64-LABEL: lrint_v1f64:
-; CHECK-i64: // %bb.0:
-; CHECK-i64-NEXT: frintx d0, d0
-; CHECK-i64-NEXT: fcvtzs x8, d0
-; CHECK-i64-NEXT: fmov d0, x8
-; CHECK-i64-NEXT: ret
+; CHECK-i64-SD-LABEL: lrint_v1f64:
+; CHECK-i64-SD: // %bb.0:
+; CHECK-i64-SD-NEXT: frintx d0, d0
+; CHECK-i64-SD-NEXT: fcvtzs x8, d0
+; CHECK-i64-SD-NEXT: fmov d0, x8
+; CHECK-i64-SD-NEXT: ret
+;
+; CHECK-i64-GI-LABEL: lrint_v1f64:
+; CHECK-i64-GI: // %bb.0:
+; CHECK-i64-GI-NEXT: frintx d0, d0
+; CHECK-i64-GI-NEXT: fcvtzs d0, d0
+; CHECK-i64-GI-NEXT: ret
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
ret <1 x iXLen> %a
}