[AArch64] Fix FPMR handling when switching streaming mode (#135827)
According to the
[documentation](https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers/FPMR--Floating-point-Mode-Register),
FPMR is set to 0 when entering or exiting streaming mode.
This patch models that behavior by adding FPMR as an implicit def to the
instructions that enter or exit streaming mode (`smstart`/`smstop` that
change SM, with or without ZA). Instructions that only toggle ZA are left
unchanged and do not define FPMR.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bea8087..a95d8d3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8882,12 +8882,15 @@
MI.removeOperand(I);
// The SVE vector length can change when entering/leaving streaming mode.
+ // FPMR is set to 0 when entering/leaving streaming mode.
if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
/*IsImplicit=*/true));
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/true,
/*IsImplicit=*/true));
+ MI.addOperand(MachineOperand::CreateReg(AArch64::FPMR, /*IsDef=*/true,
+ /*IsImplicit=*/true));
}
}
diff --git a/llvm/test/CodeGen/AArch64/sme-write-fpmr.ll b/llvm/test/CodeGen/AArch64/sme-write-fpmr.ll
new file mode 100644
index 0000000..074d6583
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-write-fpmr.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mattr=+sme -stop-after=finalize-isel < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; Check that we don't define FPMR for 'smstart za' and 'smstop za'
+define void @smstart_za() "aarch64_new_za" nounwind {
+ ; CHECK-LABEL: name: smstart_za
+ ; CHECK-NOT: implicit-def {{[^,]*}}$fpmr
+ ret void
+}
+
+; Check that we do define FPMR for 'smstart sm' and 'smstop sm'
+define void @smstart_sm() nounwind {
+ ; CHECK-LABEL: name: smstart_sm
+ ; CHECK: MSRpstatesvcrImm1 1, 1,
+ ; CHECK-SAME: implicit-def {{[^,]*}}$fpmr
+ ; CHECK: MSRpstatesvcrImm1 1, 0,
+ ; CHECK-SAME: implicit-def {{[^,]*}}$fpmr
+ call void @require_sm()
+ ret void
+}
+
+declare void @require_sm() "aarch64_pstate_sm_enabled"