[AArch64] Stop reserved registers from being saved in prolog/epilog (#138448)

[GCC's
documentation](https://gcc.gnu.org/onlinedocs/gcc-15.1.0/gcc/Code-Gen-Options.html)
is clear on how `-ffixed-reg` must behave:
```
  Treat the register named reg as a fixed register; generated
  code should never refer to it (except perhaps as a stack pointer,
  frame pointer or in some other fixed role).
```

This implies that prolog/epilog code must also not save/restore explicitly
fixed registers, even when they are callee-saved. Some projects rely on this
(GCC) behavior.

For example,
```
#include <stdint.h>

void f() {
  register uint64_t x28 asm("x28") = 0xee;
  asm volatile("" : "+r"(x28)); // avoid the mov being eliminated
}
```
compiled with `-ffixed-x28`, should not touch x28 outside of the
`mov w28, #0xee`.
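For reference, a sketch of the expected codegen (exact immediate spelling and
scheduling may differ):
```
f:
        mov     w28, #238      // 0xee; writing w28 zero-extends into x28
        ret
```
Before this patch, clang additionally saved and restored x28 around the `mov`
(roughly `str x28, [sp, #-16]!` in the prolog and `ldr x28, [sp], #16` in the
epilog).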

On riscv64, clang behaves the same as GCC, so I am inclined to believe this
is indeed a bug in the AArch64 backend.
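For illustration, the analogous riscv64 snippet (using the callee-saved s1,
i.e. x9, reserved via `-ffixed-x9`) compiles to roughly the following on both
compilers, with no save/restore of s1 (a sketch; the register choice is
illustrative):
```
g:
        li      s1, 238        # 0xee; the only reference to s1
        ret
```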

Fixes #111379.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 78ac57e..040662a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3619,6 +3619,13 @@
     if (Reg == BasePointerReg)
       SavedRegs.set(Reg);
 
+    // Don't save manually reserved registers set through +reserve-x#i,
+    // even for callee-saved registers, as per GCC's behavior.
+    if (RegInfo->isUserReservedReg(MF, Reg)) {
+      SavedRegs.reset(Reg);
+      continue;
+    }
+
     bool RegUsed = SavedRegs.test(Reg);
     unsigned PairedReg = AArch64::NoRegister;
     const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 9f242bb..1dc7318 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -519,6 +519,18 @@
 }
 
 BitVector
+AArch64RegisterInfo::getUserReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
+    // ReserveXRegister is set for registers manually reserved
+    // through +reserve-x#i.
+    if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
+      markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
+  }
+  return Reserved;
+}
+
+BitVector
 AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
@@ -551,6 +563,11 @@
   return getReservedRegs(MF)[Reg];
 }
 
+bool AArch64RegisterInfo::isUserReservedReg(const MachineFunction &MF,
+                                            MCRegister Reg) const {
+  return getUserReservedRegs(MF)[Reg];
+}
+
 bool AArch64RegisterInfo::isStrictlyReservedReg(const MachineFunction &MF,
                                                 MCRegister Reg) const {
   return getStrictlyReservedRegs(MF)[Reg];
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index ddee0d6..cc94be6 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -35,6 +35,7 @@
   }
 
   bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const;
+  bool isUserReservedReg(const MachineFunction &MF, MCRegister Reg) const;
   bool isStrictlyReservedReg(const MachineFunction &MF, MCRegister Reg) const;
   bool isAnyArgRegReserved(const MachineFunction &MF) const;
   void emitReservedArgRegCallError(const MachineFunction &MF) const;
@@ -93,6 +94,7 @@
   const uint32_t *getWindowsStackProbePreservedMask() const;
 
   BitVector getStrictlyReservedRegs(const MachineFunction &MF) const;
+  BitVector getUserReservedRegs(const MachineFunction &MF) const;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
   std::optional<std::string>
   explainReservedReg(const MachineFunction &MF,
diff --git a/llvm/test/CodeGen/AArch64/reserveXreg-for-regalloc.ll b/llvm/test/CodeGen/AArch64/reserveXreg-for-regalloc.ll
new file mode 100644
index 0000000..e0f2155
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reserveXreg-for-regalloc.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -reserve-regs-for-regalloc=LR,FP,X28,X27,X26,X25,X24,X23,X22,X21,X20,X19,X18,X17,X16,X15,X14,X13,X12,X11,X10,X9,X8,X7,X6,X5,X4 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -reserve-regs-for-regalloc=X30,X29,X28,X27,X26,X25,X24,X23,X22,X21,X20,X19,X18,X17,X16,X15,X14,X13,X12,X11,X10,X9,X8,X7,X6,X5,X4 | FileCheck %s
+
+; LR, FP, X30 and X29 should be correctly recognized and not used.
+
+define void @foo(i64 %v1, i64 %v2, ptr %ptr) {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    add x3, x0, x1
+; CHECK-NEXT:    str x3, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    str x3, [x2, #8]
+; CHECK-NEXT:    ldr x3, [x2, #16]
+; CHECK-NEXT:    add x3, x0, x3
+; CHECK-NEXT:    sub x3, x3, x1
+; CHECK-NEXT:    str x3, [x2, #16]
+; CHECK-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    str x3, [x2, #24]
+; CHECK-NEXT:    str x0, [x2, #32]
+; CHECK-NEXT:    str x1, [x2, #40]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %v3 = add i64 %v1, %v2
+  %p1 = getelementptr i64, ptr %ptr, i64 1
+  store volatile i64 %v3, ptr %p1, align 8
+
+  %p2 = getelementptr i64, ptr %ptr, i64 2
+  %v4 = load volatile i64, ptr %p2, align 8
+  %v5 = add i64 %v1, %v4
+  %v6 = sub i64 %v5, %v2
+  store volatile i64 %v6, ptr %p2, align 8
+
+  %p3 = getelementptr i64, ptr %ptr, i64 3
+  store volatile i64 %v3, ptr %p3, align 8
+
+  %p4 = getelementptr i64, ptr %ptr, i64 4
+  store volatile i64 %v1, ptr %p4, align 8
+  %p5 = getelementptr i64, ptr %ptr, i64 5
+  store volatile i64 %v2, ptr %p5, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/reserveXreg.ll b/llvm/test/CodeGen/AArch64/reserveXreg.ll
index e0f2155..86ed536 100644
--- a/llvm/test/CodeGen/AArch64/reserveXreg.ll
+++ b/llvm/test/CodeGen/AArch64/reserveXreg.ll
@@ -1,43 +1,303 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -reserve-regs-for-regalloc=LR,FP,X28,X27,X26,X25,X24,X23,X22,X21,X20,X19,X18,X17,X16,X15,X14,X13,X12,X11,X10,X9,X8,X7,X6,X5,X4 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -reserve-regs-for-regalloc=X30,X29,X28,X27,X26,X25,X24,X23,X22,X21,X20,X19,X18,X17,X16,X15,X14,X13,X12,X11,X10,X9,X8,X7,X6,X5,X4 | FileCheck %s
+;; Check that manually reserved registers are always excluded from being saved
+;; by the function prolog/epilog, even callee-saved ones, as per GCC behavior.
+;; X19 (BP, LLVM-specific), X29 (FP), X30 (LR), and X31 (SP) are special, so
+;; they are not checked.
 
-; LR, FP, X30 and X29 should be correctly recognized and not used.
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s
 
-define void @foo(i64 %v1, i64 %v2, ptr %ptr) {
-; CHECK-LABEL: foo:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    add x3, x0, x1
-; CHECK-NEXT:    str x3, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    str x3, [x2, #8]
-; CHECK-NEXT:    ldr x3, [x2, #16]
-; CHECK-NEXT:    add x3, x0, x3
-; CHECK-NEXT:    sub x3, x3, x1
-; CHECK-NEXT:    str x3, [x2, #16]
-; CHECK-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT:    str x3, [x2, #24]
-; CHECK-NEXT:    str x0, [x2, #32]
-; CHECK-NEXT:    str x1, [x2, #40]
-; CHECK-NEXT:    add sp, sp, #16
-; CHECK-NEXT:    ret
-  %v3 = add i64 %v1, %v2
-  %p1 = getelementptr i64, ptr %ptr, i64 1
-  store volatile i64 %v3, ptr %p1, align 8
+define preserve_mostcc void @t1() "target-features"="+reserve-x1" {
+; CHECK-LABEL: t1:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w1, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x1},{x1}"(i64 256)
+  ret void
+}
 
-  %p2 = getelementptr i64, ptr %ptr, i64 2
-  %v4 = load volatile i64, ptr %p2, align 8
-  %v5 = add i64 %v1, %v4
-  %v6 = sub i64 %v5, %v2
-  store volatile i64 %v6, ptr %p2, align 8
+define preserve_mostcc void @t2() "target-features"="+reserve-x2" {
+; CHECK-LABEL: t2:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w2, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x2},{x2}"(i64 256)
+  ret void
+}
 
-  %p3 = getelementptr i64, ptr %ptr, i64 3
-  store volatile i64 %v3, ptr %p3, align 8
+define preserve_mostcc void @t3() "target-features"="+reserve-x3" {
+; CHECK-LABEL: t3:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w3, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x3},{x3}"(i64 256)
+  ret void
+}
 
-  %p4 = getelementptr i64, ptr %ptr, i64 4
-  store volatile i64 %v1, ptr %p4, align 8
-  %p5 = getelementptr i64, ptr %ptr, i64 5
-  store volatile i64 %v2, ptr %p5, align 8
+define preserve_mostcc void @t4() "target-features"="+reserve-x4" {
+; CHECK-LABEL: t4:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w4, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x4},{x4}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t5() "target-features"="+reserve-x5" {
+; CHECK-LABEL: t5:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w5, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x5},{x5}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t6() "target-features"="+reserve-x6" {
+; CHECK-LABEL: t6:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w6, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x6},{x6}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t7() "target-features"="+reserve-x7" {
+; CHECK-LABEL: t7:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w7, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x7},{x7}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t8() "target-features"="+reserve-x8" {
+; CHECK-LABEL: t8:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w8, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x8},{x8}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t9() "target-features"="+reserve-x9" {
+; CHECK-LABEL: t9:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w9, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x9},{x9}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t10() "target-features"="+reserve-x10" {
+; CHECK-LABEL: t10:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w10, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x10},{x10}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t11() "target-features"="+reserve-x11" {
+; CHECK-LABEL: t11:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w11, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x11},{x11}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t12() "target-features"="+reserve-x12" {
+; CHECK-LABEL: t12:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w12, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x12},{x12}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t13() "target-features"="+reserve-x13" {
+; CHECK-LABEL: t13:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w13, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x13},{x13}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t14() "target-features"="+reserve-x14" {
+; CHECK-LABEL: t14:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w14, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x14},{x14}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t15() "target-features"="+reserve-x15" {
+; CHECK-LABEL: t15:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w15, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x15},{x15}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t16() "target-features"="+reserve-x16" {
+; CHECK-LABEL: t16:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w16, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x16},{x16}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t17() "target-features"="+reserve-x17" {
+; CHECK-LABEL: t17:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w17, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x17},{x17}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t18() "target-features"="+reserve-x18" {
+; CHECK-LABEL: t18:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w18, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x18},{x18}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t20() "target-features"="+reserve-x20" {
+; CHECK-LABEL: t20:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w20, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x20},{x20}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t21() "target-features"="+reserve-x21" {
+; CHECK-LABEL: t21:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w21, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x21},{x21}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t22() "target-features"="+reserve-x22" {
+; CHECK-LABEL: t22:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w22, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x22},{x22}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t23() "target-features"="+reserve-x23" {
+; CHECK-LABEL: t23:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w23, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x23},{x23}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t24() "target-features"="+reserve-x24" {
+; CHECK-LABEL: t24:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w24, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x24},{x24}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t25() "target-features"="+reserve-x25" {
+; CHECK-LABEL: t25:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w25, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x25},{x25}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t26() "target-features"="+reserve-x26" {
+; CHECK-LABEL: t26:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w26, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x26},{x26}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t27() "target-features"="+reserve-x27" {
+; CHECK-LABEL: t27:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w27, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x27},{x27}"(i64 256)
+  ret void
+}
+
+define preserve_mostcc void @t28() "target-features"="+reserve-x28" {
+; CHECK-LABEL: t28:
+; CHECK: // %bb.0:
+; CHECK-NEXT:        mov     w28, #256
+; CHECK-NEXT:        //APP
+; CHECK-NEXT:        //NO_APP
+; CHECK-NEXT:        ret
+  call i64 asm sideeffect "", "={x28},{x28}"(i64 256)
   ret void
 }