[clang][AArch64] Correctly align HFA arguments when passed on the stack

When we pass an AArch64 Homogeneous Floating-Point
Aggregate (HFA) argument with increased alignment
requirements, for example

    struct S {
      __attribute__ ((__aligned__(16))) double v[4];
    };

Clang uses `[4 x double]` for the parameter, which is passed
on the stack at alignment 8, whereas it should be at
alignment 16, following Rule C.4 in
AAPCS (https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#642parameter-passing-rules)

Currently we don't have a way to express in LLVM IR the
alignment requirements of the function arguments. The align
attribute is applicable to pointers only, and only for some
special ways of passing arguments (e.g. byval). When
implementing AAPCS32/AAPCS64, clang resorts to dubious hacks
of coercing to types, which naturally have the needed
alignment. We don't have enough types to cover all the
cases, though.

This patch introduces a new use of the stackalign attribute
to control stack slot alignment, when and if an argument is
passed in memory.

The attribute align is left as an optimizer hint - it still
applies to pointer types only and pertains to the content of
the pointer, whereas the alignment of the pointer itself is
determined by the stackalign attribute.

For byval arguments, the stackalign attribute assumes the
role, previously performed by align, falling back to align if
`stackalign` is absent.

On the clang side, when passing arguments using the "direct"
style (cf. `ABIArgInfo::Kind`), now we can optionally
specify an alignment, which is emitted as the new
`stackalign` attribute.

Patch by Momchil Velikov and Lucas Prates.

Differential Revision: https://reviews.llvm.org/D98794

GitOrigin-RevId: f9d932e6735afe73117e142a12443449f2197e69
diff --git a/include/clang/CodeGen/CGFunctionInfo.h b/include/clang/CodeGen/CGFunctionInfo.h
index 253ef94..91d867e 100644
--- a/include/clang/CodeGen/CGFunctionInfo.h
+++ b/include/clang/CodeGen/CGFunctionInfo.h
@@ -94,12 +94,17 @@
     llvm::Type *UnpaddedCoerceAndExpandType; // isCoerceAndExpand()
   };
   union {
-    unsigned DirectOffset;     // isDirect() || isExtend()
-    unsigned IndirectAlign;    // isIndirect()
+    struct {
+      unsigned Offset;
+      unsigned Align;
+    } DirectAttr;              // isDirect() || isExtend()
+    struct {
+      unsigned Align;
+      unsigned AddrSpace;
+    } IndirectAttr;            // isIndirect()
     unsigned AllocaFieldIndex; // isInAlloca()
   };
   Kind TheKind;
-  unsigned IndirectAddrSpace : 24; // isIndirect()
   bool PaddingInReg : 1;
   bool InAllocaSRet : 1;    // isInAlloca()
   bool InAllocaIndirect : 1;// isInAlloca()
@@ -126,19 +131,20 @@
 
 public:
   ABIArgInfo(Kind K = Direct)
-      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
-        IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false),
+      : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K),
+        PaddingInReg(false), InAllocaSRet(false),
         InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false),
         SRetAfterThis(false), InReg(false), CanBeFlattened(false),
         SignExt(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
                               llvm::Type *Padding = nullptr,
-                              bool CanBeFlattened = true) {
+                              bool CanBeFlattened = true, unsigned Align = 0) {
     auto AI = ABIArgInfo(Direct);
     AI.setCoerceToType(T);
     AI.setPaddingType(Padding);
     AI.setDirectOffset(Offset);
+    AI.setDirectAlign(Align);
     AI.setCanBeFlattened(CanBeFlattened);
     return AI;
   }
@@ -154,6 +160,7 @@
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(true);
     return AI;
   }
@@ -164,6 +171,7 @@
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(false);
     return AI;
   }
@@ -299,11 +307,20 @@
   // Direct/Extend accessors
   unsigned getDirectOffset() const {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    return DirectOffset;
+    return DirectAttr.Offset;
   }
   void setDirectOffset(unsigned Offset) {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    DirectOffset = Offset;
+    DirectAttr.Offset = Offset;
+  }
+
+  unsigned getDirectAlign() const {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    return DirectAttr.Align;
+  }
+  void setDirectAlign(unsigned Align) {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    DirectAttr.Align = Align;
   }
 
   bool isSignExt() const {
@@ -369,11 +386,11 @@
   // Indirect accessors
   CharUnits getIndirectAlign() const {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    return CharUnits::fromQuantity(IndirectAlign);
+    return CharUnits::fromQuantity(IndirectAttr.Align);
   }
   void setIndirectAlign(CharUnits IA) {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    IndirectAlign = IA.getQuantity();
+    IndirectAttr.Align = IA.getQuantity();
   }
 
   bool getIndirectByVal() const {
@@ -387,12 +404,12 @@
 
   unsigned getIndirectAddrSpace() const {
     assert(isIndirectAliased() && "Invalid kind!");
-    return IndirectAddrSpace;
+    return IndirectAttr.AddrSpace;
   }
 
   void setIndirectAddrSpace(unsigned AddrSpace) {
     assert(isIndirectAliased() && "Invalid kind!");
-    IndirectAddrSpace = AddrSpace;
+    IndirectAttr.AddrSpace = AddrSpace;
   }
 
   bool getIndirectRealign() const {
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 1d71148..0474ddb 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -2363,6 +2363,7 @@
         Attrs.addAttribute(llvm::Attribute::Nest);
       else if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
+      Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
       break;
 
     case ABIArgInfo::Indirect: {
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index bd3c265..3e0a269 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -5690,8 +5690,19 @@
   // In variadic functions on Windows, all composite types are treated alike,
   // no special handling of HFAs/HVAs.
   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
+    if (Kind != AArch64ABIInfo::AAPCS)
+      return ABIArgInfo::getDirect(
+          llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+
+    // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
+    // default otherwise.
+    unsigned Align =
+        getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
+    unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
+    Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
     return ABIArgInfo::getDirect(
-        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
+        nullptr, true, Align);
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
diff --git a/test/CodeGen/aarch64-args-hfa.c b/test/CodeGen/aarch64-args-hfa.c
new file mode 100644
index 0000000..4abdc42
--- /dev/null
+++ b/test/CodeGen/aarch64-args-hfa.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+
+typedef struct {
+  float v[2];
+} S0;
+
+// CHECK: define{{.*}} float @f0([2 x float] %h.coerce)
+float f0(S0 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} float @f0_call()
+// CHECK: %call = call float @f0([2 x float] %1)
+float f0_call() {
+  S0 h = {1.0f, 2.0f};
+  return f0(h);
+}
+typedef struct {
+  double v[2];
+} S1;
+
+// CHECK: define{{.*}} double @f1([2 x double] %h.coerce)
+double f1(S1 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f1_call()
+// CHECK: %call = call double @f1([2 x double] %1
+double f1_call() {
+  S1 h = {1.0, 2.0};
+  return f1(h);
+}
+typedef struct {
+  __attribute__((__aligned__(16))) double v[2];
+} S2;
+
+// CHECK-AAPCS:  define{{.*}} double @f2([2 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f2([2 x double] %h.coerce)
+double f2(S2 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f2_call()
+// CHECK-AAPCS:  %call = call double @f2([2 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f2([2 x double] %1
+double f2_call() {
+  S2 h = {1.0, 2.0};
+  return f2(h);
+}
+
+typedef struct {
+  __attribute__((__aligned__(32))) double v[4];
+} S3;
+
+// CHECK-AAPCS:  define{{.*}} double @f3([4 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f3([4 x double] %h.coerce)
+double f3(S3 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f3_call()
+// CHECK-AAPCS:  %call = call double @f3([4 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f3([4 x double] %1
+double f3_call() {
+  S3 h = {1.0, 2.0};
+  return f3(h);
+}