[clang][AArch64] Correctly align HFA arguments when passed on the stack When we pass an AArch64 Homogeneous Floating-Point Aggregate (HFA) argument with increased alignment requirements, for example struct S { __attribute__ ((__aligned__(16))) double v[4]; }; Clang uses `[4 x double]` for the parameter, which is passed on the stack at alignment 8, whereas it should be at alignment 16, following Rule C.4 in AAPCS (https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#642parameter-passing-rules) Currently we don't have a way to express in LLVM IR the alignment requirements of the function arguments. The align attribute is applicable to pointers only, and only for some special ways of passing arguments (e.g. byval). When implementing AAPCS32/AAPCS64, clang resorts to dubious hacks of coercing to types that naturally have the needed alignment. We don't have enough types to cover all the cases, though. This patch introduces a new use of the stackalign attribute to control stack slot alignment, when and if an argument is passed in memory. The attribute align is left as an optimizer hint - it still applies to pointer types only and pertains to the content of the pointer, whereas the alignment of the pointer itself is determined by the stackalign attribute. For byval arguments, the stackalign attribute assumes the role previously performed by align, falling back to align if `stackalign` is absent. On the clang side, when passing arguments using the "direct" style (cf. `ABIArgInfo::Kind`), now we can optionally specify an alignment, which is emitted as the new `stackalign` attribute. Patch by Momchil Velikov and Lucas Prates. Differential Revision: https://reviews.llvm.org/D98794 GitOrigin-RevId: f9d932e6735afe73117e142a12443449f2197e69

commit: 308e2c0318ee84d463ce58ff899c3e2beca92435 [log] [tgz]
author: Momchil Velikov <momchil.velikov@arm.com> Thu Apr 15 19:58:54 2021 +0100
committer: Copybara-Service <copybara-worker@google.com> Fri Apr 16 12:15:57 2021 -0700
tree: ba9185131aeed8a15d5367a99d53572cb51502e7
parent: 3eb21fca19fb65b419c03424f04cc3af90421e76 [diff]
diff --git a/include/clang/CodeGen/CGFunctionInfo.h b/include/clang/CodeGen/CGFunctionInfo.h
index 253ef94..91d867e 100644
--- a/include/clang/CodeGen/CGFunctionInfo.h
+++ b/include/clang/CodeGen/CGFunctionInfo.h

@@ -94,12 +94,17 @@
     llvm::Type *UnpaddedCoerceAndExpandType; // isCoerceAndExpand()
   };
   union {
-    unsigned DirectOffset;     // isDirect() || isExtend()
-    unsigned IndirectAlign;    // isIndirect()
+    struct {
+      unsigned Offset;
+      unsigned Align;
+    } DirectAttr;              // isDirect() || isExtend()
+    struct {
+      unsigned Align;
+      unsigned AddrSpace;
+    } IndirectAttr;            // isIndirect()
     unsigned AllocaFieldIndex; // isInAlloca()
   };
   Kind TheKind;
-  unsigned IndirectAddrSpace : 24; // isIndirect()
   bool PaddingInReg : 1;
   bool InAllocaSRet : 1;    // isInAlloca()
   bool InAllocaIndirect : 1;// isInAlloca()
@@ -126,19 +131,20 @@
 
 public:
   ABIArgInfo(Kind K = Direct)
-      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
-        IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false),
+      : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K),
+        PaddingInReg(false), InAllocaSRet(false),
         InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false),
         SRetAfterThis(false), InReg(false), CanBeFlattened(false),
         SignExt(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
                               llvm::Type *Padding = nullptr,
-                              bool CanBeFlattened = true) {
+                              bool CanBeFlattened = true, unsigned Align = 0) {
     auto AI = ABIArgInfo(Direct);
     AI.setCoerceToType(T);
     AI.setPaddingType(Padding);
     AI.setDirectOffset(Offset);
+    AI.setDirectAlign(Align);
     AI.setCanBeFlattened(CanBeFlattened);
     return AI;
   }
@@ -154,6 +160,7 @@
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(true);
     return AI;
   }
@@ -164,6 +171,7 @@
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(false);
     return AI;
   }
@@ -299,11 +307,20 @@
   // Direct/Extend accessors
   unsigned getDirectOffset() const {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    return DirectOffset;
+    return DirectAttr.Offset;
   }
   void setDirectOffset(unsigned Offset) {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    DirectOffset = Offset;
+    DirectAttr.Offset = Offset;
+  }
+
+  unsigned getDirectAlign() const {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    return DirectAttr.Align;
+  }
+  void setDirectAlign(unsigned Align) {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    DirectAttr.Align = Align;
   }
 
   bool isSignExt() const {
@@ -369,11 +386,11 @@
   // Indirect accessors
   CharUnits getIndirectAlign() const {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    return CharUnits::fromQuantity(IndirectAlign);
+    return CharUnits::fromQuantity(IndirectAttr.Align);
   }
   void setIndirectAlign(CharUnits IA) {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    IndirectAlign = IA.getQuantity();
+    IndirectAttr.Align = IA.getQuantity();
   }
 
   bool getIndirectByVal() const {
@@ -387,12 +404,12 @@
 
   unsigned getIndirectAddrSpace() const {
     assert(isIndirectAliased() && "Invalid kind!");
-    return IndirectAddrSpace;
+    return IndirectAttr.AddrSpace;
   }
 
   void setIndirectAddrSpace(unsigned AddrSpace) {
     assert(isIndirectAliased() && "Invalid kind!");
-    IndirectAddrSpace = AddrSpace;
+    IndirectAttr.AddrSpace = AddrSpace;
   }
 
   bool getIndirectRealign() const {

diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 1d71148..0474ddb 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp

@@ -2363,6 +2363,7 @@
         Attrs.addAttribute(llvm::Attribute::Nest);
       else if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
+      Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
       break;
 
     case ABIArgInfo::Indirect: {

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index bd3c265..3e0a269 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp

@@ -5690,8 +5690,19 @@
   // In variadic functions on Windows, all composite types are treated alike,
   // no special handling of HFAs/HVAs.
   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
+    if (Kind != AArch64ABIInfo::AAPCS)
+      return ABIArgInfo::getDirect(
+          llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+
+    // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
+    // default otherwise.
+    unsigned Align =
+        getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
+    unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
+    Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
     return ABIArgInfo::getDirect(
-        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
+        nullptr, true, Align);
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.

diff --git a/test/CodeGen/aarch64-args-hfa.c b/test/CodeGen/aarch64-args-hfa.c
new file mode 100644
index 0000000..4abdc42
--- /dev/null
+++ b/test/CodeGen/aarch64-args-hfa.c

@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+
+typedef struct {
+  float v[2];
+} S0;
+
+// CHECK: define{{.*}} float @f0([2 x float] %h.coerce)
+float f0(S0 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} float @f0_call()
+// CHECK: %call = call float @f0([2 x float] %1)
+float f0_call() {
+  S0 h = {1.0f, 2.0f};
+  return f0(h);
+}
+typedef struct {
+  double v[2];
+} S1;
+
+// CHECK: define{{.*}} double @f1([2 x double] %h.coerce)
+double f1(S1 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f1_call()
+// CHECK: %call = call double @f1([2 x double] %1
+double f1_call() {
+  S1 h = {1.0, 2.0};
+  return f1(h);
+}
+typedef struct {
+  __attribute__((__aligned__(16))) double v[2];
+} S2;
+
+// CHECK-AAPCS:  define{{.*}} double @f2([2 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f2([2 x double] %h.coerce)
+double f2(S2 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f2_call()
+// CHECK-AAPCS:  %call = call double @f2([2 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f2([2 x double] %1
+double f2_call() {
+  S2 h = {1.0, 2.0};
+  return f2(h);
+}
+
+typedef struct {
+  __attribute__((__aligned__(32))) double v[4];
+} S3;
+
+// CHECK-AAPCS:  define{{.*}} double @f3([4 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f3([4 x double] %h.coerce)
+double f3(S3 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f3_call()
+// CHECK-AAPCS:  %call = call double @f3([4 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f3([4 x double] %1
+double f3_call() {
+  S3 h = {1.0, 2.0};
+  return f3(h);
+}
commit	308e2c0318ee84d463ce58ff899c3e2beca92435	[log] [tgz]
author	Momchil Velikov <momchil.velikov@arm.com>	Thu Apr 15 19:58:54 2021 +0100
committer	Copybara-Service <copybara-worker@google.com>	Fri Apr 16 12:15:57 2021 -0700
tree	ba9185131aeed8a15d5367a99d53572cb51502e7
parent	3eb21fca19fb65b419c03424f04cc3af90421e76 [diff]