[SelectionDAG] Don't promote the alignment of allocas beyond the stack alignment.

allocas in LLVM IR can carry a specified alignment. When that alignment
is specified, the alloca is guaranteed at least that alignment at runtime.
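For example, given

  %p = alloca i32, align 8

the memory %p points to is guaranteed to be at least 8-byte aligned at
runtime. (%p is just an illustrative name, not taken from the tests below.)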

If the alloca's type has a higher preferred alignment, SelectionDAG
currently ignores the specified alignment and raises it to the preferred
alignment, even if doing so would trigger stack realignment. I don't
think this makes sense, so this patch changes that.
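As a concrete sketch (assuming an x86-64 target, where the ABI stack
alignment is 16 bytes and <4 x i64> has a preferred alignment of 32):

  %buf = alloca <4 x i64>, align 16

Before this patch, SelectionDAG would raise %buf's alignment to 32 and
realign the stack; with this patch, the specified align 16 is honored
as-is. (%buf is a hypothetical name for illustration; the
avx2-vbroadcast.ll diff below shows the same pattern.)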

I was looking into this for SVE in particular: for SVE, overaligning
scalable (vscale'd) types is extra expensive because it requires
realigning the stack multiple times, or using dynamic allocation. (This
currently isn't implemented.)

I updated the expected assembly for a couple of tests; in particular, for
arg-copy-elide.ll, the optimization in question does not increase the
alignment the way SelectionDAG normally would. For the rest, I just
increased the specified alignment on the allocas to match what
SelectionDAG was inferring before this change.

Differential Revision: https://reviews.llvm.org/D79532
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index da77ee5..edae4f0 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1164,9 +1164,9 @@
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 eintry:
-  %__a.addr.i = alloca <4 x i64>, align 16
-  %__b.addr.i = alloca <4 x i64>, align 16
-  %vCr = alloca <4 x i64>, align 16
+  %__a.addr.i = alloca <4 x i64>, align 32
+  %__b.addr.i = alloca <4 x i64>, align 32
+  %vCr = alloca <4 x i64>, align 32
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   %tmp2 = load i8, i8* %cV_R.addr, align 4
@@ -1255,9 +1255,9 @@
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 eintry:
-  %__a.addr.i = alloca <4 x i64>, align 16
-  %__b.addr.i = alloca <4 x i64>, align 16
-  %vCr = alloca <4 x i64>, align 16
+  %__a.addr.i = alloca <4 x i64>, align 32
+  %__b.addr.i = alloca <4 x i64>, align 32
+  %vCr = alloca <4 x i64>, align 32
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   %tmp2 = load i16, i16* %cV_R.addr, align 4
@@ -1346,9 +1346,9 @@
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 eintry:
-  %__a.addr.i = alloca <4 x i64>, align 16
-  %__b.addr.i = alloca <4 x i64>, align 16
-  %vCr = alloca <4 x i64>, align 16
+  %__a.addr.i = alloca <4 x i64>, align 32
+  %__b.addr.i = alloca <4 x i64>, align 32
+  %vCr = alloca <4 x i64>, align 32
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   %tmp2 = load i32, i32* %cV_R.addr, align 4
@@ -1436,9 +1436,9 @@
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 eintry:
-  %__a.addr.i = alloca <4 x i64>, align 16
-  %__b.addr.i = alloca <4 x i64>, align 16
-  %vCr = alloca <4 x i64>, align 16
+  %__a.addr.i = alloca <4 x i64>, align 32
+  %__b.addr.i = alloca <4 x i64>, align 32
+  %vCr = alloca <4 x i64>, align 32
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   %tmp2 = load i64, i64* %cV_R.addr, align 4