AArch64: avoid splitting vector truncating stores.

We have code to split vector splats (of zero and non-zero) for performance
reasons, but it ignores the fact that a store might be truncating.

Actually, truncating stores are formed for vNi8 and vNi16 types. Since the
truncation is from a legal type, the size of the store is always <= 64 bits
and so they don't actually benefit from being split up anyway, so this patch
just disables that transformation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350620 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 8af0c56aa2672a1d02d8a349c9e18381a2482a39 [log] [tgz]
author: Tim Northover <tnorthover@apple.com> Tue Jan 08 13:30:27 2019 +0000
committer: Tim Northover <tnorthover@apple.com> Tue Jan 08 13:30:27 2019 +0000
tree: 3f019ad24c17d64bb7b058b5db31023b30561264
parent: e5efa6a22a313b382476afc8731df5494b0ed80f [diff]
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 623815e..c7f46a2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp

@@ -10053,6 +10053,7 @@
 
 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
                                SDValue SplatVal, unsigned NumVecElts) {
+  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
   unsigned OrigAlignment = St.getAlignment();
   unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
 
@@ -10127,6 +10128,11 @@
   if (!StVal.hasOneUse())
     return SDValue();
 
+  // If the store is truncating then it's going down to i16 or smaller, which
+  // means it can be implemented in a single store anyway.
+  if (St.isTruncatingStore())
+    return SDValue();
+
   // If the immediate offset of the address operand is too large for the stp
   // instruction, then bail out.
   if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
@@ -10177,6 +10183,11 @@
   if (NumVecElts != 4 && NumVecElts != 2)
     return SDValue();
 
+  // If the store is truncating then it's going down to i16 or smaller, which
+  // means it can be implemented in a single store anyway.
+  if (St.isTruncatingStore())
+    return SDValue();
+
   // Check that this is a splat.
   // Make sure that each of the relevant vector element locations are inserted
   // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.

diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index 7f6cba2..fe55806 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll

@@ -1681,3 +1681,19 @@
   %add = add i64 %ld, 1
   ret i64 %add
 }
+
+; CHECK-LABEL: trunc_splat_zero:
+; CHECK-DAG: strh wzr, [x0]
+define void @trunc_splat_zero(<2 x i8>* %ptr) {
+  store <2 x i8> zeroinitializer, <2 x i8>* %ptr, align 2
+  ret void
+}
+
+; CHECK-LABEL: trunc_splat:
+; CHECK: mov [[VAL:w[0-9]+]], #42
+; CHECK: movk [[VAL]], #42, lsl #16
+; CHECK: str [[VAL]], [x0]
+define void @trunc_splat(<2 x i16>* %ptr) {
+  store <2 x i16> <i16 42, i16 42>, <2 x i16>* %ptr, align 4
+  ret void
+}
commit	8af0c56aa2672a1d02d8a349c9e18381a2482a39	[log] [tgz]
author	Tim Northover <tnorthover@apple.com>	Tue Jan 08 13:30:27 2019 +0000
committer	Tim Northover <tnorthover@apple.com>	Tue Jan 08 13:30:27 2019 +0000
tree	3f019ad24c17d64bb7b058b5db31023b30561264
parent	e5efa6a22a313b382476afc8731df5494b0ed80f [diff]