[AArch64][GlobalISel] Add some support for G_CONCAT_VECTORS.

Handles concatenating 2 x v2s32 and 2 x v4s16

Differential Revision: https://reviews.llvm.org/D59390

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356212 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 6b0434a..d6c7c6e 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -88,6 +88,7 @@
 def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
 def : GINodeEquiv<G_CTPOP, ctpop>;
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
 def : GINodeEquiv<G_FCEIL, fceil>;
 def : GINodeEquiv<G_FCOS, fcos>;
 def : GINodeEquiv<G_FSIN, fsin>;
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index ec2ce93..e1f26ec 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -91,11 +92,15 @@
                                  SmallVectorImpl<int> &Idxs) const;
   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                          MachineIRBuilder &MIRBuilder) const;
-  MachineInstr *emitVectorConcat(unsigned Op1, unsigned Op2,
+
+  // Emit a vector concat operation.
+  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
+                                 unsigned Op2,
                                  MachineIRBuilder &MIRBuilder) const;
 
   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
@@ -1726,6 +1731,8 @@
     return selectExtractElt(I, MRI);
   case TargetOpcode::G_INSERT_VECTOR_ELT:
     return selectInsertElt(I, MRI);
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return selectConcatVectors(I, MRI);
   }
 
   return false;
@@ -2067,6 +2074,21 @@
   return true;
 }
 
+bool AArch64InstructionSelector::selectConcatVectors(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+         "Unexpected opcode");
+  unsigned Dst = I.getOperand(0).getReg();
+  unsigned Op1 = I.getOperand(1).getReg();
+  unsigned Op2 = I.getOperand(2).getReg();
+  MachineIRBuilder MIRBuilder(I);
+  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
+  if (!ConcatMI)
+    return false;
+  I.eraseFromParent();
+  return true;
+}
+
 void AArch64InstructionSelector::collectShuffleMaskIndices(
     MachineInstr &I, MachineRegisterInfo &MRI,
     SmallVectorImpl<int> &Idxs) const {
@@ -2169,7 +2191,8 @@
 }
 
 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
-    unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const {
+    Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
+    MachineIRBuilder &MIRBuilder) const {
   // We implement a vector concat by:
   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
   // 2. Insert the upper vector into the destination's upper element
@@ -2215,13 +2238,14 @@
   std::tie(InsertOpc, InsSubRegIdx) =
       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
 
+  if (!Dst)
+    Dst = MRI.createVirtualRegister(DstRC);
   auto InsElt =
       MIRBuilder
-          .buildInstr(InsertOpc, {DstRC}, {WidenedOp1->getOperand(0).getReg()})
+          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
           .addImm(1) /* Lane index */
           .addUse(WidenedOp2->getOperand(0).getReg())
           .addImm(0);
-
   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
   return &*InsElt;
 }
@@ -2276,7 +2300,7 @@
   if (DstTy.getSizeInBits() != 128) {
     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
     // This case can be done with TBL1.
-    MachineInstr *Concat = emitVectorConcat(Src1Reg, Src2Reg, MIRBuilder);
+    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
     if (!Concat) {
       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
       return false;
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index f775aeb..0678c45 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -500,6 +500,9 @@
       .clampNumElements(0, v4s32, v4s32)
       .clampNumElements(0, v2s64, v2s64);
 
+  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir b/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir
new file mode 100644
index 0000000..723be5a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -global-isel-abort=1 -o - | FileCheck %s
+
+---
+name:            legal_v4s32_v2s32
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: legal_v4s32_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+    ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR
+...
+---
+name:            legal_v8s16_v4s16
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: legal_v8s16_v4s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
+    ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK: RET_ReallyLR
+    %0:_(<4 x s16>) = COPY $d0
+    %1:_(<4 x s16>) = COPY $d1
+    %2:_(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
+    $q0 = COPY %2(<8 x s16>)
+    RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index cfe3ecb..7a7e5af 100644
--- a/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -76,7 +76,7 @@
 # DEBUG:      .. type index coverage check SKIPPED: no rules defined
 #
 # DEBUG-NEXT: G_CONCAT_VECTORS (opcode {{[0-9]+}}): 2 type indices
-# DEBUG:      .. type index coverage check SKIPPED: no rules defined
+# DEBUG:      .. the first uncovered type index: 2, OK
 #
 # DEBUG-NEXT: G_PTRTOINT (opcode {{[0-9]+}}): 2 type indices
 # DEBUG:      .. the first uncovered type index: 2, OK
diff --git a/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir b/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir
new file mode 100644
index 0000000..425b44e
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name:            legal_v4s32_v2s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: legal_v4s32_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+    ; CHECK: $q0 = COPY [[INSvi64lane]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %2:fpr(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR
+
+...
+---
+name:            legal_v8s16_v4s16
+alignment:       2
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: legal_v8s16_v4s16
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+    ; CHECK: $q0 = COPY [[INSvi64lane]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = COPY $d1
+    %2:fpr(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
+    $q0 = COPY %2(<8 x s16>)
+    RET_ReallyLR
+
+...