[PowerPC][AIX] Add support for vector arg passing on the stack.

Enable passing more vector arguments than available vector
argument passing registers.

Differential Revision: https://reviews.llvm.org/D96415

GitOrigin-RevId: bb260b1ca7d51869e140212aa543f53dfcf01a1b
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index af35f10..992fd8b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6401,12 +6401,14 @@
       report_fatal_error(
           "variadic arguments for vector types are unimplemented for AIX");
 
-    if (unsigned VReg = State.AllocateReg(VR))
+    if (unsigned VReg = State.AllocateReg(VR)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
-    else {
-      report_fatal_error(
-          "passing vector parameters to the stack is unimplemented for AIX");
+      return false;
     }
+
+    const unsigned VecSize = 16;
+    const unsigned Offset = State.AllocateStack(VecSize, Align(VecSize));
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
     return false;
   }
   }
@@ -6554,10 +6556,6 @@
     CCValAssign &VA = ArgLocs[I++];
     MVT LocVT = VA.getLocVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
-    if (VA.isMemLoc() && VA.getValVT().isVector())
-      report_fatal_error(
-          "passing vector parameters to the stack is unimplemented for AIX");
-
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
     // in register.  The caller is required to initialize both the register
@@ -6908,10 +6906,6 @@
     const MVT LocVT = VA.getLocVT();
     const MVT ValVT = VA.getValVT();
 
-    if (VA.isMemLoc() && VA.getValVT().isVector())
-      report_fatal_error(
-          "passing vector parameters to the stack is unimplemented for AIX");
-
     switch (VA.getLocInfo()) {
     default:
       report_fatal_error("Unexpected argument extension type.");
diff --git a/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll b/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll
new file mode 100644
index 0000000..468da5c
--- /dev/null
+++ b/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll
@@ -0,0 +1,68 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=32BIT
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \
+; RUN:     -stop-after=machine-cp -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=MIR32
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=64BIT
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \
+; RUN:     -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=MIR64
+
+%struct.Test = type { double, double, double, double }
+
+define double @test(i32 signext %r3, i32 signext %r4, double %fpr1, double %fpr2, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4, <2 x double> %v5, <2 x double> %v6, <2 x double> %v7, <2 x double> %v8, <2 x double> %v9, <2 x double> %v10, <2 x double> %v11, <2 x double> %v12, <2 x double> %v13, <2 x double> %vSpill, double %fpr3, double %fpr4, double %fpr5, double %fpr6, double %fpr7, double %fpr8, double %fpr9, double %fpr10, double %fpr11, double %fpr12, double %fpr13, i32 signext %gprSpill, %struct.Test* nocapture readonly byval(%struct.Test) align 4 %t) {
+entry:
+  %vecext = extractelement <2 x double> %vSpill, i32 0
+  %x = getelementptr inbounds %struct.Test, %struct.Test* %t, i32 0, i32 0
+  %0 = load double, double* %x, align 4
+  %add = fadd double %vecext, %0
+  ret double %add
+}
+
+; 32BIT-LABEL: .test:
+; 32BIT-DAG:     lfd {{[0-9]+}}, 48(1)
+; 32BIT-DAG:     lfd {{[0-9]+}}, 156(1)
+
+; MIR32: name:            test
+; MIR32: fixedStack:
+; MIR32:   - { id: 0, type: default, offset: 156, size: 32, alignment: 4, stack-id: default,
+; MIR32:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+; MIR32:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR32:   - { id: 1, type: default, offset: 152, size: 4, alignment: 8, stack-id: default,
+; MIR32:       isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+; MIR32:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR32:   - { id: 2, type: default, offset: 48, size: 16, alignment: 16, stack-id: default,
+; MIR32:       isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+; MIR32:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+; MIR32:  renamable $[[GPR1:r[0-9]+]] = ADDI %fixed-stack.2, 0
+; MIR32:  renamable $[[GPR2:r[0-9]+]] = ADDI %fixed-stack.0, 0
+; MIR32:  renamable $f{{[0-9]+}} = XFLOADf64 $zero, killed renamable $[[GPR1]]
+; MIR32:  renamable $f{{[0-9]+}} = XFLOADf64 $zero, killed renamable $[[GPR2]]
+
+; 64BIT-LABEL: .test:
+; 64BIT-DAG:     lfd {{[0-9]+}}, 80(1)
+; 64BIT-DAG:     lfd {{[0-9]+}}, 192(1)
+
+; MIR64: name:            test
+; MIR64: fixedStack:
+; MIR64:   - { id: 0, type: default, offset: 192, size: 32, alignment: 16, stack-id: default,
+; MIR64:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+; MIR64:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR64:   - { id: 1, type: default, offset: 188, size: 4, alignment: 4, stack-id: default,
+; MIR64:       isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+; MIR64:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR64:   - { id: 2, type: default, offset: 80, size: 16, alignment: 16, stack-id: default,
+; MIR64:       isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+; MIR64:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+; MIR64:   renamable $[[GPR1:x[0-9]+]] = ADDI8 %fixed-stack.2, 0
+; MIR64:   renamable $[[GPR2:x[0-9]+]] = ADDI8 %fixed-stack.0, 0
+; MIR64:   renamable $f{{[0-9]+}} = XFLOADf64 $zero8, killed renamable $[[GPR1]]
+; MIR64:   renamable $f{{[0-9]+}} = XFLOADf64 $zero8, killed renamable $[[GPR2]]
diff --git a/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll b/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll
new file mode 100644
index 0000000..9fe8783
--- /dev/null
+++ b/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \
+; RUN:     -stop-after=machine-cp -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=MIR32
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \
+; RUN:     -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=MIR64
+
+%struct.Test = type { double, double, double, double }
+
+@__const.caller.t = private unnamed_addr constant %struct.Test { double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00 }, align 8
+
+define double @caller() {
+; MIR32-LABEL: name: caller
+; MIR32: bb.0.entry:
+; MIR32:   renamable $r3 = LWZtoc @__const.caller.t, $r2 :: (load 4 from got)
+; MIR32:   renamable $r4 = LI 31
+; MIR32:   renamable $v2 = LVX renamable $r3, killed renamable $r4
+; MIR32:   renamable $r4 = LI 16
+; MIR32:   renamable $v3 = LVX renamable $r3, killed renamable $r4
+; MIR32:   renamable $v4 = LVSL $zero, renamable $r3
+; MIR32:   renamable $v2 = VPERM renamable $v3, killed renamable $v2, renamable $v4
+; MIR32:   renamable $r4 = LI 172
+; MIR32:   STXVW4X killed renamable $v2, $r1, killed renamable $r4 :: (store 16 + 16, align 4)
+; MIR32:   renamable $v2 = LVX $zero, killed renamable $r3
+; MIR32:   renamable $v2 = VPERM killed renamable $v2, killed renamable $v3, killed renamable $v4
+; MIR32:   renamable $r3 = LI 156
+; MIR32:   STXVW4X killed renamable $v2, $r1, killed renamable $r3 :: (store 16, align 4)
+; MIR32:   ADJCALLSTACKDOWN 188, 0, implicit-def dead $r1, implicit $r1
+; MIR32:   renamable $vsl0 = XXLXORz
+; MIR32:   $f1 = XXLXORdpz
+; MIR32:   $f2 = XXLXORdpz
+; MIR32:   $v2 = XXLXORz
+; MIR32:   $v3 = XXLXORz
+; MIR32:   $v4 = XXLXORz
+; MIR32:   $v5 = XXLXORz
+; MIR32:   $v6 = XXLXORz
+; MIR32:   $v7 = XXLXORz
+; MIR32:   $v8 = XXLXORz
+; MIR32:   $v9 = XXLXORz
+; MIR32:   $v10 = XXLXORz
+; MIR32:   $v11 = XXLXORz
+; MIR32:   $v12 = XXLXORz
+; MIR32:   $v13 = XXLXORz
+; MIR32:   $f3 = XXLXORdpz
+; MIR32:   $f4 = XXLXORdpz
+; MIR32:   $f5 = XXLXORdpz
+; MIR32:   $f6 = XXLXORdpz
+; MIR32:   $f7 = XXLXORdpz
+; MIR32:   renamable $r3 = LI 136
+; MIR32:   $f8 = XXLXORdpz
+; MIR32:   renamable $r4 = LI 120
+; MIR32:   renamable $r5 = LWZtoc %const.0, $r2 :: (load 4 from got)
+; MIR32:   STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8)
+; MIR32:   $f9 = XXLXORdpz
+; MIR32:   renamable $r3 = LI 104
+; MIR32:   STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store 16, align 8)
+; MIR32:   $f10 = XXLXORdpz
+; MIR32:   STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8)
+; MIR32:   renamable $r3 = LI 88
+; MIR32:   $f11 = XXLXORdpz
+; MIR32:   STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8)
+; MIR32:   renamable $r3 = LI 72
+; MIR32:   renamable $v0 = LXVD2X $zero, killed renamable $r5 :: (load 16 from constant-pool)
+; MIR32:   $f12 = XXLXORdpz
+; MIR32:   STXVW4X killed renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8)
+; MIR32:   $f13 = XXLXORdpz
+; MIR32:   renamable $r5 = LI 48
+; MIR32:   renamable $r6 = LI 512
+; MIR32:   $r3 = LI 128
+; MIR32:   $r4 = LI 256
+; MIR32:   STXVD2X killed renamable $v0, $r1, killed renamable $r5 :: (store 16)
+; MIR32:   STW killed renamable $r6, 152, $r1 :: (store 4)
+; MIR32:   BL_NOP <mcsymbol .callee[PR]>, csr_aix32_altivec, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $f1, implicit $f2, implicit $v2, implicit $v3, implicit $v4, implicit $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def $f1
+; MIR32:   ADJCALLSTACKUP 188, 0, implicit-def dead $r1, implicit $r1
+; MIR32:   BLR implicit $lr, implicit $rm, implicit $f1
+
+; MIR64-LABEL: name: caller
+; MIR64: bb.0.entry:
+; MIR64:   renamable $x3 = LDtoc @__const.caller.t, $x2 :: (load 8 from got)
+; MIR64:   renamable $x4 = LI8 16
+; MIR64:   renamable $vsl0 = LXVD2X renamable $x3, killed renamable $x4 :: (load 16 + 16, align 8)
+; MIR64:   renamable $x4 = LI8 208
+; MIR64:   STXVD2X killed renamable $vsl0, $x1, killed renamable $x4 :: (store 16 + 16, align 4)
+; MIR64:   renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16, align 8)
+; MIR64:   renamable $x3 = LI8 192
+; MIR64:   STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 4)
+; MIR64:   ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1
+; MIR64:   $f1 = XXLXORdpz
+; MIR64:   $f2 = XXLXORdpz
+; MIR64:   $v2 = XXLXORz
+; MIR64:   $v3 = XXLXORz
+; MIR64:   $v4 = XXLXORz
+; MIR64:   $v5 = XXLXORz
+; MIR64:   $v6 = XXLXORz
+; MIR64:   $v7 = XXLXORz
+; MIR64:   $v8 = XXLXORz
+; MIR64:   $v9 = XXLXORz
+; MIR64:   $v10 = XXLXORz
+; MIR64:   $v11 = XXLXORz
+; MIR64:   $v12 = XXLXORz
+; MIR64:   $v13 = XXLXORz
+; MIR64:   $f3 = XXLXORdpz
+; MIR64:   renamable $x3 = LDtocCPT %const.0, $x2 :: (load 8 from got)
+; MIR64:   $f4 = XXLXORdpz
+; MIR64:   $f5 = XXLXORdpz
+; MIR64:   $f6 = XXLXORdpz
+; MIR64:   renamable $x4 = LDtocCPT %const.1, $x2 :: (load 8 from got)
+; MIR64:   renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16 from constant-pool)
+; MIR64:   $f7 = XXLXORdpz
+; MIR64:   $f8 = XXLXORdpz
+; MIR64:   renamable $x3 = LI8 160
+; MIR64:   $f9 = XXLXORdpz
+; MIR64:   renamable $x5 = LI8 144
+; MIR64:   renamable $vsl13 = LXVD2X $zero8, killed renamable $x4 :: (load 16 from constant-pool)
+; MIR64:   STXVD2X renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8)
+; MIR64:   $f10 = XXLXORdpz
+; MIR64:   renamable $x3 = LI8 128
+; MIR64:   STXVD2X renamable $vsl0, $x1, killed renamable $x5 :: (store 16, align 8)
+; MIR64:   $f11 = XXLXORdpz
+; MIR64:   renamable $x4 = LI8 80
+; MIR64:   STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8)
+; MIR64:   $f12 = XXLXORdpz
+; MIR64:   STXVD2X killed renamable $vsl13, $x1, killed renamable $x4 :: (store 16)
+; MIR64:   $f13 = XXLXORdpz
+; MIR64:   renamable $x5 = LI8 512
+; MIR64:   renamable $x6 = LI8 0
+; MIR64:   $x3 = LI8 128
+; MIR64:   $x4 = LI8 256
+; MIR64:   STD killed renamable $x5, 184, $x1 :: (store 8)
+; MIR64:   STD killed renamable $x6, 176, $x1 :: (store 8)
+; MIR64:   BL8_NOP <mcsymbol .callee[PR]>, csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $f1, implicit $f2, implicit killed $v2, implicit killed $v3, implicit killed $v4, implicit killed $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def $f1
+; MIR64:   ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1
+; MIR64:   BLR8 implicit $lr8, implicit $rm, implicit $f1
+  entry:
+    %call = tail call double @callee(i32 signext 128, i32 signext 256, double 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 2.400000e+01, double 2.500000e+01>, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, i32 signext 512, %struct.Test* nonnull byval(%struct.Test) align 4 @__const.caller.t)
+      ret double %call
+}
+
+declare double @callee(i32 signext, i32 signext, double, double, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, double, double, double, double, double, double, double, double, double, double, double, i32 signext, %struct.Test* byval(%struct.Test) align 8)
diff --git a/test/CodeGen/PowerPC/aix-vec-arg-spills.ll b/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
new file mode 100644
index 0000000..ce1ba5c
--- /dev/null
+++ b/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=32BIT
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=64BIT
+%struct.Test = type { double, double, double, double }
+
+@__const.caller.t = private unnamed_addr constant %struct.Test { double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00 }, align 8
+
+define double @caller() {
+; 32BIT-LABEL: caller:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr 0
+; 32BIT-NEXT:    stw 0, 8(1)
+; 32BIT-NEXT:    stwu 1, -192(1)
+; 32BIT-NEXT:    lwz 3, L..C0(2)
+; 32BIT-NEXT:    li 4, 31
+; 32BIT-NEXT:    xxlxor 0, 0, 0
+; 32BIT-NEXT:    lwz 5, L..C1(2)
+; 32BIT-NEXT:    li 6, 512
+; 32BIT-NEXT:    xxlxor 1, 1, 1
+; 32BIT-NEXT:    xxlxor 2, 2, 2
+; 32BIT-NEXT:    lvx 2, 3, 4
+; 32BIT-NEXT:    li 4, 16
+; 32BIT-NEXT:    lvsl 4, 0, 3
+; 32BIT-NEXT:    xxlxor 37, 37, 37
+; 32BIT-NEXT:    lvx 3, 3, 4
+; 32BIT-NEXT:    li 4, 172
+; 32BIT-NEXT:    lxvd2x 32, 0, 5
+; 32BIT-NEXT:    xxlxor 38, 38, 38
+; 32BIT-NEXT:    xxlxor 39, 39, 39
+; 32BIT-NEXT:    li 5, 48
+; 32BIT-NEXT:    vperm 2, 3, 2, 4
+; 32BIT-NEXT:    xxlxor 40, 40, 40
+; 32BIT-NEXT:    xxlxor 41, 41, 41
+; 32BIT-NEXT:    xxlxor 42, 42, 42
+; 32BIT-NEXT:    xxlxor 43, 43, 43
+; 32BIT-NEXT:    xxlxor 44, 44, 44
+; 32BIT-NEXT:    stxvw4x 34, 1, 4
+; 32BIT-NEXT:    li 4, 120
+; 32BIT-NEXT:    xxlxor 45, 45, 45
+; 32BIT-NEXT:    lvx 2, 0, 3
+; 32BIT-NEXT:    li 3, 156
+; 32BIT-NEXT:    xxlxor 3, 3, 3
+; 32BIT-NEXT:    xxlxor 4, 4, 4
+; 32BIT-NEXT:    vperm 2, 2, 3, 4
+; 32BIT-NEXT:    xxlxor 35, 35, 35
+; 32BIT-NEXT:    xxlxor 36, 36, 36
+; 32BIT-NEXT:    xxlxor 5, 5, 5
+; 32BIT-NEXT:    xxlxor 6, 6, 6
+; 32BIT-NEXT:    xxlxor 7, 7, 7
+; 32BIT-NEXT:    stxvw4x 34, 1, 3
+; 32BIT-NEXT:    li 3, 136
+; 32BIT-NEXT:    xxlxor 34, 34, 34
+; 32BIT-NEXT:    stxvw4x 0, 1, 3
+; 32BIT-NEXT:    li 3, 104
+; 32BIT-NEXT:    stxvw4x 0, 1, 4
+; 32BIT-NEXT:    li 4, 256
+; 32BIT-NEXT:    stxvw4x 0, 1, 3
+; 32BIT-NEXT:    li 3, 88
+; 32BIT-NEXT:    xxlxor 8, 8, 8
+; 32BIT-NEXT:    xxlxor 9, 9, 9
+; 32BIT-NEXT:    stxvw4x 0, 1, 3
+; 32BIT-NEXT:    li 3, 72
+; 32BIT-NEXT:    xxlxor 10, 10, 10
+; 32BIT-NEXT:    stxvw4x 0, 1, 3
+; 32BIT-NEXT:    li 3, 128
+; 32BIT-NEXT:    xxlxor 11, 11, 11
+; 32BIT-NEXT:    stxvd2x 32, 1, 5
+; 32BIT-NEXT:    stw 6, 152(1)
+; 32BIT-NEXT:    xxlxor 12, 12, 12
+; 32BIT-NEXT:    xxlxor 13, 13, 13
+; 32BIT-NEXT:    bl .callee[PR]
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    addi 1, 1, 192
+; 32BIT-NEXT:    lwz 0, 8(1)
+; 32BIT-NEXT:    mtlr 0
+; 32BIT-NEXT:    blr
+
+; 64BIT-LABEL: caller:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr 0
+; 64BIT-NEXT:    std 0, 16(1)
+; 64BIT-NEXT:    stdu 1, -224(1)
+; 64BIT-NEXT:    ld 3, L..C0(2)
+; 64BIT-NEXT:    li 4, 16
+; 64BIT-NEXT:    li 5, 144
+; 64BIT-NEXT:    xxlxor 1, 1, 1
+; 64BIT-NEXT:    li 6, 0
+; 64BIT-NEXT:    xxlxor 2, 2, 2
+; 64BIT-NEXT:    xxlxor 34, 34, 34
+; 64BIT-NEXT:    lxvd2x 0, 3, 4
+; 64BIT-NEXT:    li 4, 208
+; 64BIT-NEXT:    xxlxor 35, 35, 35
+; 64BIT-NEXT:    xxlxor 36, 36, 36
+; 64BIT-NEXT:    xxlxor 37, 37, 37
+; 64BIT-NEXT:    stxvd2x 0, 1, 4
+; 64BIT-NEXT:    ld 4, L..C1(2)
+; 64BIT-NEXT:    xxlxor 38, 38, 38
+; 64BIT-NEXT:    lxvd2x 0, 0, 3
+; 64BIT-NEXT:    li 3, 192
+; 64BIT-NEXT:    xxlxor 39, 39, 39
+; 64BIT-NEXT:    xxlxor 40, 40, 40
+; 64BIT-NEXT:    lxvd2x 13, 0, 4
+; 64BIT-NEXT:    li 4, 80
+; 64BIT-NEXT:    xxlxor 41, 41, 41
+; 64BIT-NEXT:    stxvd2x 0, 1, 3
+; 64BIT-NEXT:    ld 3, L..C2(2)
+; 64BIT-NEXT:    xxlxor 42, 42, 42
+; 64BIT-NEXT:    xxlxor 43, 43, 43
+; 64BIT-NEXT:    xxlxor 44, 44, 44
+; 64BIT-NEXT:    lxvd2x 0, 0, 3
+; 64BIT-NEXT:    li 3, 160
+; 64BIT-NEXT:    xxlxor 45, 45, 45
+; 64BIT-NEXT:    xxlxor 3, 3, 3
+; 64BIT-NEXT:    xxlxor 4, 4, 4
+; 64BIT-NEXT:    stxvd2x 0, 1, 3
+; 64BIT-NEXT:    li 3, 128
+; 64BIT-NEXT:    xxlxor 5, 5, 5
+; 64BIT-NEXT:    xxlxor 6, 6, 6
+; 64BIT-NEXT:    stxvd2x 0, 1, 5
+; 64BIT-NEXT:    li 5, 512
+; 64BIT-NEXT:    xxlxor 7, 7, 7
+; 64BIT-NEXT:    stxvd2x 0, 1, 3
+; 64BIT-NEXT:    xxlxor 8, 8, 8
+; 64BIT-NEXT:    stxvd2x 13, 1, 4
+; 64BIT-NEXT:    li 4, 256
+; 64BIT-NEXT:    std 5, 184(1)
+; 64BIT-NEXT:    xxlxor 9, 9, 9
+; 64BIT-NEXT:    std 6, 176(1)
+; 64BIT-NEXT:    xxlxor 10, 10, 10
+; 64BIT-NEXT:    xxlxor 11, 11, 11
+; 64BIT-NEXT:    xxlxor 12, 12, 12
+; 64BIT-NEXT:    xxlxor 13, 13, 13
+; 64BIT-NEXT:    bl .callee[PR]
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    addi 1, 1, 224
+; 64BIT-NEXT:    ld 0, 16(1)
+; 64BIT-NEXT:    mtlr 0
+; 64BIT-NEXT:    blr
+  entry:
+    %call = tail call double @callee(i32 signext 128, i32 signext 256, double 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <2 x double> <double 2.400000e+01, double 2.500000e+01>, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, i32 signext 512, %struct.Test* nonnull byval(%struct.Test) align 4 @__const.caller.t)
+      ret double %call
+}
+
+declare double @callee(i32 signext, i32 signext, double, double, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, double, double, double, double, double, double, double, double, double, double, double, i32 signext, %struct.Test* byval(%struct.Test) align 8)
diff --git a/test/CodeGen/PowerPC/aix-vector-stack-caller.ll b/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
index 63653f5..e09b519 100644
--- a/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
+++ b/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
@@ -1,17 +1,98 @@
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN:   -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefixes=32BIT,LITERAL
 
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN:   -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefixes=64BIT,LITERAL
 
 define dso_local i32 @vec_caller() {
+; LITERAL:       L..CPI0_0:
+; LITERAL-NEXT:    .vbyte  4, 53
+; LITERAL-NEXT:    .vbyte  4, 54
+; LITERAL-NEXT:    .vbyte  4, 55
+; LITERAL-NEXT:    .vbyte  4, 56
+; LITERAL-NEXT:  L..CPI0_1:
+; LITERAL-NEXT:    .vbyte  4, 49
+; LITERAL-NEXT:    .vbyte  4, 50
+; LITERAL-NEXT:    .vbyte  4, 51
+; LITERAL-NEXT:    .vbyte  4, 52
+
+; 32BIT-LABEL: vec_caller:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr 0
+; 32BIT-NEXT:    stw 0, 8(1)
+; 32BIT-NEXT:    stwu 1, -64(1)
+; 32BIT-NEXT:    lwz 3, L..C0(2)
+; 32BIT-NEXT:    lwz 4, L..C1(2)
+; 32BIT-NEXT:    xxlxor 34, 34, 34
+; 32BIT-NEXT:    xxlxor 35, 35, 35
+; 32BIT-NEXT:    xxlxor 36, 36, 36
+; 32BIT-NEXT:    lxvw4x 0, 0, 3
+; 32BIT-NEXT:    lxvw4x 1, 0, 4
+; 32BIT-NEXT:    xxlxor 37, 37, 37
+; 32BIT-NEXT:    li 3, 48
+; 32BIT-NEXT:    xxlxor 38, 38, 38
+; 32BIT-NEXT:    li 4, 32
+; 32BIT-NEXT:    xxlxor 39, 39, 39
+; 32BIT-NEXT:    xxlxor 40, 40, 40
+; 32BIT-NEXT:    stxvw4x 0, 1, 3
+; 32BIT-NEXT:    xxlxor 41, 41, 41
+; 32BIT-NEXT:    stxvw4x 1, 1, 4
+; 32BIT-NEXT:    xxlxor 42, 42, 42
+; 32BIT-NEXT:    xxlxor 43, 43, 43
+; 32BIT-NEXT:    xxlxor 44, 44, 44
+; 32BIT-NEXT:    xxlxor 45, 45, 45
+; 32BIT-NEXT:    bl .vec_callee_stack[PR]
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    addi 1, 1, 64
+; 32BIT-NEXT:    lwz 0, 8(1)
+; 32BIT-NEXT:    mtlr 0
+; 32BIT-NEXT:    blr
+
+
+; 64BIT-LABEL: vec_caller:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr 0
+; 64BIT-NEXT:    std 0, 16(1)
+; 64BIT-NEXT:    stdu 1, -112(1)
+; 64BIT-NEXT:    ld 3, L..C0(2)
+; 64BIT-NEXT:    ld 4, L..C1(2)
+; 64BIT-NEXT:    xxlxor 34, 34, 34
+; 64BIT-NEXT:    xxlxor 35, 35, 35
+; 64BIT-NEXT:    xxlxor 36, 36, 36
+; 64BIT-NEXT:    lxvw4x 0, 0, 3
+; 64BIT-NEXT:    lxvw4x 1, 0, 4
+; 64BIT-NEXT:    xxlxor 37, 37, 37
+; 64BIT-NEXT:    li 3, 64
+; 64BIT-NEXT:    xxlxor 38, 38, 38
+; 64BIT-NEXT:    li 4, 48
+; 64BIT-NEXT:    xxlxor 39, 39, 39
+; 64BIT-NEXT:    xxlxor 40, 40, 40
+; 64BIT-NEXT:    stxvw4x 0, 1, 3
+; 64BIT-NEXT:    xxlxor 41, 41, 41
+; 64BIT-NEXT:    stxvw4x 1, 1, 4
+; 64BIT-NEXT:    xxlxor 42, 42, 42
+; 64BIT-NEXT:    xxlxor 43, 43, 43
+; 64BIT-NEXT:    xxlxor 44, 44, 44
+; 64BIT-NEXT:    xxlxor 45, 45, 45
+; 64BIT-NEXT:    bl .vec_callee_stack[PR]
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    addi 1, 1, 112
+; 64BIT-NEXT:    ld 0, 16(1)
+; 64BIT-NEXT:    mtlr 0
+; 64BIT-NEXT:    blr
+
+; LITERAL:         .toc
+; LITERAL:       L..C0:
+; LITERAL-NEXT:    .tc L..CPI0_0[TC],L..CPI0_0
+; LITERAL-NEXT:  L..C1:
+; LITERAL-NEXT:    .tc L..CPI0_1[TC],L..CPI0_1
+
 entry:
-  %call = call i32 bitcast (i32 (...)* @vec_callee_stack to i32 (<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*)(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>, <4 x i32> <i32 29, i32 30, i32 31, i32 32>, <4 x i32> <i32 33, i32 34, i32 35, i32 36>, <4 x i32> <i32 37, i32 38, i32 39, i32 40>, <4 x i32> <i32 41, i32 42, i32 43, i32 44>, <4 x i32> <i32 45, i32 46, i32 47, i32 48>, <4 x i32> <i32 49, i32 50, i32 51, i32 52>, <4 x i32> <i32 53, i32 54, i32 55, i32 56>)
-  ret i32 0
+  %call = call i32 bitcast (i32 (...)* @vec_callee_stack to i32 (<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*)(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 49, i32 50, i32 51, i32 52>, <4 x i32> <i32 53, i32 54, i32 55, i32 56>)
+  ret i32 %call
 }
 
 declare i32 @vec_callee_stack(...)
-
-; AIX-ERROR:  LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX
diff --git a/test/CodeGen/PowerPC/aix-vector-stack.ll b/test/CodeGen/PowerPC/aix-vector-stack.ll
index 8809de9..26cf459 100644
--- a/test/CodeGen/PowerPC/aix-vector-stack.ll
+++ b/test/CodeGen/PowerPC/aix-vector-stack.ll
@@ -1,27 +1,18 @@
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN:   -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefix=32BIT
 
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN:   -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefix=64BIT
 
-define dso_local <4 x i32> @vec_callee_stack(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12, <4 x i32> %vec13, <4 x i32> %vec14) {
+define dso_local <4 x i32> @vec_callee_stack(<4 x i32> %vr2, <4 x i32> %vr3, <4 x i32> %vr4, <4 x i32> %vr5, <4 x i32> %vr6, <4 x i32> %vr7, <4 x i32> %vr8, <4 x i32> %vr9, <4 x i32> %vr10, <4 x i32> %vr11, <4 x i32> %vr12, <4 x i32> %vr13, <4 x i32> %vSpill1, <4 x i32> %vSpill2) {
 entry:
-  %add = add <4 x i32> %vec1, %vec2
-  %add1 = add <4 x i32> %add, %vec3
-  %add2 = add <4 x i32> %add1, %vec4
-  %add3 = add <4 x i32> %add2, %vec5
-  %add4 = add <4 x i32> %add3, %vec6
-  %add5 = add <4 x i32> %add4, %vec7
-  %add6 = add <4 x i32> %add5, %vec8
-  %add7 = add <4 x i32> %add6, %vec9
-  %add8 = add <4 x i32> %add7, %vec10
-  %add9 = add <4 x i32> %add8, %vec11
-  %add10 = add <4 x i32> %add9, %vec12
-  %add11 = add <4 x i32> %add10, %vec13
-  %add12 = add <4 x i32> %add11, %vec14
-  ret <4 x i32> %add12
+  ret <4 x i32> %vSpill2
 }
 
-; AIX-ERROR:  LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX
+; 32BIT:       addi [[SCRATCH:[0-9]+]], 1, 48
+; 32BIT-NEXT:  lxvw4x 34, 0, [[SCRATCH]]
+
+; 64BIT:       addi [[SCRATCH:[0-9]+]], 1, 64
+; 64BIT-NEXT:  lxvw4x 34, 0, [[SCRATCH]]