; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
; RUN:   -mcpu=future -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; test_wacc_copy: stack-slot-heavy IR (attribute group #0 is optnone with
; "frame-pointer"="all") that moves <1024 x i1> values between memory and
; registers around a call to @llvm.ppc.mma.dmxvi8gerx4.
;
; IR data flow:
;   1. Spill all four arguments to allocas.
;   2. Load a <1024 x i1> through %vdmrp and copy it into the local %vdmr.
;   3. Load a <256 x i1> through %vpp, copy it into the local %vp, reload it.
;   4. Call the intrinsic on (%vp value, %vc value); the result overwrites
;      %vdmr.
;   5. Reload %vdmr and store it through %resp.
;
; The expected assembly below performs every 1024-bit load as two
; lxvp + dmxxinstdmr512 groups and every 1024-bit store as two
; dmxxextfdmr512 + stxvp groups, targeting wacc0 and wacc_hi0.
; Between the two llc invocations the expectations differ in the register the
; fourth argument is spilled from (r7 on Linux, r5 on AIX), in the
; lxvp/stxvp offsets (mirrored), and in the absence of .cfi directives on AIX.
define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
; CHECK-LABEL: test_wacc_copy:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    std r31, -8(r1)
; CHECK-NEXT:    std r30, -16(r1)
; CHECK-NEXT:    mr r30, r1
; CHECK-NEXT:    clrldi r0, r1, 57
; CHECK-NEXT:    subfic r0, r0, -384
; CHECK-NEXT:    stdux r1, r1, r0
; CHECK-NEXT:    .cfi_def_cfa_register r30
; CHECK-NEXT:    .cfi_offset r31, -8
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    mr r31, r1
; CHECK-NEXT:    std r3, 360(r31)
; CHECK-NEXT:    std r4, 352(r31)
; CHECK-NEXT:    stxv v2, 336(r31)
; CHECK-NEXT:    std r7, 328(r31)
; CHECK-NEXT:    ld r3, 360(r31)
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r3)
; CHECK-NEXT:    lxvp vsp36, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 224(r31)
; CHECK-NEXT:    stxvp vsp36, 192(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 160(r31)
; CHECK-NEXT:    stxvp vsp36, 128(r31)
; CHECK-NEXT:    ld r3, 352(r31)
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    stxvp vsp34, 96(r31)
; CHECK-NEXT:    lxvp vsp34, 96(r31)
; CHECK-NEXT:    lxv vs0, 336(r31)
; CHECK-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 224(r31)
; CHECK-NEXT:    stxvp vsp36, 192(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 160(r31)
; CHECK-NEXT:    stxvp vsp36, 128(r31)
; CHECK-NEXT:    lxvp vsp34, 128(r31)
; CHECK-NEXT:    lxvp vsp36, 160(r31)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 192(r31)
; CHECK-NEXT:    lxvp vsp36, 224(r31)
; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT:    ld r3, 328(r31)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r3)
; CHECK-NEXT:    stxvp vsp36, 64(r3)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r3)
; CHECK-NEXT:    stxvp vsp36, 0(r3)
; CHECK-NEXT:    mr r1, r30
; CHECK-NEXT:    ld r31, -8(r1)
; CHECK-NEXT:    ld r30, -16(r1)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_wacc_copy:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    std r31, -8(r1)
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    clrldi r0, r1, 57
; CHECK-BE-NEXT:    subfic r0, r0, -384
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    mr r31, r1
; CHECK-BE-NEXT:    std r3, 360(r31)
; CHECK-BE-NEXT:    std r4, 352(r31)
; CHECK-BE-NEXT:    stxv v2, 336(r31)
; CHECK-BE-NEXT:    std r5, 328(r31)
; CHECK-BE-NEXT:    ld r3, 360(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
; CHECK-BE-NEXT:    ld r3, 352(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 0(r3)
; CHECK-BE-NEXT:    stxvp vsp34, 96(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 96(r31)
; CHECK-BE-NEXT:    lxv vs0, 336(r31)
; CHECK-BE-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
; CHECK-BE-NEXT:    lxvp vsp34, 224(r31)
; CHECK-BE-NEXT:    lxvp vsp36, 192(r31)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 160(r31)
; CHECK-BE-NEXT:    lxvp vsp36, 128(r31)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT:    ld r3, 328(r31)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r3)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r3)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r3)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r3)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r31, -8(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    blr
entry:
  ; One stack slot per argument, plus locals for the 1024-bit and 256-bit
  ; values. The order of these allocas feeds the frame offsets asserted above.
  %vdmrp.addr = alloca ptr, align 8
  %vpp.addr = alloca ptr, align 8
  %vc.addr = alloca <16 x i8>, align 16
  %resp.addr = alloca ptr, align 8
  %vdmr = alloca <1024 x i1>, align 128
  %vp = alloca <256 x i1>, align 32
  ; Spill the incoming arguments.
  store ptr %vdmrp, ptr %vdmrp.addr, align 8
  store ptr %vpp, ptr %vpp.addr, align 8
  store <16 x i8> %vc, ptr %vc.addr, align 16
  store ptr %resp, ptr %resp.addr, align 8
  ; Copy the caller's <1024 x i1> into the local %vdmr.
  %0 = load ptr, ptr %vdmrp.addr, align 8
  %1 = load <1024 x i1>, ptr %0, align 128
  store <1024 x i1> %1, ptr %vdmr, align 128
  ; Copy the caller's <256 x i1> into the local %vp, then reload it.
  %2 = load ptr, ptr %vpp.addr, align 8
  %3 = load <256 x i1>, ptr %2, align 32
  store <256 x i1> %3, ptr %vp, align 32
  %4 = load <256 x i1>, ptr %vp, align 32
  %5 = load <16 x i8>, ptr %vc.addr, align 16
  ; The intrinsic result replaces the value previously stored in %vdmr.
  %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
  store <1024 x i1> %6, ptr %vdmr, align 128
  ; Write the final %vdmr contents out through the %resp pointer.
  %7 = load <1024 x i1>, ptr %vdmr, align 128
  %8 = load ptr, ptr %resp.addr, align 8
  store <1024 x i1> %7, ptr %8, align 128
  ret void
}

; foo: two dmxor results that share one @llvm.ppc.dmsetdmrz value.
;
; IR data flow:
;   %0 = dmsetdmrz()
;   %2 = dmxor(%0, load %p1)  -> dmmr -> stored to %res1
;   %4 = dmxor(%0, load %p2)  -> dmmr -> stored to %res2
;
; The expected assembly contains an extra "dmmr dmr2, dmr0" ahead of the first
; dmxor and a "dmmr dmr1, dmr2" after the second one.
; NOTE(review): presumably these are register copies needed because %0 feeds
; both dmxor calls while dmxor overwrites its first operand -- confirm against
; the instruction definition.
; Each 1024-bit load and store is expected as lxvp/dmxxinstdmr512 and
; dmxxextfdmr512/stxvp groups, with mirrored offsets between the two llc
; invocations.
define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    dmsetdmrz dmr0
; CHECK-NEXT:    lxvp vsp34, 0(r3)
; CHECK-NEXT:    lxvp vsp36, 32(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r3)
; CHECK-NEXT:    lxvp vsp36, 96(r3)
; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT:    dmmr dmr2, dmr0
; CHECK-NEXT:    dmxor dmr2, dmr1
; CHECK-NEXT:    lxvp vsp34, 0(r4)
; CHECK-NEXT:    lxvp vsp36, 32(r4)
; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT:    lxvp vsp34, 64(r4)
; CHECK-NEXT:    lxvp vsp36, 96(r4)
; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT:    dmxor dmr0, dmr1
; CHECK-NEXT:    dmmr dmr1, dmr2
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-NEXT:    stxvp vsp34, 96(r5)
; CHECK-NEXT:    stxvp vsp36, 64(r5)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-NEXT:    stxvp vsp34, 32(r5)
; CHECK-NEXT:    stxvp vsp36, 0(r5)
; CHECK-NEXT:    dmmr dmr0, dmr0
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT:    stxvp vsp34, 96(r6)
; CHECK-NEXT:    stxvp vsp36, 64(r6)
; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT:    stxvp vsp34, 32(r6)
; CHECK-NEXT:    stxvp vsp36, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: foo:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    dmsetdmrz dmr0
; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmmr dmr2, dmr0
; CHECK-BE-NEXT:    dmxor dmr2, dmr1
; CHECK-BE-NEXT:    lxvp vsp34, 96(r4)
; CHECK-BE-NEXT:    lxvp vsp36, 64(r4)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT:    lxvp vsp34, 32(r4)
; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT:    dmxor dmr0, dmr1
; CHECK-BE-NEXT:    dmmr dmr1, dmr2
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
; CHECK-BE-NEXT:    dmmr dmr0, dmr0
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is the shared first operand of both dmxor calls below.
  %0 = tail call <1024 x i1> @llvm.ppc.dmsetdmrz()
  %1 = load <1024 x i1>, ptr %p1, align 128
  %2 = tail call <1024 x i1> @llvm.ppc.dmxor(<1024 x i1> %0, <1024 x i1> %1)
  %3 = load <1024 x i1>, ptr %p2, align 128
  %4 = tail call <1024 x i1> @llvm.ppc.dmxor(<1024 x i1> %0, <1024 x i1> %3)
  ; Pass each dmxor result through dmmr before storing it to its output.
  %5 = tail call <1024 x i1> @llvm.ppc.dmmr(<1024 x i1> %2)
  store <1024 x i1> %5, ptr %res1, align 128
  %6 = tail call <1024 x i1> @llvm.ppc.dmmr(<1024 x i1> %4)
  store <1024 x i1> %6, ptr %res2, align 128
  ret void
}

; PowerPC intrinsics called by the functions in this file. The first three are
; used by @foo; @llvm.ppc.mma.dmxvi8gerx4 is used by @test_wacc_copy.
declare <1024 x i1> @llvm.ppc.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.dmxor(<1024 x i1>, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)

; Attribute group shared by both test functions. It includes noinline and
; optnone, "frame-pointer"="all" (the expected output for @test_wacc_copy sets
; up r31/r30 around its frame), and "target-cpu"="future" with an explicit
; target-features list.
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }


