; blob: d5a24309f94d531b3fdd7297e19d5eb86c41cfff [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
; RUN: -mcpu=future -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
; test_wacc_copy: spills its four arguments to stack slots, copies the
; <1024 x i1> value at %vdmrp into the local %vdmr, copies the <256 x i1>
; value at %vpp into the local %vp, then calls llvm.ppc.mma.dmxvi8gerx4 on the
; reloaded %vp and %vc. The intrinsic result overwrites %vdmr and is finally
; copied out through %resp.
;
; The autogenerated assertions inside the function pin the llc output for both
; invocations listed at the top of this file (powerpc64le-unknown-linux-gnu and
; powerpc64-ibm-aix). In both expected sequences the 1024-bit value moves
; through wacc0 / wacc_hi0 using dmxxinstdmr512 and dmxxextfdmr512, with
; lxvp / stxvp transferring the vsp34 / vsp36 register pairs to and from memory.
define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
; CHECK-LABEL: test_wacc_copy:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: std r31, -8(r1)
; CHECK-NEXT: std r30, -16(r1)
; CHECK-NEXT: mr r30, r1
; CHECK-NEXT: clrldi r0, r1, 57
; CHECK-NEXT: subfic r0, r0, -384
; CHECK-NEXT: stdux r1, r1, r0
; CHECK-NEXT: .cfi_def_cfa_register r30
; CHECK-NEXT: .cfi_offset r31, -8
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: mr r31, r1
; CHECK-NEXT: std r3, 360(r31)
; CHECK-NEXT: std r4, 352(r31)
; CHECK-NEXT: stxv v2, 336(r31)
; CHECK-NEXT: std r7, 328(r31)
; CHECK-NEXT: ld r3, 360(r31)
; CHECK-NEXT: lxvp vsp34, 0(r3)
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: lxvp vsp34, 64(r3)
; CHECK-NEXT: lxvp vsp36, 96(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 224(r31)
; CHECK-NEXT: stxvp vsp36, 192(r31)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 160(r31)
; CHECK-NEXT: stxvp vsp36, 128(r31)
; CHECK-NEXT: ld r3, 352(r31)
; CHECK-NEXT: lxv v2, 16(r3)
; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: stxv v2, 112(r31)
; CHECK-NEXT: stxv v3, 96(r31)
; CHECK-NEXT: lxv v2, 112(r31)
; CHECK-NEXT: lxv v3, 96(r31)
; CHECK-NEXT: lxv vs0, 336(r31)
; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 224(r31)
; CHECK-NEXT: stxvp vsp36, 192(r31)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 160(r31)
; CHECK-NEXT: stxvp vsp36, 128(r31)
; CHECK-NEXT: lxvp vsp34, 128(r31)
; CHECK-NEXT: lxvp vsp36, 160(r31)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: lxvp vsp34, 192(r31)
; CHECK-NEXT: lxvp vsp36, 224(r31)
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: ld r3, 328(r31)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r3)
; CHECK-NEXT: stxvp vsp36, 64(r3)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r3)
; CHECK-NEXT: stxvp vsp36, 0(r3)
; CHECK-NEXT: mr r1, r30
; CHECK-NEXT: ld r31, -8(r1)
; CHECK-NEXT: ld r30, -16(r1)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_wacc_copy:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: std r31, -8(r1)
; CHECK-BE-NEXT: std r30, -16(r1)
; CHECK-BE-NEXT: mr r30, r1
; CHECK-BE-NEXT: clrldi r0, r1, 57
; CHECK-BE-NEXT: subfic r0, r0, -384
; CHECK-BE-NEXT: stdux r1, r1, r0
; CHECK-BE-NEXT: mr r31, r1
; CHECK-BE-NEXT: std r3, 360(r31)
; CHECK-BE-NEXT: std r4, 352(r31)
; CHECK-BE-NEXT: stxv v2, 336(r31)
; CHECK-BE-NEXT: std r5, 328(r31)
; CHECK-BE-NEXT: ld r3, 360(r31)
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
; CHECK-BE-NEXT: ld r3, 352(r31)
; CHECK-BE-NEXT: lxv v2, 0(r3)
; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v3, 112(r31)
; CHECK-BE-NEXT: stxv v2, 96(r31)
; CHECK-BE-NEXT: lxv v2, 96(r31)
; CHECK-BE-NEXT: lxv v3, 112(r31)
; CHECK-BE-NEXT: lxv vs0, 336(r31)
; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
; CHECK-BE-NEXT: lxvp vsp34, 224(r31)
; CHECK-BE-NEXT: lxvp vsp36, 192(r31)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: lxvp vsp34, 160(r31)
; CHECK-BE-NEXT: lxvp vsp36, 128(r31)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT: ld r3, 328(r31)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r3)
; CHECK-BE-NEXT: stxvp vsp34, 64(r3)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r3)
; CHECK-BE-NEXT: stxvp vsp34, 0(r3)
; CHECK-BE-NEXT: mr r1, r30
; CHECK-BE-NEXT: ld r31, -8(r1)
; CHECK-BE-NEXT: ld r30, -16(r1)
; CHECK-BE-NEXT: blr
entry:
; Stack slots: one per incoming argument, plus a 128-byte-aligned local for the
; 1024-bit value and a 32-byte-aligned local for the 256-bit value.
%vdmrp.addr = alloca ptr, align 8
%vpp.addr = alloca ptr, align 8
%vc.addr = alloca <16 x i8>, align 16
%resp.addr = alloca ptr, align 8
%vdmr = alloca <1024 x i1>, align 128
%vp = alloca <256 x i1>, align 32
; Spill every argument to its slot.
store ptr %vdmrp, ptr %vdmrp.addr, align 8
store ptr %vpp, ptr %vpp.addr, align 8
store <16 x i8> %vc, ptr %vc.addr, align 16
store ptr %resp, ptr %resp.addr, align 8
; Copy the 1024-bit value that %vdmrp points to into the local %vdmr.
%0 = load ptr, ptr %vdmrp.addr, align 8
%1 = load <1024 x i1>, ptr %0, align 128
store <1024 x i1> %1, ptr %vdmr, align 128
; Copy the 256-bit value that %vpp points to into the local %vp.
%2 = load ptr, ptr %vpp.addr, align 8
%3 = load <256 x i1>, ptr %2, align 32
store <256 x i1> %3, ptr %vp, align 32
; Reload both intrinsic operands from their slots and call dmxvi8gerx4.
%4 = load <256 x i1>, ptr %vp, align 32
%5 = load <16 x i8>, ptr %vc.addr, align 16
%6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
; The result overwrites %vdmr; the value copied in from %vdmrp earlier is not
; an operand of the call.
store <1024 x i1> %6, ptr %vdmr, align 128
; Reload the result and write it out through the pointer saved from %resp.
%7 = load <1024 x i1>, ptr %vdmr, align 128
%8 = load ptr, ptr %resp.addr, align 8
store <1024 x i1> %7, ptr %8, align 128
ret void
}
; foo: creates one <1024 x i1> value with llvm.ppc.mma.dmsetdmrz and uses it as
; the first operand of two llvm.ppc.mma.dmxor calls; the second operands are the
; 1024-bit values loaded from %p1 and %p2. Each dmxor result is passed through
; llvm.ppc.mma.dmmr and stored to %res1 and %res2 respectively.
;
; Because %0 feeds both dmxor calls, the expected output for both targets
; contains "dmmr dmr2, dmr0" ahead of the first dmxor.
; NOTE(review): presumably that is a register copy keeping the dmsetdmrz value
; available for the second dmxor - confirm against the backend/ISA description.
define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: dmsetdmrz dmr0
; CHECK-NEXT: lxvp vsp34, 0(r3)
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT: lxvp vsp34, 64(r3)
; CHECK-NEXT: lxvp vsp36, 96(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT: dmmr dmr2, dmr0
; CHECK-NEXT: dmxor dmr2, dmr1
; CHECK-NEXT: lxvp vsp34, 0(r4)
; CHECK-NEXT: lxvp vsp36, 32(r4)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-NEXT: lxvp vsp34, 64(r4)
; CHECK-NEXT: lxvp vsp36, 96(r4)
; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-NEXT: dmxor dmr0, dmr1
; CHECK-NEXT: dmmr dmr1, dmr2
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
; CHECK-NEXT: stxvp vsp36, 64(r5)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-NEXT: stxvp vsp34, 32(r5)
; CHECK-NEXT: stxvp vsp36, 0(r5)
; CHECK-NEXT: dmmr dmr0, dmr0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r6)
; CHECK-NEXT: stxvp vsp36, 64(r6)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r6)
; CHECK-NEXT: stxvp vsp36, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: foo:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: dmsetdmrz dmr0
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT: dmmr dmr2, dmr0
; CHECK-BE-NEXT: dmxor dmr2, dmr1
; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
; CHECK-BE-NEXT: dmxor dmr0, dmr1
; CHECK-BE-NEXT: dmmr dmr1, dmr2
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
; CHECK-BE-NEXT: dmmr dmr0, dmr0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
; CHECK-BE-NEXT: blr
entry:
; %0 is the single dmsetdmrz result shared by both dmxor calls below.
%0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
; First chain: dmxor of %0 with the value loaded from %p1.
%1 = load <1024 x i1>, ptr %p1, align 128
%2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
; Second chain: dmxor of the same %0 with the value loaded from %p2.
%3 = load <1024 x i1>, ptr %p2, align 128
%4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
; Pass each dmxor result through dmmr and store it to its output pointer.
%5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
store <1024 x i1> %5, ptr %res1, align 128
%6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
store <1024 x i1> %6, ptr %res2, align 128
ret void
}
; Intrinsics called by the tests in this file: dmsetdmrz, dmxor and dmmr are
; used by @foo; dmxvi8gerx4 is used by @test_wacc_copy.
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
; Attribute group #0 is attached to both function definitions in this file; it
; marks them noinline/optnone and selects the "future" target CPU.
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }