; blob: 6e5e0b04f9424881f2fc78dc44c0551434b2eb41 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; test8i8: load a <8 x i16> from %SrcPtr, truncate every lane to i8 and store
; the resulting <8 x i8> to %Sink.
; Little-endian expectations: the lanes are pulled out through GPRs
; (mfvsrd followed by clrldi/rldicl), moved back with mtvsrd and re-packed
; with vmrglb/vmrglh/vmrglw before a single stfdx store.
; Big-endian expectations: the vector is stored at r1-32, each lane is copied
; with lhz/stb into its own 16-byte-spaced slot, the slots are reloaded with
; lxvw4x and merged with vmrghb/vmrghh/vmrghw; the result leaves via ld/std.
define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: clrldi r5, r4, 48
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: rldicl r5, r4, 48, 48
; CHECK-NEXT: mtvsrd f2, r5
; CHECK-NEXT: rldicl r5, r4, 32, 48
; CHECK-NEXT: rldicl r4, r4, 16, 48
; CHECK-NEXT: mtvsrd f3, r5
; CHECK-NEXT: xxswapd v2, vs1
; CHECK-NEXT: mfvsrd r5, f0
; CHECK-NEXT: xxswapd v3, vs2
; CHECK-NEXT: mtvsrd f0, r4
; CHECK-NEXT: clrldi r4, r5, 48
; CHECK-NEXT: mtvsrd f1, r4
; CHECK-NEXT: rldicl r4, r5, 48, 48
; CHECK-NEXT: xxswapd v4, vs0
; CHECK-NEXT: mtvsrd f2, r4
; CHECK-NEXT: rldicl r4, r5, 32, 48
; CHECK-NEXT: rldicl r5, r5, 16, 48
; CHECK-NEXT: vmrglb v2, v3, v2
; CHECK-NEXT: xxswapd v3, vs3
; CHECK-NEXT: mtvsrd f3, r4
; CHECK-NEXT: xxswapd v5, vs1
; CHECK-NEXT: mtvsrd f0, r5
; CHECK-NEXT: xxswapd v0, vs2
; CHECK-NEXT: xxswapd v1, vs3
; CHECK-NEXT: vmrglb v3, v4, v3
; CHECK-NEXT: xxswapd v6, vs0
; CHECK-NEXT: vmrglb v4, v0, v5
; CHECK-NEXT: vmrglb v5, v6, v1
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: vmrglh v3, v5, v4
; CHECK-NEXT: vmrglw v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test8i8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lhz r4, -18(r1)
; CHECK-BE-NEXT: stb r4, -48(r1)
; CHECK-BE-NEXT: lhz r4, -20(r1)
; CHECK-BE-NEXT: stb r4, -64(r1)
; CHECK-BE-NEXT: lhz r4, -22(r1)
; CHECK-BE-NEXT: stb r4, -80(r1)
; CHECK-BE-NEXT: lhz r4, -24(r1)
; CHECK-BE-NEXT: stb r4, -96(r1)
; CHECK-BE-NEXT: lhz r4, -26(r1)
; CHECK-BE-NEXT: stb r4, -112(r1)
; CHECK-BE-NEXT: lhz r4, -28(r1)
; CHECK-BE-NEXT: stb r4, -128(r1)
; CHECK-BE-NEXT: lhz r4, -30(r1)
; CHECK-BE-NEXT: stb r4, -144(r1)
; CHECK-BE-NEXT: lhz r4, -32(r1)
; CHECK-BE-NEXT: stb r4, -160(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -80
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -96
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -112
; CHECK-BE-NEXT: lxvw4x v0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -128
; CHECK-BE-NEXT: lxvw4x v1, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -144
; CHECK-BE-NEXT: lxvw4x v6, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -160
; CHECK-BE-NEXT: lxvw4x v7, 0, r4
; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: vmrghb v3, v5, v4
; CHECK-BE-NEXT: vmrghb v4, v1, v0
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: vmrghb v5, v7, v6
; CHECK-BE-NEXT: vmrghh v3, v5, v4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: stxvd2x v2, 0, r4
; CHECK-BE-NEXT: ld r4, -16(r1)
; CHECK-BE-NEXT: std r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the eight 16-bit source lanes.
%0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
; Narrow each lane to 8 bits; this is the operation under test.
%1 = trunc <8 x i16> %0 to <8 x i8>
; Write the narrowed vector to the destination.
store <8 x i8> %1, <8 x i8>* %Sink, align 16
ret void
}
; test4i8: load a <4 x i16> from %SrcPtr, truncate every lane to i8 and store
; the resulting <4 x i8> to %Sink.
; Little-endian expectations: the four halfwords are isolated from one GPR
; with clrldi/rldicl, moved back with mtvsrd, merged with vmrglb/vmrglh and
; stored with stfiwx.
; Big-endian expectations: the lanes are copied one by one with lhz/stb via
; r1-relative slots, rebuilt with lxvw4x and vmrghb/vmrghh, then written
; with lwz/stw.
define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrd r4, f0
; CHECK-NEXT: clrldi r5, r4, 48
; CHECK-NEXT: mtvsrd f0, r5
; CHECK-NEXT: rldicl r5, r4, 48, 48
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: rldicl r5, r4, 32, 48
; CHECK-NEXT: rldicl r4, r4, 16, 48
; CHECK-NEXT: mtvsrd f2, r5
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: mtvsrd f3, r4
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: xxswapd v4, vs2
; CHECK-NEXT: xxswapd v5, vs3
; CHECK-NEXT: vmrglb v2, v3, v2
; CHECK-NEXT: vmrglb v3, v5, v4
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lhz r4, -26(r1)
; CHECK-BE-NEXT: stb r4, -48(r1)
; CHECK-BE-NEXT: lhz r4, -28(r1)
; CHECK-BE-NEXT: stb r4, -64(r1)
; CHECK-BE-NEXT: lhz r4, -30(r1)
; CHECK-BE-NEXT: stb r4, -80(r1)
; CHECK-BE-NEXT: lhz r4, -32(r1)
; CHECK-BE-NEXT: stb r4, -96(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -80
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -96
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghb v3, v5, v4
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the four 16-bit source lanes.
%0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
; Narrow each lane to 8 bits; this is the operation under test.
%1 = trunc <4 x i16> %0 to <4 x i8>
; Write the narrowed vector to the destination.
store <4 x i8> %1, <4 x i8>* %Sink, align 16
ret void
}
; test4i8w: load a <4 x i32> from %SrcPtr, truncate every lane to i8 and store
; the resulting <4 x i8> to %Sink (word-sized source lanes, unlike test4i8).
; Little-endian expectations: each word is brought into position with
; xxswapd/xxsldwi, read with mfvsrwz, moved back with mtvsrd, merged with
; vmrglb/vmrglh and stored with stfiwx.
; Big-endian expectations: the lanes are copied one by one with lwz/stb via
; r1-relative slots, rebuilt with lxvw4x and vmrghb/vmrghh, then written
; with lwz/stw.
define void @test4i8w(<4 x i8>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i8w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrwz r4, v2
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
; CHECK-NEXT: mtvsrd f2, r4
; CHECK-NEXT: mfvsrwz r4, f0
; CHECK-NEXT: mfvsrwz r5, f1
; CHECK-NEXT: xxswapd v4, vs2
; CHECK-NEXT: mtvsrd f0, r4
; CHECK-NEXT: mfvsrwz r4, f3
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: mtvsrd f3, r4
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: xxswapd v5, vs3
; CHECK-NEXT: vmrglb v2, v3, v2
; CHECK-NEXT: vmrglb v3, v5, v4
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i8w:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lwz r4, -20(r1)
; CHECK-BE-NEXT: stb r4, -48(r1)
; CHECK-BE-NEXT: lwz r4, -24(r1)
; CHECK-BE-NEXT: stb r4, -64(r1)
; CHECK-BE-NEXT: lwz r4, -28(r1)
; CHECK-BE-NEXT: stb r4, -80(r1)
; CHECK-BE-NEXT: lwz r4, -32(r1)
; CHECK-BE-NEXT: stb r4, -96(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -80
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -96
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghb v3, v5, v4
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the four 32-bit source lanes.
%0 = load <4 x i32>, <4 x i32>* %SrcPtr, align 16
; Narrow each lane from 32 to 8 bits; this is the operation under test.
%1 = trunc <4 x i32> %0 to <4 x i8>
; Write the narrowed vector to the destination.
store <4 x i8> %1, <4 x i8>* %Sink, align 16
ret void
}
; test2i8: load a <2 x i16> from %SrcPtr, truncate both lanes to i8 and store
; the resulting <2 x i8> to %Sink.
; Little-endian expectations: the two halfwords are isolated with
; clrldi/rldicl, merged with vmrglb, and the packed pair is moved back to a
; GPR and stored with sth.
; Big-endian expectations: the lanes are copied with lhz/stb via r1-relative
; slots, merged with vmrghb, and the result is written with lhz/sth.
define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrd r4, f0
; CHECK-NEXT: clrldi r5, r4, 48
; CHECK-NEXT: rldicl r4, r4, 48, 48
; CHECK-NEXT: mtvsrd f0, r5
; CHECK-NEXT: mtvsrd f1, r4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: vmrglb v2, v3, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrd r4, f0
; CHECK-NEXT: clrldi r4, r4, 48
; CHECK-NEXT: sth r4, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2i8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lhz r4, -30(r1)
; CHECK-BE-NEXT: stb r4, -48(r1)
; CHECK-BE-NEXT: lhz r4, -32(r1)
; CHECK-BE-NEXT: stb r4, -64(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lhz r4, -16(r1)
; CHECK-BE-NEXT: sth r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the two 16-bit source lanes.
%0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
; Narrow each lane to 8 bits; this is the operation under test.
%1 = trunc <2 x i16> %0 to <2 x i8>
; Write the narrowed vector to the destination.
store <2 x i8> %1, <2 x i8>* %Sink, align 16
ret void
}
; test4i16: load a <4 x i32> from %SrcPtr, truncate every lane to i16 and
; store the resulting <4 x i16> to %Sink.
; Little-endian expectations: each word is brought into position with
; xxswapd/xxsldwi, read with mfvsrwz, moved back with mtvsrd, merged with
; vmrglh/vmrglw and stored with stfdx.
; Big-endian expectations: the lanes are copied one by one with lwz/sth via
; r1-relative slots, rebuilt with lxvw4x and vmrghh/vmrghw, then written
; with ld/std.
define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrwz r4, v2
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
; CHECK-NEXT: mtvsrd f2, r4
; CHECK-NEXT: mfvsrwz r4, f0
; CHECK-NEXT: mfvsrwz r5, f1
; CHECK-NEXT: xxswapd v4, vs2
; CHECK-NEXT: mtvsrd f0, r4
; CHECK-NEXT: mfvsrwz r4, f3
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: mtvsrd f3, r4
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: xxswapd v5, vs3
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: vmrglh v3, v5, v4
; CHECK-NEXT: vmrglw v2, v3, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i16:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lwz r4, -20(r1)
; CHECK-BE-NEXT: sth r4, -48(r1)
; CHECK-BE-NEXT: lwz r4, -24(r1)
; CHECK-BE-NEXT: sth r4, -64(r1)
; CHECK-BE-NEXT: lwz r4, -28(r1)
; CHECK-BE-NEXT: sth r4, -80(r1)
; CHECK-BE-NEXT: lwz r4, -32(r1)
; CHECK-BE-NEXT: sth r4, -96(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -80
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -96
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghh v3, v5, v4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: stxvd2x v2, 0, r4
; CHECK-BE-NEXT: ld r4, -16(r1)
; CHECK-BE-NEXT: std r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the four 32-bit source lanes.
%0 = load <4 x i32>, <4 x i32>* %SrcPtr, align 16
; Narrow each lane to 16 bits; this is the operation under test.
%1 = trunc <4 x i32> %0 to <4 x i16>
; Write the narrowed vector to the destination.
store <4 x i16> %1, <4 x i16>* %Sink, align 16
ret void
}
; test2i16: load a <2 x i32> from %SrcPtr, truncate both lanes to i16 and
; store the resulting <2 x i16> to %Sink.
; Little-endian expectations: the two words are positioned with
; xxswapd/xxsldwi, read with mfvsrwz, moved back with mtvsrd, merged with
; vmrglh and stored with stfiwx.
; Big-endian expectations: the lanes are copied with lwz/sth via r1-relative
; slots, merged with vmrghh, and the result is written with lwz/stw.
define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
; CHECK-NEXT: mfvsrwz r4, f0
; CHECK-NEXT: mfvsrwz r5, f1
; CHECK-NEXT: mtvsrd f0, r4
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2i16:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
; CHECK-BE-NEXT: lwz r4, -28(r1)
; CHECK-BE-NEXT: sth r4, -48(r1)
; CHECK-BE-NEXT: lwz r4, -32(r1)
; CHECK-BE-NEXT: sth r4, -64(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the two 32-bit source lanes.
%0 = load <2 x i32>, <2 x i32>* %SrcPtr, align 16
; Narrow each lane to 16 bits; this is the operation under test.
%1 = trunc <2 x i32> %0 to <2 x i16>
; Write the narrowed vector to the destination.
store <2 x i16> %1, <2 x i16>* %Sink, align 16
ret void
}
; test2i16d: load a <2 x i64> from %SrcPtr, truncate both lanes to i16 and
; store the resulting <2 x i16> to %Sink (doubleword source lanes, unlike
; test2i16).
; Little-endian expectations: the vector is loaded with lxvd2x, each
; doubleword is read with mfvsrwz, moved back with mtvsrd, merged with vmrglh
; and stored with stfiwx.
; Big-endian expectations: the vector is stored at r1-32 with stxvd2x, the
; lanes are copied with lwz/sth via r1-relative slots, merged with vmrghh,
; and the result is written with lwz/stw.
define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i16d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: xxswapd vs1, vs0
; CHECK-NEXT: mfvsrwz r4, f0
; CHECK-NEXT: mtvsrd f0, r4
; CHECK-NEXT: mfvsrwz r5, f1
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: mtvsrd f1, r5
; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2i16d:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvd2x vs0, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -32
; CHECK-BE-NEXT: stxvd2x vs0, 0, r4
; CHECK-BE-NEXT: lwz r4, -20(r1)
; CHECK-BE-NEXT: sth r4, -48(r1)
; CHECK-BE-NEXT: lwz r4, -28(r1)
; CHECK-BE-NEXT: sth r4, -64(r1)
; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -64
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -16
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Fetch the two 64-bit source lanes.
%0 = load <2 x i64>, <2 x i64>* %SrcPtr, align 16
; Narrow each lane from 64 to 16 bits; this is the operation under test.
%1 = trunc <2 x i64> %0 to <2 x i16>
; Write the narrowed vector to the destination.
store <2 x i16> %1, <2 x i16>* %Sink, align 16
ret void
}