blob: 2938857a124f2159639aa90333895898b46bb6d4 [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
; Legal: the vector types in the following tests are already legal for the target.
define <4 x i32> @sdiv_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m) {
; CHECK-LABEL: sdiv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: vslw 4, 4, 0
; CHECK-NEXT: xxswapd 4, 34
; CHECK-NEXT: xxsldwi 6, 34, 34, 3
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: vsraw 4, 4, 0
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: xxsldwi 2, 0, 0, 1
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: xxsldwi 5, 0, 0, 3
; CHECK-NEXT: mffprwz 3, 2
; CHECK-NEXT: mffprwz 5, 3
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mffprwz 4, 4
; CHECK-NEXT: divw 4, 4, 5
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mffprwz 3, 5
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprwz 4, 6
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: divw 4, 5, 4
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mtfprd 0, 4
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
; Masked signed division on <4 x i32> operands with a <4 x i1> mask.
; The expected assembly above shifts the mask left and then arithmetically
; right (vslw/vsraw), uses it in an xxsel between a splat of 1 (vspltisw 5, 1)
; and the second operand, and then performs four scalar divw operations whose
; results are packed back into one vector register.
; NOTE(review): presumably masked-off lanes divide by 1 so they cannot trap;
; confirm against the intrinsic's LangRef entry.
%res = call <4 x i32> @llvm.masked.sdiv(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m)
ret <4 x i32> %res
}
define <2 x i64> @sdiv_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m) {
; CHECK-LABEL: sdiv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: xxswapd 2, 34
; CHECK-NEXT: vsld 4, 4, 0
; CHECK-NEXT: vsrad 4, 4, 0
; CHECK-NEXT: vupklsw 5, 5
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: divd 3, 4, 3
; CHECK-NEXT: mffprd 4, 2
; CHECK-NEXT: xxswapd 1, 0
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: divd 3, 4, 3
; CHECK-NEXT: mtfprd 1, 3
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
; Masked signed division on <2 x i64> operands with a <2 x i1> mask.
; The expected assembly above shifts the mask with vsld/vsrad, builds a
; doubleword splat of 1 (vspltisw 5, 1 followed by vupklsw 5, 5), combines it
; with the second operand through xxsel, and then performs two scalar divd
; operations that are merged back with xxmrghd.
%res = call <2 x i64> @llvm.masked.sdiv(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m)
ret <2 x i64> %res
}
; Splitting: the vector type is split into smaller legal vectors.
define <4 x i64> @sdiv_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m) {
; CHECK-LABEL: sdiv_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xxmrglw 32, 38, 38
; CHECK-NEXT: xxleqv 39, 39, 39
; CHECK-NEXT: xxmrghw 33, 38, 38
; CHECK-NEXT: mfvsrd 3, 34
; CHECK-NEXT: vspltisw 6, 1
; CHECK-NEXT: mfvsrd 4, 35
; CHECK-NEXT: xxswapd 2, 34
; CHECK-NEXT: xxswapd 4, 35
; CHECK-NEXT: vsld 0, 0, 7
; CHECK-NEXT: mffprd 5, 2
; CHECK-NEXT: vsrad 0, 0, 7
; CHECK-NEXT: vupklsw 6, 6
; CHECK-NEXT: xxsel 0, 38, 36, 32
; CHECK-NEXT: vsld 4, 1, 7
; CHECK-NEXT: mffprd 6, 0
; CHECK-NEXT: vsrad 4, 4, 7
; CHECK-NEXT: divd 3, 3, 6
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: xxsel 1, 38, 37, 36
; CHECK-NEXT: mffprd 6, 1
; CHECK-NEXT: divd 4, 4, 6
; CHECK-NEXT: mffprd 6, 3
; CHECK-NEXT: divd 5, 5, 6
; CHECK-NEXT: mtfprd 2, 5
; CHECK-NEXT: xxswapd 5, 1
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprd 3, 5
; CHECK-NEXT: mffprd 4, 4
; CHECK-NEXT: divd 3, 4, 3
; CHECK-NEXT: xxmrghd 34, 0, 2
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: xxmrghd 35, 1, 0
; CHECK-NEXT: blr
; Masked signed division on <4 x i64> operands with a <4 x i1> mask.
; The expected assembly above expands the mask into two halves
; (xxmrglw/xxmrghw), applies one xxsel per half against a doubleword splat
; of 1, performs four scalar divd operations, and produces the result in two
; vector registers (the xxmrghd writes to 34 and 35).
%res = call <4 x i64> @llvm.masked.sdiv(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m)
ret <4 x i64> %res
}
; Widening: the vector type is widened to a larger legal vector.
define <2 x i32> @sdiv_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m) {
; CHECK-LABEL: sdiv_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-NEXT: xxlxor 32, 32, 32
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: addi 3, 3, .LCPI3_0@toc@l
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: xxswapd 4, 34
; CHECK-NEXT: xxsldwi 6, 34, 34, 3
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 37, 0
; CHECK-NEXT: vperm 4, 0, 4, 5
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: vslw 4, 4, 0
; CHECK-NEXT: vsraw 4, 4, 0
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: xxsldwi 2, 0, 0, 1
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: xxsldwi 5, 0, 0, 3
; CHECK-NEXT: mffprwz 3, 2
; CHECK-NEXT: mffprwz 5, 3
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mffprwz 4, 4
; CHECK-NEXT: divw 4, 4, 5
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mffprwz 3, 5
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprwz 4, 6
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: divw 4, 5, 4
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mtfprd 0, 4
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
; Masked signed division on <2 x i32> operands with a <2 x i1> mask.
; The expected assembly above first permutes the mask register against a
; zeroed register (xxlxor + vperm with the constant-pool control .LCPI3_0),
; then follows the same shape as the four-lane word case: vslw/vsraw on the
; mask, xxsel against a splat of 1, and four scalar divw operations.
; NOTE(review): the vperm presumably clears the mask for the two extra lanes
; introduced by widening; confirm against the .LCPI3_0 contents.
%res = call <2 x i32> @llvm.masked.sdiv(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m)
ret <2 x i32> %res
}
; Promotion
define <4 x i16> @sdiv_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m) {
; CHECK-LABEL: sdiv_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xxswapd 0, 36
; CHECK-NEXT: xxsldwi 1, 36, 36, 1
; CHECK-NEXT: mfvsrwz 3, 36
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: xxsldwi 2, 36, 36, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: mffprwz 5, 1
; CHECK-NEXT: mffprwz 6, 2
; CHECK-NEXT: mtvsrd 36, 3
; CHECK-NEXT: mtvsrd 37, 4
; CHECK-NEXT: mtvsrd 32, 5
; CHECK-NEXT: mfvsrd 5, 34
; CHECK-NEXT: rldicl 8, 5, 48, 48
; CHECK-NEXT: rldicl 9, 5, 32, 48
; CHECK-NEXT: extsh 8, 8
; CHECK-NEXT: extsh 9, 9
; CHECK-NEXT: vmrghh 5, 0, 5
; CHECK-NEXT: mtvsrd 32, 6
; CHECK-NEXT: vmrghh 4, 0, 4
; CHECK-NEXT: mtvsrd 32, 7
; CHECK-NEXT: clrldi 7, 5, 48
; CHECK-NEXT: rldicl 5, 5, 16, 48
; CHECK-NEXT: extsh 7, 7
; CHECK-NEXT: extsh 5, 5
; CHECK-NEXT: xxmrglw 1, 36, 37
; CHECK-NEXT: vspltish 4, 15
; CHECK-NEXT: vsplth 0, 0, 3
; CHECK-NEXT: xxspltw 0, 32, 3
; CHECK-NEXT: vspltish 0, 1
; CHECK-NEXT: xxmrgld 37, 0, 1
; CHECK-NEXT: xxswapd 1, 34
; CHECK-NEXT: vslh 5, 5, 4
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: vsrah 4, 5, 4
; CHECK-NEXT: clrldi 10, 3, 48
; CHECK-NEXT: rldicl 11, 3, 48, 48
; CHECK-NEXT: extsh 10, 10
; CHECK-NEXT: extsh 11, 11
; CHECK-NEXT: xxsel 0, 32, 35, 36
; CHECK-NEXT: mffprd 6, 0
; CHECK-NEXT: clrldi 12, 6, 48
; CHECK-NEXT: extsh 12, 12
; CHECK-NEXT: divw 7, 7, 12
; CHECK-NEXT: rldicl 12, 6, 48, 48
; CHECK-NEXT: extsh 12, 12
; CHECK-NEXT: divw 8, 8, 12
; CHECK-NEXT: xxswapd 2, 0
; CHECK-NEXT: mffprd 4, 2
; CHECK-NEXT: rldicl 12, 6, 32, 48
; CHECK-NEXT: rldicl 6, 6, 16, 48
; CHECK-NEXT: extsh 6, 6
; CHECK-NEXT: extsh 12, 12
; CHECK-NEXT: divw 5, 5, 6
; CHECK-NEXT: clrldi 6, 4, 48
; CHECK-NEXT: divw 9, 9, 12
; CHECK-NEXT: rldicl 12, 3, 32, 48
; CHECK-NEXT: rldicl 3, 3, 16, 48
; CHECK-NEXT: extsh 6, 6
; CHECK-NEXT: extsh 12, 12
; CHECK-NEXT: extsh 3, 3
; CHECK-NEXT: divw 6, 10, 6
; CHECK-NEXT: rldicl 10, 4, 48, 48
; CHECK-NEXT: extsh 10, 10
; CHECK-NEXT: mtvsrd 34, 7
; CHECK-NEXT: divw 10, 11, 10
; CHECK-NEXT: rldicl 11, 4, 32, 48
; CHECK-NEXT: rldicl 4, 4, 16, 48
; CHECK-NEXT: extsh 11, 11
; CHECK-NEXT: extsh 4, 4
; CHECK-NEXT: mtvsrd 35, 8
; CHECK-NEXT: divw 11, 12, 11
; CHECK-NEXT: divw 3, 3, 4
; CHECK-NEXT: mtvsrd 36, 9
; CHECK-NEXT: mtvsrd 37, 5
; CHECK-NEXT: mtvsrd 32, 6
; CHECK-NEXT: vmrghh 2, 3, 2
; CHECK-NEXT: vmrghh 3, 5, 4
; CHECK-NEXT: mtvsrd 36, 10
; CHECK-NEXT: mtvsrd 37, 11
; CHECK-NEXT: xxmrglw 0, 35, 34
; CHECK-NEXT: vmrghh 4, 4, 0
; CHECK-NEXT: mtvsrd 32, 3
; CHECK-NEXT: vmrghh 5, 0, 5
; CHECK-NEXT: xxmrglw 1, 37, 36
; CHECK-NEXT: xxmrgld 34, 0, 1
; CHECK-NEXT: blr
; Masked signed division on <4 x i16> operands with a <4 x i1> mask.
; The expected assembly above rebuilds the mask as halfwords (vmrghh), shifts
; it with vslh/vsrah by a splat of 15, selects between a halfword splat of 1
; (vspltish 0, 1) and the second operand with xxsel, and then sign-extends
; each halfword (extsh) for eight scalar divw operations.
; NOTE(review): eight divides and halfword vector ops suggest this type is
; widened to eight halfword lanes on this target rather than promoted to a
; wider element type; confirm that the "Promotion" label above the function
; matches the intent of this test.
%res = call <4 x i16> @llvm.masked.sdiv(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m)
ret <4 x i16> %res
}
; Scalarization: the single-element vector is handled as a scalar.
define <1 x i64> @sdiv_v1i164(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m) {
; CHECK-LABEL: sdiv_v1i164:
; CHECK: # %bb.0:
; CHECK-NEXT: andi. 5, 5, 1
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: iselgt 4, 4, 5
; CHECK-NEXT: divd 3, 3, 4
; CHECK-NEXT: blr
; Masked signed division on <1 x i64> operands with a <1 x i1> mask.
; The expected assembly above is fully scalar: it tests bit 0 of the mask
; (andi.), uses iselgt to pick between the second operand and the constant 1
; loaded by li, and issues a single divd.
; NOTE(review): the function name "sdiv_v1i164" looks like a typo for
; "sdiv_v1i64"; renaming would also require updating the label assertion.
%res = call <1 x i64> @llvm.masked.sdiv(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m)
ret <1 x i64> %res
}
; Expansion: the i128 division is expanded into __divti3 library calls.
define <2 x i128> @sdiv_v2i128(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m) nounwind {
; CHECK-LABEL: sdiv_v2i128:
; CHECK: # %bb.0:
; CHECK-NEXT: mfocrf 12, 32
; CHECK-NEXT: stw 12, 8(1)
; CHECK-NEXT: mflr 0
; CHECK-NEXT: stdu 1, -128(1)
; CHECK-NEXT: li 3, 48
; CHECK-NEXT: std 0, 144(1)
; CHECK-NEXT: xxswapd 0, 38
; CHECK-NEXT: xxswapd 1, 37
; CHECK-NEXT: std 30, 112(1) # 8-byte Folded Spill
; CHECK-NEXT: li 30, 1
; CHECK-NEXT: std 29, 104(1) # 8-byte Folded Spill
; CHECK-NEXT: li 29, 0
; CHECK-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: li 3, 64
; CHECK-NEXT: mfvsrd 4, 35
; CHECK-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: li 3, 80
; CHECK-NEXT: vmr 30, 2
; CHECK-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: vmr 31, 4
; CHECK-NEXT: andi. 3, 3, 1
; CHECK-NEXT: mfvsrd 3, 38
; CHECK-NEXT: crmove 8, 1
; CHECK-NEXT: andi. 3, 3, 1
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: iselgt 5, 3, 30
; CHECK-NEXT: mfvsrd 3, 37
; CHECK-NEXT: xxswapd 0, 35
; CHECK-NEXT: iselgt 6, 3, 29
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: bl __divti3
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 0, 63
; CHECK-NEXT: mtfprd 1, 3
; CHECK-NEXT: mtfprd 2, 4
; CHECK-NEXT: mfvsrd 4, 62
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: isel 5, 3, 30, 8
; CHECK-NEXT: mfvsrd 3, 63
; CHECK-NEXT: isel 6, 3, 29, 8
; CHECK-NEXT: xxswapd 0, 62
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: xxmrghd 61, 2, 1
; CHECK-NEXT: bl __divti3
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: li 3, 80
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: ld 30, 112(1) # 8-byte Folded Reload
; CHECK-NEXT: vmr 3, 29
; CHECK-NEXT: ld 29, 104(1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 64
; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 48
; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: xxmrghd 34, 1, 0
; CHECK-NEXT: addi 1, 1, 128
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: lwz 12, 8(1)
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: mtocrf 32, 12
; CHECK-NEXT: blr
; Masked signed division on <2 x i128> operands with a <2 x i1> mask.
; The function is nounwind. The expected assembly above makes two calls to
; __divti3, one per element. Before each call, a mask bit tested with andi.
; drives a pair of isel instructions that choose between the two doubleword
; halves of the divisor and the constants held in r30 (1) and r29 (0). One
; tested mask bit is kept in CR field 2 (crmove 8, 1) across the first call,
; which is why the prologue and epilogue save and restore that field with
; mfocrf/mtocrf.
%res = call <2 x i128> @llvm.masked.sdiv(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m)
ret <2 x i128> %res
}
; Promotion and widening
define <3 x i10> @sdiv_v3i10(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m) {
; CHECK-LABEL: sdiv_v3i10:
; CHECK: # %bb.0:
; CHECK-NEXT: mtfprwz 0, 9
; CHECK-NEXT: mtfprwz 1, 10
; CHECK-NEXT: addis 9, 2, .LCPI7_0@toc@ha
; CHECK-NEXT: addi 9, 9, .LCPI7_0@toc@l
; CHECK-NEXT: mtvsrwz 38, 8
; CHECK-NEXT: vspltisw 4, 11
; CHECK-NEXT: vadduwm 4, 4, 4
; CHECK-NEXT: lxvd2x 2, 0, 9
; CHECK-NEXT: xxmrghw 35, 1, 0
; CHECK-NEXT: mtfprwz 0, 6
; CHECK-NEXT: lbz 6, 96(1)
; CHECK-NEXT: mtfprwz 1, 7
; CHECK-NEXT: mtvsrwz 32, 6
; CHECK-NEXT: addis 6, 2, .LCPI7_1@toc@ha
; CHECK-NEXT: addi 6, 6, .LCPI7_1@toc@l
; CHECK-NEXT: xxswapd 34, 2
; CHECK-NEXT: xxmrghw 37, 1, 0
; CHECK-NEXT: mtfprwz 1, 4
; CHECK-NEXT: lxvd2x 0, 0, 6
; CHECK-NEXT: vperm 5, 6, 5, 2
; CHECK-NEXT: mtvsrwz 38, 5
; CHECK-NEXT: vslw 5, 5, 4
; CHECK-NEXT: vsraw 5, 5, 4
; CHECK-NEXT: vperm 3, 0, 3, 2
; CHECK-NEXT: xxswapd 32, 0
; CHECK-NEXT: mtfprwz 0, 3
; CHECK-NEXT: xxland 35, 35, 32
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vslw 3, 3, 0
; CHECK-NEXT: vsraw 3, 3, 0
; CHECK-NEXT: xxmrghw 33, 1, 0
; CHECK-NEXT: vperm 1, 6, 1, 2
; CHECK-NEXT: vspltisw 6, 1
; CHECK-NEXT: xxsel 0, 38, 37, 35
; CHECK-NEXT: vslw 3, 1, 4
; CHECK-NEXT: vsraw 3, 3, 4
; CHECK-NEXT: xxswapd 1, 0
; CHECK-NEXT: xxsldwi 3, 0, 0, 1
; CHECK-NEXT: mffprwz 3, 1
; CHECK-NEXT: xxswapd 2, 35
; CHECK-NEXT: xxsldwi 4, 35, 35, 1
; CHECK-NEXT: mffprwz 4, 2
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mffprwz 4, 3
; CHECK-NEXT: mtfprwz 1, 3
; CHECK-NEXT: mffprwz 3, 4
; CHECK-NEXT: divw 3, 3, 4
; CHECK-NEXT: mfvsrwz 4, 35
; CHECK-NEXT: mtfprwz 2, 3
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: divw 3, 4, 3
; CHECK-NEXT: mtvsrwz 35, 3
; CHECK-NEXT: xxmrghw 36, 2, 1
; CHECK-NEXT: vperm 2, 3, 4, 2
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: blr
; Masked signed division on <3 x i10> operands with a <3 x i1> mask.
; The expected assembly above moves the incoming elements from GPRs into
; vector registers (the last mask element is loaded from the stack with lbz),
; shifts both data operands left and then arithmetically right by a splat of
; 11 + 11 (vspltisw 4, 11 followed by vadduwm 4, 4, 4), selects between a
; splat of 1 and the divisor with xxsel, and then performs three scalar divw
; operations whose results are returned in r3, r4 and r5.
; NOTE(review): the shift by 22 presumably sign-extends the 10-bit elements
; inside 32-bit lanes; confirm.
%res = call <3 x i10> @llvm.masked.sdiv(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m)
ret <3 x i10> %res
}