; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple aarch64 < %s | FileCheck %s --check-prefix=NEON
; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
; Legal
; Masked unsigned divide of <4 x i32> under a <4 x i1> mask.
; Without +sve the checks show the mask being turned into all-ones/all-zero
; lanes (ushll, shl #31, cmlt) and used to force the divisor to 1 in inactive
; lanes (and + mvn + sub), after which every lane is divided with a scalar udiv.
; With +sve the mask is converted to a predicate (cmpne under ptrue vl4) and a
; single merging predicated udiv is emitted.
define <4 x i32> @udiv_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m) {
; NEON-LABEL: udiv_v4i32:
; NEON: // %bb.0:
; NEON-NEXT: ushll v2.4s, v2.4h, #0
; NEON-NEXT: fmov w8, s0
; NEON-NEXT: mov w12, v0.s[3]
; NEON-NEXT: shl v2.4s, v2.4s, #31
; NEON-NEXT: cmlt v2.4s, v2.4s, #0
; NEON-NEXT: and v1.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v2.16b, v2.16b
; NEON-NEXT: sub v1.4s, v1.4s, v2.4s
; NEON-NEXT: fmov w9, s1
; NEON-NEXT: mov w10, v1.s[1]
; NEON-NEXT: mov w11, v1.s[2]
; NEON-NEXT: udiv w8, w8, w9
; NEON-NEXT: mov w9, v0.s[1]
; NEON-NEXT: udiv w9, w9, w10
; NEON-NEXT: mov w10, v0.s[2]
; NEON-NEXT: fmov s0, w8
; NEON-NEXT: udiv w10, w10, w11
; NEON-NEXT: mov w11, v1.s[3]
; NEON-NEXT: mov v0.s[1], w9
; NEON-NEXT: udiv w8, w12, w11
; NEON-NEXT: mov v0.s[2], w10
; NEON-NEXT: mov v0.s[3], w8
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v4i32:
; SVE: // %bb.0:
; SVE-NEXT: ushll v2.4s, v2.4h, #0
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmpne p1.s, p0/z, z2.s, #0
; SVE-NEXT: udiv z0.s, p1/m, z0.s, z1.s
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT: ret
%res = call <4 x i32> @llvm.masked.udiv(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m)
ret <4 x i32> %res
}
; Masked unsigned divide of <2 x i64> under a <2 x i1> mask.
; Without +sve the divisor is forced to 1 in inactive lanes (and + mvn + sub on
; the sign-extended mask) and both lanes are divided with scalar 64-bit udivs.
; With +sve the mask becomes a predicate (cmpne under ptrue vl2) feeding one
; merging predicated udiv on .d elements.
define <2 x i64> @udiv_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m) {
; NEON-LABEL: udiv_v2i64:
; NEON: // %bb.0:
; NEON-NEXT: ushll v2.2d, v2.2s, #0
; NEON-NEXT: fmov x8, d0
; NEON-NEXT: shl v2.2d, v2.2d, #63
; NEON-NEXT: cmlt v2.2d, v2.2d, #0
; NEON-NEXT: and v1.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v2.16b, v2.16b
; NEON-NEXT: sub v1.2d, v1.2d, v2.2d
; NEON-NEXT: fmov x9, d1
; NEON-NEXT: mov x10, v1.d[1]
; NEON-NEXT: udiv x8, x8, x9
; NEON-NEXT: mov x9, v0.d[1]
; NEON-NEXT: udiv x9, x9, x10
; NEON-NEXT: fmov d0, x8
; NEON-NEXT: mov v0.d[1], x9
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v2i64:
; SVE: // %bb.0:
; SVE-NEXT: ushll v2.2d, v2.2s, #0
; SVE-NEXT: ptrue p0.d, vl2
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; SVE-NEXT: shl v2.2d, v2.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z2.d, #0
; SVE-NEXT: udiv z0.d, p1/m, z0.d, z1.d
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT: ret
%res = call <2 x i64> @llvm.masked.udiv(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m)
ret <2 x i64> %res
}
; Splitting
; Masked unsigned divide of <4 x i64>, which does not fit one 128-bit register.
; The checks show %x in v0/v1, %y in v2/v3 and the mask in v4; the mask is
; split into a low half (ushll) and a high half (ushll2) and each half is
; handled like the <2 x i64> case.
; Without +sve each half gets a divisor forced to 1 in inactive lanes followed
; by scalar udivs; with +sve each half gets its own cmpne predicate and
; predicated udiv, sharing a single ptrue vl2.
define <4 x i64> @udiv_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m) {
; NEON-LABEL: udiv_v4i64:
; NEON: // %bb.0:
; NEON-NEXT: ushll v4.4s, v4.4h, #0
; NEON-NEXT: mov x8, v1.d[1]
; NEON-NEXT: fmov x11, d0
; NEON-NEXT: mov x12, v0.d[1]
; NEON-NEXT: ushll2 v5.2d, v4.4s, #0
; NEON-NEXT: shl v5.2d, v5.2d, #63
; NEON-NEXT: cmlt v5.2d, v5.2d, #0
; NEON-NEXT: and v3.16b, v3.16b, v5.16b
; NEON-NEXT: mvn v5.16b, v5.16b
; NEON-NEXT: sub v3.2d, v3.2d, v5.2d
; NEON-NEXT: mov x9, v3.d[1]
; NEON-NEXT: fmov x10, d3
; NEON-NEXT: udiv x8, x8, x9
; NEON-NEXT: fmov x9, d1
; NEON-NEXT: ushll v1.2d, v4.2s, #0
; NEON-NEXT: shl v1.2d, v1.2d, #63
; NEON-NEXT: cmlt v1.2d, v1.2d, #0
; NEON-NEXT: and v2.16b, v2.16b, v1.16b
; NEON-NEXT: mvn v1.16b, v1.16b
; NEON-NEXT: sub v1.2d, v2.2d, v1.2d
; NEON-NEXT: udiv x9, x9, x10
; NEON-NEXT: fmov x10, d1
; NEON-NEXT: udiv x10, x11, x10
; NEON-NEXT: mov x11, v1.d[1]
; NEON-NEXT: fmov d1, x9
; NEON-NEXT: mov v1.d[1], x8
; NEON-NEXT: udiv x11, x12, x11
; NEON-NEXT: fmov d0, x10
; NEON-NEXT: mov v0.d[1], x11
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v4i64:
; SVE: // %bb.0:
; SVE-NEXT: ushll v4.4s, v4.4h, #0
; SVE-NEXT: ptrue p0.d, vl2
; SVE-NEXT: // kill: def $q3 killed $q3 def $z3
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: // kill: def $q2 killed $q2 def $z2
; SVE-NEXT: ushll2 v5.2d, v4.4s, #0
; SVE-NEXT: shl v5.2d, v5.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z5.d, #0
; SVE-NEXT: udiv z1.d, p1/m, z1.d, z3.d
; SVE-NEXT: ushll v3.2d, v4.2s, #0
; SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
; SVE-NEXT: shl v3.2d, v3.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0
; SVE-NEXT: udiv z0.d, p1/m, z0.d, z2.d
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT: ret
%res = call <4 x i64> @llvm.masked.udiv(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m)
ret <4 x i64> %res
}
; Widening
; Masked unsigned divide of <2 x i32>, a 64-bit (d-register) vector.
; The mask arrives as .2s lanes here, so no ushll is needed before shl #31.
; Without +sve the divisor is forced to 1 in inactive lanes and two scalar
; udivs are emitted. With +sve the d registers are treated as the low part of
; z registers and a predicated udiv runs under a cmpne predicate limited by
; ptrue vl2.
define <2 x i32> @udiv_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m) {
; NEON-LABEL: udiv_v2i32:
; NEON: // %bb.0:
; NEON-NEXT: shl v2.2s, v2.2s, #31
; NEON-NEXT: // kill: def $d0 killed $d0 def $q0
; NEON-NEXT: fmov w8, s0
; NEON-NEXT: cmlt v2.2s, v2.2s, #0
; NEON-NEXT: and v1.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v2.8b, v2.8b
; NEON-NEXT: sub v1.2s, v1.2s, v2.2s
; NEON-NEXT: fmov w9, s1
; NEON-NEXT: mov w10, v1.s[1]
; NEON-NEXT: udiv w8, w8, w9
; NEON-NEXT: mov w9, v0.s[1]
; NEON-NEXT: udiv w9, w9, w10
; NEON-NEXT: fmov s0, w8
; NEON-NEXT: mov v0.s[1], w9
; NEON-NEXT: // kill: def $d0 killed $d0 killed $q0
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v2i32:
; SVE: // %bb.0:
; SVE-NEXT: shl v2.2s, v2.2s, #31
; SVE-NEXT: ptrue p0.s, vl2
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
; SVE-NEXT: cmpne p1.s, p0/z, z2.s, #0
; SVE-NEXT: udiv z0.s, p1/m, z0.s, z1.s
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT: ret
%res = call <2 x i32> @llvm.masked.udiv(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m)
ret <2 x i32> %res
}
; Promotion
; Masked unsigned divide of <4 x i16>; neither output uses a 16-bit divide.
; Without +sve the divisor is forced to 1 in inactive lanes, then each lane is
; moved to a w register with umov and divided with a 32-bit scalar udiv.
; With +sve the same divisor fix-up is done in vector registers, both operands
; are zero-extended to .4s (ushll), divided under an all-true vl4 predicate
; (the mask is not used as the predicate here), and narrowed back with xtn.
define <4 x i16> @udiv_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m) {
; NEON-LABEL: udiv_v4i16:
; NEON: // %bb.0:
; NEON-NEXT: shl v2.4h, v2.4h, #15
; NEON-NEXT: // kill: def $d0 killed $d0 def $q0
; NEON-NEXT: umov w8, v0.h[1]
; NEON-NEXT: cmlt v2.4h, v2.4h, #0
; NEON-NEXT: and v1.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v2.8b, v2.8b
; NEON-NEXT: sub v1.4h, v1.4h, v2.4h
; NEON-NEXT: umov w9, v1.h[1]
; NEON-NEXT: umov w10, v1.h[0]
; NEON-NEXT: umov w11, v1.h[2]
; NEON-NEXT: umov w12, v1.h[3]
; NEON-NEXT: udiv w8, w8, w9
; NEON-NEXT: umov w9, v0.h[0]
; NEON-NEXT: udiv w9, w9, w10
; NEON-NEXT: umov w10, v0.h[2]
; NEON-NEXT: udiv w10, w10, w11
; NEON-NEXT: umov w11, v0.h[3]
; NEON-NEXT: fmov s0, w9
; NEON-NEXT: mov v0.h[1], w8
; NEON-NEXT: udiv w8, w11, w12
; NEON-NEXT: mov v0.h[2], w10
; NEON-NEXT: mov v0.h[3], w8
; NEON-NEXT: // kill: def $d0 killed $d0 killed $q0
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v4i16:
; SVE: // %bb.0:
; SVE-NEXT: shl v2.4h, v2.4h, #15
; SVE-NEXT: ushll v0.4s, v0.4h, #0
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: cmlt v2.4h, v2.4h, #0
; SVE-NEXT: and v1.8b, v1.8b, v2.8b
; SVE-NEXT: mvn v2.8b, v2.8b
; SVE-NEXT: sub v1.4h, v1.4h, v2.4h
; SVE-NEXT: ushll v1.4s, v1.4h, #0
; SVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; SVE-NEXT: xtn v0.4h, v0.4s
; SVE-NEXT: ret
%res = call <4 x i16> @llvm.masked.udiv(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m)
ret <4 x i16> %res
}
; Scalarization
; Masked unsigned divide of <1 x i64> under a <1 x i1> mask.
; Both configurations produce the same scalar sequence: the mask bit is tested
; in w0, csinc picks either %y or 1 (xzr + 1) as the divisor, and a single
; 64-bit udiv computes the result.
; NOTE(review): the function name "udiv_v1i164" looks like a typo for
; "udiv_v1i64"; left unchanged so the symbol and its LABEL checks stay in sync.
define <1 x i64> @udiv_v1i164(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m) {
; NEON-LABEL: udiv_v1i164:
; NEON: // %bb.0:
; NEON-NEXT: // kill: def $d1 killed $d1 def $q1
; NEON-NEXT: fmov x8, d1
; NEON-NEXT: // kill: def $d0 killed $d0 def $q0
; NEON-NEXT: fmov x9, d0
; NEON-NEXT: tst w0, #0x1
; NEON-NEXT: csinc x8, x8, xzr, ne
; NEON-NEXT: udiv x8, x9, x8
; NEON-NEXT: fmov d0, x8
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v1i164:
; SVE: // %bb.0:
; SVE-NEXT: // kill: def $d1 killed $d1 def $q1
; SVE-NEXT: fmov x8, d1
; SVE-NEXT: // kill: def $d0 killed $d0 def $q0
; SVE-NEXT: fmov x9, d0
; SVE-NEXT: tst w0, #0x1
; SVE-NEXT: csinc x8, x8, xzr, ne
; SVE-NEXT: udiv x8, x9, x8
; SVE-NEXT: fmov d0, x8
; SVE-NEXT: ret
%res = call <1 x i64> @llvm.masked.udiv(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m)
ret <1 x i64> %res
}
; Expansion
; Masked unsigned divide of <2 x i128>; both configurations emit identical code.
; Each element is divided by a call to __udivti3. Before each call the divisor
; is made safe from the element's mask bit: csinc yields 1 for the low half and
; csel yields 0 for the high half when the bit is clear, i.e. the divisor is 1.
; x19-x25 are spilled so that the second element's operands, its mask bit and
; the first quotient survive the calls; the quotients are returned in x0-x3.
; The function is nounwind, so the checks contain no CFI directives.
define <2 x i128> @udiv_v2i128(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m) nounwind {
; NEON-LABEL: udiv_v2i128:
; NEON: // %bb.0:
; NEON-NEXT: stp x30, x25, [sp, #-64]! // 16-byte Folded Spill
; NEON-NEXT: // kill: def $d0 killed $d0 def $q0
; NEON-NEXT: fmov w8, s0
; NEON-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; NEON-NEXT: mov x21, x3
; NEON-NEXT: mov x22, x2
; NEON-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; NEON-NEXT: mov w25, v0.s[1]
; NEON-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; NEON-NEXT: mov x19, x7
; NEON-NEXT: mov x20, x6
; NEON-NEXT: tst w8, #0x1
; NEON-NEXT: csel x3, x5, xzr, ne
; NEON-NEXT: csinc x2, x4, xzr, ne
; NEON-NEXT: bl __udivti3
; NEON-NEXT: tst w25, #0x1
; NEON-NEXT: mov x23, x0
; NEON-NEXT: mov x24, x1
; NEON-NEXT: csel x3, x19, xzr, ne
; NEON-NEXT: csinc x2, x20, xzr, ne
; NEON-NEXT: mov x0, x22
; NEON-NEXT: mov x1, x21
; NEON-NEXT: bl __udivti3
; NEON-NEXT: mov x2, x0
; NEON-NEXT: mov x3, x1
; NEON-NEXT: mov x0, x23
; NEON-NEXT: mov x1, x24
; NEON-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; NEON-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; NEON-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; NEON-NEXT: ldp x30, x25, [sp], #64 // 16-byte Folded Reload
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v2i128:
; SVE: // %bb.0:
; SVE-NEXT: stp x30, x25, [sp, #-64]! // 16-byte Folded Spill
; SVE-NEXT: // kill: def $d0 killed $d0 def $q0
; SVE-NEXT: fmov w8, s0
; SVE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SVE-NEXT: mov x21, x3
; SVE-NEXT: mov x22, x2
; SVE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SVE-NEXT: mov w25, v0.s[1]
; SVE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SVE-NEXT: mov x19, x7
; SVE-NEXT: mov x20, x6
; SVE-NEXT: tst w8, #0x1
; SVE-NEXT: csel x3, x5, xzr, ne
; SVE-NEXT: csinc x2, x4, xzr, ne
; SVE-NEXT: bl __udivti3
; SVE-NEXT: tst w25, #0x1
; SVE-NEXT: mov x23, x0
; SVE-NEXT: mov x24, x1
; SVE-NEXT: csel x3, x19, xzr, ne
; SVE-NEXT: csinc x2, x20, xzr, ne
; SVE-NEXT: mov x0, x22
; SVE-NEXT: mov x1, x21
; SVE-NEXT: bl __udivti3
; SVE-NEXT: mov x2, x0
; SVE-NEXT: mov x3, x1
; SVE-NEXT: mov x0, x23
; SVE-NEXT: mov x1, x24
; SVE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SVE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SVE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SVE-NEXT: ldp x30, x25, [sp], #64 // 16-byte Folded Reload
; SVE-NEXT: ret
%res = call <2 x i128> @llvm.masked.udiv(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m)
ret <2 x i128> %res
}
; Promotion and widening
; Masked unsigned divide of <3 x i10>, an odd element count and odd bit width.
; In the checks the elements arrive in general-purpose registers (%x in w0-w2,
; %y in w3-w5, the mask in w6, w7 and one stack slot at [sp]) and are packed
; into .4h lanes; "bic #252, lsl #8" clears the top six bits of each lane so
; only the 10-bit value remains. The divisor is forced to 1 in inactive lanes.
; Without +sve three scalar udivs write the results straight to w0-w2.
; With +sve both operands are zero-extended to .4s, divided by a
; reversed-operand udivr under ptrue vl4, narrowed with xtn, and the three
; result lanes are moved to w0-w2 with umov.
define <3 x i10> @udiv_v3i10(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m) {
; NEON-LABEL: udiv_v3i10:
; NEON: // %bb.0:
; NEON-NEXT: movi v0.2d, #0000000000000000
; NEON-NEXT: fmov s1, w3
; NEON-NEXT: ldr w8, [sp]
; NEON-NEXT: fmov s2, w0
; NEON-NEXT: mov v1.h[1], w4
; NEON-NEXT: mov v0.h[0], w6
; NEON-NEXT: mov v2.h[1], w1
; NEON-NEXT: mov v1.h[2], w5
; NEON-NEXT: mov v0.h[1], w7
; NEON-NEXT: mov v2.h[2], w2
; NEON-NEXT: bic v1.4h, #252, lsl #8
; NEON-NEXT: mov v0.h[2], w8
; NEON-NEXT: bic v2.4h, #252, lsl #8
; NEON-NEXT: umov w9, v2.h[0]
; NEON-NEXT: shl v0.4h, v0.4h, #15
; NEON-NEXT: cmlt v0.4h, v0.4h, #0
; NEON-NEXT: and v1.8b, v1.8b, v0.8b
; NEON-NEXT: mvn v0.8b, v0.8b
; NEON-NEXT: sub v0.4h, v1.4h, v0.4h
; NEON-NEXT: umov w8, v0.h[0]
; NEON-NEXT: and w8, w8, #0x3ff
; NEON-NEXT: udiv w0, w9, w8
; NEON-NEXT: umov w8, v0.h[1]
; NEON-NEXT: umov w9, v2.h[1]
; NEON-NEXT: and w8, w8, #0x3ff
; NEON-NEXT: udiv w1, w9, w8
; NEON-NEXT: umov w8, v0.h[2]
; NEON-NEXT: umov w9, v2.h[2]
; NEON-NEXT: and w8, w8, #0x3ff
; NEON-NEXT: udiv w2, w9, w8
; NEON-NEXT: ret
;
; SVE-LABEL: udiv_v3i10:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: fmov s1, w3
; SVE-NEXT: ldr w8, [sp]
; SVE-NEXT: fmov s2, w0
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: mov v1.h[1], w4
; SVE-NEXT: mov v0.h[0], w6
; SVE-NEXT: mov v2.h[1], w1
; SVE-NEXT: mov v1.h[2], w5
; SVE-NEXT: mov v0.h[1], w7
; SVE-NEXT: mov v2.h[2], w2
; SVE-NEXT: bic v1.4h, #252, lsl #8
; SVE-NEXT: mov v0.h[2], w8
; SVE-NEXT: bic v2.4h, #252, lsl #8
; SVE-NEXT: shl v0.4h, v0.4h, #15
; SVE-NEXT: cmlt v0.4h, v0.4h, #0
; SVE-NEXT: and v1.8b, v1.8b, v0.8b
; SVE-NEXT: mvn v0.8b, v0.8b
; SVE-NEXT: sub v0.4h, v1.4h, v0.4h
; SVE-NEXT: ushll v1.4s, v2.4h, #0
; SVE-NEXT: ushll v0.4s, v0.4h, #0
; SVE-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; SVE-NEXT: xtn v0.4h, v0.4s
; SVE-NEXT: umov w0, v0.h[0]
; SVE-NEXT: umov w1, v0.h[1]
; SVE-NEXT: umov w2, v0.h[2]
; SVE-NEXT: ret
%res = call <3 x i10> @llvm.masked.udiv(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m)
ret <3 x i10> %res
}