[NFC][LLVM] Add test coverage for all binops to sve-intrinsic-simplify-binop.ll. Also add sve-intrinsic-simplify-shift.ll to test asr, lsl and lsr.
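
For reference, both test files carry their own RUN line, so each can be
exercised standalone. A minimal invocation (a sketch; the build directory
path is an assumption, substitute your own):

  build/bin/opt -S -passes=instcombine \
      llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll \
    | build/bin/FileCheck \
      llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll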
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
index 7da55a1..85fc02cb 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
@@ -16,9 +16,20 @@
   ret <vscale x 4 x i32> %r
 }

+; Operation is not commutative.
+define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_1(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %r
+}
+
 ; Inactive lanes are important, which makes the operation non-commutative.
-define <vscale x 4 x i32> @cannot_commute_constant_to_rhs(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs(
+define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_2(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> [[A]])
 ; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
@@ -96,6 +107,106 @@
 ; enable simplification. Given the simplifications themselves are common code, it
 ; is assumed they are already well tested elsewhere.

+define <vscale x 4 x i32> @constant_add(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_add(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_add_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_add_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_and(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_and(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_and_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_and_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_eor(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_eor(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_eor_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_eor_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x float> @constant_fadd(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fadd(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fadd_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fadd_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fdiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 1.200000e+01), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 12.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -115,6 +226,26 @@
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fsub(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsub(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsub_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -134,13 +265,179 @@
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_orr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_orr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}

-; repeat only the constant fold tests for fmul(.u)
+define <vscale x 4 x i32> @constant_orr_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_orr_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sdiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdiv_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdiv_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sdiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udiv_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_udiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_u_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}

 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

 attributes #0 = { "target-features"="+sve" }
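
(Reviewer note, not part of the patch: the by-zero and with-overflow tests above exist because plain IR division cannot represent those cases. A minimal illustration of the hazard, relying only on the usual LangRef semantics:)

  define i32 @div_ub_examples() {
    ; Both divisions below are immediate undefined behaviour per the LangRef,
    ; so folding the guarded SVE intrinsics into plain sdiv would miscompile.
    %a = sdiv i32 -7, 0             ; division by zero
    %b = sdiv i32 -2147483648, -1   ; INT32_MIN / -1 overflows
    %r = add i32 %a, %b
    ret i32 %r
  }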
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
new file mode 100644
index 0000000..400dc0d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
@@ -0,0 +1,353 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x80
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+; data = 0x80
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x8000
+define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+; data = 0x8000
+define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; result = 0x80
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; result = 0x8000
+define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; result = 0x80000000
+define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; result = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x80
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+; data = 0x80
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x8000
+define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+; data = 0x8000
+define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsr_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }