[NFC][LLVM] Add test coverage for all binops to sve-intrinsic-simplify-binop.ll. Also add sve-intrinsic-simplify-shift.ll to test asr, lsl and lsr.
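
For reference, both test files carry their own RUN line, so each can be
exercised standalone. A minimal invocation (a sketch; the build directory
path is an assumption, substitute your own):

  build/bin/opt -S -passes=instcombine \
      llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll \
    | build/bin/FileCheck \
      llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll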
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
index 7da55a1..85fc02cb 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
@@ -16,9 +16,20 @@
   ret <vscale x 4 x i32> %r
 }

+; Operation is not commutative.
+define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_1(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %r
+}
+
 ; Inactive lanes are important, which makes the operation non-commutative.
-define <vscale x 4 x i32> @cannot_commute_constant_to_rhs(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs(
+define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @cannot_commute_constant_to_rhs_2(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 303), <vscale x 4 x i32> [[A]])
 ; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
@@ -96,6 +107,106 @@
 ; enable simplification. Given the simplifications themselves are common code, it
 ; is assumed they are already well tested elsewhere.

+define <vscale x 4 x i32> @constant_add(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_add(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_add_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_add_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_and(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_and(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_and_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_and_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 14))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_eor(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_eor(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_eor_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_eor_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x float> @constant_fadd(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fadd(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fadd_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fadd_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fdiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 1.200000e+01), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 12.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fdiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -115,6 +226,26 @@
   ret <vscale x 4 x float> %r
 }

+define <vscale x 4 x float> @constant_fsub(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsub(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @constant_fsub_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 7.000000e+00), <vscale x 4 x float> splat (float 6.000000e+00))
+; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
+;
+  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 7.0), <vscale x 4 x float> splat (float 6.0))
+  ret <vscale x 4 x float> %r
+}
+
 define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -134,13 +265,179 @@
   ret <vscale x 4 x i32> %r
 }

+define <vscale x 4 x i32> @constant_orr(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_orr(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}

-; repeat only the constant fold tests for fmul(.u)
+define <vscale x 4 x i32> @constant_orr_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_orr_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 13), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sdiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdiv_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdiv_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sdiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_sdiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support overflow.
+define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udiv_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @constant_udiv_u(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_u(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent does not support divide-by-zero.
+define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_u_by_zero(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 0))
+  ret <vscale x 4 x i32> %r
+}

 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

 attributes #0 = { "target-features"="+sve" }
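
(Reviewer note, not part of the patch: the by-zero and with-overflow tests above exist because plain IR division cannot represent those cases. A minimal illustration of the hazard, relying only on the usual LangRef semantics:)

  define i32 @div_ub_examples() {
    ; Both divisions below are immediate undefined behaviour per the LangRef,
    ; so folding the guarded SVE intrinsics into plain sdiv would miscompile.
    %a = sdiv i32 -7, 0             ; division by zero
    %b = sdiv i32 -2147483648, -1   ; INT32_MIN / -1 overflows
    %r = add i32 %a, %b
    ret i32 %r
  }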
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
new file mode 100644
index 0000000..400dc0d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
@@ -0,0 +1,353 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x80
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+; data = 0x80
+define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x8000
+define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+; data = 0x8000
+define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; result = 0x80
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; result = 0x8000
+define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; result = 0x80000000
+define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; result = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x80
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
+  ret <vscale x 16 x i8> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-7.
+; data = 0x80
+define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 8))
+; CHECK-NEXT: ret <vscale x 16 x i8> [[R]]
+;
+  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 8))
+  ret <vscale x 16 x i8> %r
+}
+
+; data = 0x8000
+define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
+  ret <vscale x 8 x i16> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-15.
+; data = 0x8000
+define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 16))
+; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
+;
+  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 16))
+  ret <vscale x 8 x i16> %r
+}
+
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
+  ret <vscale x 4 x i32> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-31.
+; data = 0x80000000
+define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 32))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 32))
+  ret <vscale x 4 x i32> %r
+}
+
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+  ret <vscale x 2 x i64> %r
+}
+
+; The intrinsic's IR equivalent only supports shift amounts in the range 0-63.
+; data = 0x8000000000000000
+define <vscale x 2 x i64> @constant_lsr_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_64(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+; CHECK-NEXT: ret <vscale x 2 x i64> [[R]]
+;
+  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 64))
+  ret <vscale x 2 x i64> %r
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }