; test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll - llvm-project/llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
 ; RUN:     | FileCheck %s
 ; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
 ; RUN:     | FileCheck %s -check-prefix=DISABLED

 ; Test ASan instrumentation of llvm.vp.{load,store} (constant and variable
 ; masks), llvm.experimental.vp.strided.{load,store} and llvm.vp.{gather,scatter}.

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

 ;;;;;;;;;;;;;;;; STORE
 ;; Fixed-width vp.store intrinsic called by the store tests that follow.
 ;; Operands: value to store, base pointer, per-lane mask, explicit vector length.
 declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind

 ;; vp.store with a runtime mask and a runtime EVL.
 ;; The expected instrumented output skips all checking when %evl is zero.
 ;; Otherwise it loops over lane indices from 0 up to umin(zext(%evl), 4) and,
 ;; for each lane whose mask bit is set, calls __asan_store4 on that lane's
 ;; address before the original vp.store executes.
 ;; With -asan-instrument-reads=0 -asan-instrument-writes=0 the call is expected
 ;; to be left untouched.
 define void @store.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @store.v4f32.variable(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
 ; CHECK:       6:
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
 ; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
 ; CHECK-NEXT:    br label [[TMP9]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP10]]
 ; CHECK:       10:
 ; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @store.v4f32.variable(
 ; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret void
 ;
   tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
   ret void
 }

 ;; Two consecutive vp.stores to the same pointer with different constant masks;
 ;; both are expected to be instrumented.
 ;; Each call gets its own per-lane loop (guarded by %evl != 0 and bounded by
 ;; umin(zext(%evl), 4)). The constant mask is not folded away in the expected
 ;; output: each lane's bit is read with extractelement from the constant vector.
 ;; NOTE(review): the function name says 1010, but the masks used here are
 ;; <0,0,1,1> and <0,0,1,0> -- confirm which is intended.
 define void @store.v4f32.1010.split(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @store.v4f32.1010.split(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
 ; CHECK:       6:
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
 ; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
 ; CHECK-NEXT:    br label [[TMP9]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP10]]
 ; CHECK:       10:
 ; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
 ; CHECK:       12:
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
 ; CHECK:       .split1:
 ; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i64 [[IV2]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
 ; CHECK:       16:
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
 ; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP18]])
 ; CHECK-NEXT:    br label [[TMP19]]
 ; CHECK:       19:
 ; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
 ; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
 ; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
 ; CHECK:       .split1.split:
 ; CHECK-NEXT:    br label [[TMP20]]
 ; CHECK:       20:
 ; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @store.v4f32.1010.split(
 ; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
 ; DISABLED-NEXT:    ret void
 ;
   tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 %evl)
   tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
   ret void
 }

 ;; A vp.store that follows a full <4 x float> store to the same pointer.
 ;; Only the full store is expected to be checked (one __asan_store16 on %p);
 ;; the vp.store that follows must not receive any additional check.
 ;; The plain store carries no explicit alignment in the input; the expected
 ;; output prints it with align 16.
 define void @store.v4f32.0010.after.full.store(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @store.v4f32.0010.after.full.store(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
 ; CHECK-NEXT:    call void @__asan_store16(i64 [[TMP1]])
 ; CHECK-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
 ; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
 ; DISABLED-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
 ; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret void
 ;
   store <4 x float> %arg, ptr %p
   tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
   ret void
 }

 ;;;;;;;;;;;;;;;; LOAD
 ;; Fixed-width vp.load intrinsics. Operands: base pointer, per-lane mask,
 ;; explicit vector length.
 declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
 ;; NOTE(review): no call to the v8i32 variant appears in the visible part of
 ;; this file -- confirm whether the declaration is still needed.
 declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind

 ;; vp.load with a runtime mask and a runtime EVL.
 ;; The expected instrumented output skips all checking when %evl is zero.
 ;; Otherwise it loops over lane indices from 0 up to umin(zext(%evl), 4) and,
 ;; for each lane whose mask bit is set, calls __asan_load4 on that lane's
 ;; address before the original vp.load executes.
 ;; With -asan-instrument-reads=0 -asan-instrument-writes=0 the call is expected
 ;; to be left untouched.
 ;; %arg is not used by the body.
 define <4 x float> @load.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @load.v4f32.variable(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
 ; CHECK:       6:
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
 ; CHECK-NEXT:    br label [[TMP9]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP10]]
 ; CHECK:       10:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
 ; DISABLED-LABEL: @load.v4f32.variable(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <4 x float> [[RES]]
 ;
   %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
   ret <4 x float> %res
 }

 ;; Two consecutive vp.loads from the same pointer with different constant masks
 ;; (<1,0,0,0> and <0,0,0,1>); both are expected to be instrumented.
 ;; Each call gets its own per-lane loop (guarded by %evl != 0 and bounded by
 ;; umin(zext(%evl), 4)) that calls __asan_load4 for every lane whose bit in
 ;; the constant mask is set. Only the second result is returned.
 define <4 x float> @load.v4f32.1001.split(ptr align 4 %p, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @load.v4f32.1001.split(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
 ; CHECK:       6:
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
 ; CHECK-NEXT:    br label [[TMP9]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP10]]
 ; CHECK:       10:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
 ; CHECK:       12:
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
 ; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
 ; CHECK:       .split1:
 ; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i64 [[IV2]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
 ; CHECK:       16:
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP18]])
 ; CHECK-NEXT:    br label [[TMP19]]
 ; CHECK:       19:
 ; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
 ; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
 ; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
 ; CHECK:       .split1.split:
 ; CHECK-NEXT:    br label [[TMP20]]
 ; CHECK:       20:
 ; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
 ; CHECK-NEXT:    ret <4 x float> [[RES2]]
 ;
 ; DISABLED-LABEL: @load.v4f32.1001.split(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
 ; DISABLED-NEXT:    ret <4 x float> [[RES2]]
 ;
   %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
   %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
   ret <4 x float> %res2
 }

 ;; A vp.load that follows a full <4 x float> load from the same pointer.
 ;; Only the full load is expected to be checked (one __asan_load16 on %p);
 ;; the vp.load that follows must not receive any additional check.
 ;; NOTE(review): the function name says 1001, but the mask used here is
 ;; <0,0,0,1> -- confirm which is intended.
 define <4 x float> @load.v4f32.1001.after.full.load(ptr align 4 %p, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @load.v4f32.1001.after.full.load(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
 ; CHECK-NEXT:    call void @__asan_load16(i64 [[TMP1]])
 ; CHECK-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
 ; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[RES2]]
 ;
 ; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
 ; DISABLED-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
 ; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <4 x float> [[RES2]]
 ;
   %res = load <4 x float>, ptr %p
   %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
   ret <4 x float> %res2
 }

 ;; Scalable vector tests
 ;; ---------------------------
 ;; <vscale x 4 x float> variants of vp.load and vp.store, declared without
 ;; any function attributes.
 declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
 declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)

 ;; vp.load on a scalable vector with a runtime mask and EVL.
 ;; Same per-lane loop shape as the fixed-width tests, except that the lane
 ;; count is computed at run time: the loop bound is
 ;; umin(zext(%evl), vscale * 4). Each active lane is checked with __asan_load4.
 define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.load.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP10]])
 ; CHECK-NEXT:    br label [[TMP11]]
 ; CHECK:       11:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP12]]
 ; CHECK:       12:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
 ; DISABLED-LABEL: @scalable.load.nxv4f32(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
   %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
   ret <vscale x 4 x float> %res
 }

 ;; vp.store on a scalable vector with a runtime mask and EVL.
 ;; Same per-lane loop shape as the fixed-width tests, except that the lane
 ;; count is computed at run time: the loop bound is
 ;; umin(zext(%evl), vscale * 4). Each active lane is checked with __asan_store4.
 define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.store.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
 ; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP10]])
 ; CHECK-NEXT:    br label [[TMP11]]
 ; CHECK:       11:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP12]]
 ; CHECK:       12:
 ; CHECK-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @scalable.store.nxv4f32(
 ; DISABLED-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret void
 ;
   tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
   ret void
 }

 ; Test vp strided loads and stores.
 ; Operands: (stored value,) base pointer, i32 stride, per-lane mask, explicit
 ; vector length.
 ; NOTE(review): these names are declared without a pointer-type suffix, while
 ; the expected output refers to the ".nxv4f32.p0.i32" names -- presumably the
 ; IR reader renames the intrinsics on load; confirm.
 declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr, i32, <vscale x 4 x i1>, i32)
 declare void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>, i32)

 ;; Strided vp.load on a scalable vector with a runtime stride, mask and EVL.
 ;; The expected output zero-extends %stride to i64 once, ahead of the loop.
 ;; For each active lane it computes the address as %p + iv * stride using an
 ;; i8 getelementptr. The check is the sized form, __asan_loadN(addr, 4), not
 ;; __asan_load4.
 ;; NOTE(review): presumably the sized call is used because the element
 ;; alignment is unknown for a runtime stride -- confirm against the pass.
 define <vscale x 4 x float> @scalable.strided.load.nxv4f32(ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.strided.load.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
 ; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP12]], i64 4)
 ; CHECK-NEXT:    br label [[TMP13]]
 ; CHECK:       13:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP14]]
 ; CHECK:       14:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
 ; DISABLED-LABEL: @scalable.strided.load.nxv4f32(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
   %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
   ret <vscale x 4 x float> %res
 }

 ;; Strided vp.store on a scalable vector with a runtime stride, mask and EVL.
 ;; The expected output zero-extends %stride to i64 once, ahead of the loop.
 ;; For each active lane it computes the address as %p + iv * stride using an
 ;; i8 getelementptr. The check is the sized form, __asan_storeN(addr, 4), not
 ;; __asan_store4.
 ;; NOTE(review): presumably the sized call is used because the element
 ;; alignment is unknown for a runtime stride -- confirm against the pass.
 define void @scalable.strided.store.nxv4f32(<vscale x 4 x float> %arg, ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.strided.store.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
 ; CHECK:       9:
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
 ; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP12]], i64 4)
 ; CHECK-NEXT:    br label [[TMP13]]
 ; CHECK:       13:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP14]]
 ; CHECK:       14:
 ; CHECK-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @scalable.strided.store.nxv4f32(
 ; DISABLED-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret void
 ;
   tail call void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float> %arg, ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
   ret void
 }

 ; Test the case where the stride is a multiple of the pointer alignment.
 ; Here the stride is the constant 4 and %p is marked align 4. The expected
 ; output multiplies the lane index by the constant 4 directly (no zext of a
 ; stride value) and uses the fixed-size check __asan_load4, where the
 ; runtime-stride load test uses __asan_loadN.
 define <vscale x 4 x float> @scalable.strided.load.nxv4f32.align(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.strided.load.nxv4f32.align(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[IV]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP11]])
 ; CHECK-NEXT:    br label [[TMP12]]
 ; CHECK:       12:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP13]]
 ; CHECK:       13:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
 ; DISABLED-LABEL: @scalable.strided.load.nxv4f32.align(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
   %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 4, <vscale x 4 x i1> %mask, i32 %evl)
   ret <vscale x 4 x float> %res
 }

 ; Test vp gather and scatter.
 ; Operands: (stored value,) vector of pointers, per-lane mask, explicit vector
 ; length.
 ; NOTE(review): these names are declared with a "v4p0" suffix although the
 ; pointer operand is <vscale x 4 x ptr>; the expected output refers to the
 ; "nxv4p0" names -- presumably the IR reader renames the intrinsics on load;
 ; confirm.
 declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.v4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
 declare void @llvm.vp.scatter.nxv4f32.v4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

 ;; vp.gather on a scalable vector of pointers with a runtime mask and EVL.
 ;; The loop is guarded by %evl != 0 and bounded by umin(zext(%evl), vscale * 4).
 ;; For each active lane the expected output extracts that lane's pointer from
 ;; %vp with extractelement (no getelementptr) and checks it with __asan_load4.
 ;; The call site carries "align 4" on the pointer-vector operand.
 define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.gather.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
 ; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP10]])
 ; CHECK-NEXT:    br label [[TMP11]]
 ; CHECK:       11:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP12]]
 ; CHECK:       12:
 ; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
 ; DISABLED-LABEL: @scalable.gather.nxv4f32(
 ; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
 ;
   %res = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.v4p0(<vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)
   ret <vscale x 4 x float> %res
 }

 ;; vp.scatter on a scalable vector of pointers with a runtime mask and EVL.
 ;; The loop is guarded by %evl != 0 and bounded by umin(zext(%evl), vscale * 4).
 ;; For each active lane the expected output extracts that lane's pointer from
 ;; %vp with extractelement (no getelementptr) and checks it with __asan_store4.
 ;; The call site carries "align 4" on the pointer-vector operand.
 define void @scalable.scatter.nxv4f32(<vscale x 4 x float> %arg, <vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
 ; CHECK-LABEL: @scalable.scatter.nxv4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
 ; CHECK:       2:
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
 ; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
 ; CHECK:       .split:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
 ; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP10]])
 ; CHECK-NEXT:    br label [[TMP11]]
 ; CHECK:       11:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
 ; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
 ; CHECK:       .split.split:
 ; CHECK-NEXT:    br label [[TMP12]]
 ; CHECK:       12:
 ; CHECK-NEXT:    tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
 ; CHECK-NEXT:    ret void
 ;
 ; DISABLED-LABEL: @scalable.scatter.nxv4f32(
 ; DISABLED-NEXT:    tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; DISABLED-NEXT:    ret void
 ;
   tail call void @llvm.vp.scatter.nxv4f32.v4p0(<vscale x 4 x float> %arg, <vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)
   ret void
 }