| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=gvn -S < %s | FileCheck %s |
| |
| ; Check that in both cases the second load is recognized as redundant |
| ; and is removed. |
| |
| define <128 x i8> @f0(ptr %a0, <128 x i8> %a1, <128 x i8> %a2) { |
| ; CHECK-LABEL: @f0( |
| ; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] |
| ; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr align 4 [[A0:%.*]], <128 x i1> [[V0]], <128 x i8> undef) |
| ; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] |
| ; CHECK-NEXT: ret <128 x i8> [[V3]] |
| ; |
| %v0 = icmp eq <128 x i8> %a1, %a2 |
| %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) |
| %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) |
| %v3 = add <128 x i8> %v1, %v2 |
| ret <128 x i8> %v3 |
| } |
| |
| define <128 x i8> @f1(ptr %a0, <128 x i8> %a1, <128 x i8> %a2) { |
| ; CHECK-LABEL: @f1( |
| ; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] |
| ; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, ptr [[A0:%.*]], i32 1 |
| ; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr align 4 [[A0]], <128 x i1> [[V0]], <128 x i8> undef) |
| ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[A2]], ptr align 4 [[V1]], <128 x i1> [[V0]]) |
| ; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]] |
| ; CHECK-NEXT: ret <128 x i8> [[V4]] |
| ; |
| %v0 = icmp eq <128 x i8> %a1, %a2 |
| %v1 = getelementptr <128 x i8>, ptr %a0, i32 1 |
| %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) |
| call void @llvm.masked.store.v128i8.p0(<128 x i8> %a2, ptr %v1, i32 4, <128 x i1> %v0) |
| %v3 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) |
| %v4 = add <128 x i8> %v2, %v3 |
| ret <128 x i8> %v4 |
| } |
| |
| define <4 x float> @forward_masked_load(ptr %0, ptr %1) { |
| ; CHECK-LABEL: @forward_masked_load( |
| ; CHECK-NEXT: [[LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer) |
| ; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <4 x i1> splat (i1 true)) |
| ; CHECK-NEXT: ret <4 x float> [[LOAD1]] |
| ; |
| %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) |
| %load1 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| call void @llvm.masked.store.v4f32.p0(<4 x float> %load1, ptr %1, i32 1, <4 x i1> %mask) |
| %load2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| ret <4 x float> %load2 |
| } |
| |
| define <4 x float> @forward_masked_load_arbitrary_mask(ptr %loc_a, ptr %loc_b, <4 x i1> %mask) { |
| ; CHECK-LABEL: @forward_masked_load_arbitrary_mask( |
| ; CHECK-NEXT: [[LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[LOC_A:%.*]], <4 x i1> [[MASK:%.*]], <4 x float> zeroinitializer) |
| ; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD1]], ptr align 1 [[LOC_B:%.*]], <4 x i1> [[MASK]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[MASK]], <4 x float> [[LOAD1]], <4 x float> zeroinitializer |
| ; CHECK-NEXT: ret <4 x float> [[TMP1]] |
| ; |
| %load1 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %loc_a, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| call void @llvm.masked.store.v4f32.p0(<4 x float> %load1, ptr %loc_b, i32 1, <4 x i1> %mask) |
| %load2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %loc_b, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| ret <4 x float> %load2 |
| } |
| |
| define <4 x float> @forward_binop_splat_i1_mask(ptr %0, ptr %1) { |
| ; CHECK-LABEL: @forward_binop_splat_i1_mask( |
| ; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 |
| ; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_16]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]] |
| ; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <4 x i1> splat (i1 true)) |
| ; CHECK-NEXT: ret <4 x float> [[FMUL]] |
| ; |
| %mask = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) |
| %load.0.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| %gep.0.16 = getelementptr i8, ptr %0, i32 16 |
| %load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| %fmul = fmul <4 x float> %load.0.0, %load.0.16 |
| call void @llvm.masked.store.v4f32.p0(<4 x float> %fmul, ptr %1, i32 1, <4 x i1> %mask) |
| %load.1.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| ret <4 x float> %load.1.0 |
| } |
| |
| define <4 x float> @forward_binop_with_sel(ptr %0, ptr %1, i32 %a, i32 %b, <4 x float> %passthrough) { |
| ; CHECK-LABEL: @forward_binop_with_sel( |
| ; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[A:%.*]], i32 [[B:%.*]]) |
| ; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> [[MASK]], <4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 |
| ; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_16]], <4 x i1> [[MASK]], <4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]] |
| ; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <4 x i1> [[MASK]]) |
| ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[MASK]], <4 x float> [[FMUL]], <4 x float> [[PASSTHROUGH:%.*]] |
| ; CHECK-NEXT: ret <4 x float> [[TMP3]] |
| ; |
| %mask = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %a, i32 %b) |
| %load.0.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| %gep.0.16 = getelementptr i8, ptr %0, i32 16 |
| %load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) |
| %fmul = fmul <4 x float> %load.0.0, %load.0.16 |
| call void @llvm.masked.store.v4f32.p0(<4 x float> %fmul, ptr %1, i32 1, <4 x i1> %mask) |
| %load.1.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> %passthrough) |
| ret <4 x float> %load.1.0 |
| } |
| |
| define <vscale x 4 x float> @forward_masked_load_scalable(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @forward_masked_load_scalable( |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]) |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[LOAD1]], <vscale x 4 x float> [[PASSTHROUGH]] |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP5]] |
| ; |
| %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load1 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough) |
| call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask) |
| %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load2 |
| } |
| |
| define <vscale x 4 x float> @forward_masked_load_scalable_type_mismatch(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @forward_masked_load_scalable_type_mismatch( |
| ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD1:%.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x double> zeroinitializer) |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK]]) |
| ; CHECK-NEXT: [[LOAD2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]) |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD2]] |
| ; |
| %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load1 = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x double> zeroinitializer) |
| call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask) |
| %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load2 |
| } |
| |
| define <vscale x 4 x float> @generate_sel_with_passthrough(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @generate_sel_with_passthrough( |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[LOAD1]], <vscale x 4 x float> [[PASSTHROUGH:%.*]] |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP5]] |
| ; |
| %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load1 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer) |
| call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask) |
| %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load2 |
| } |
| |
| define <vscale x 4 x float> @forward_binop_with_sel_scalable(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @forward_binop_with_sel_scalable( |
| ; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 |
| ; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]] |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK]]) |
| ; CHECK-NEXT: [[TMP3:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> [[FMUL]], <vscale x 4 x float> [[PASSTHROUGH:%.*]] |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP3]] |
| ; |
| %mask = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer) |
| %gep.0.16 = getelementptr i8, ptr %0, i32 16 |
| %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer) |
| %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16 |
| call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask) |
| %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load.1.0 |
| } |
| |
| define <vscale x 4 x float> @load_mask_differs(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @load_mask_differs( |
| ; CHECK-NEXT: [[MASK0:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) |
| ; CHECK-NEXT: [[MASK1:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 |
| ; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]] |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK0]]) |
| ; CHECK-NEXT: [[LOAD_1_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK1]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]) |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD_1_0]] |
| ; |
| %mask0 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) |
| %mask1 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer) |
| %gep.0.16 = getelementptr i8, ptr %0, i32 16 |
| %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer) |
| %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16 |
| call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask0) |
| %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask1, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load.1.0 |
| } |
| |
| define <vscale x 4 x float> @store_mask_differs(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) { |
| ; CHECK-LABEL: @store_mask_differs( |
| ; CHECK-NEXT: [[MASK0:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) |
| ; CHECK-NEXT: [[MASK1:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| ; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 |
| ; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer) |
| ; CHECK-NEXT: [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]] |
| ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK1]]) |
| ; CHECK-NEXT: [[LOAD_1_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]) |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD_1_0]] |
| ; |
| %mask0 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) |
| %mask1 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) |
| %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer) |
| %gep.0.16 = getelementptr i8, ptr %0, i32 16 |
| %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer) |
| %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16 |
| call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask1) |
| %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> %passthrough) |
| ret <vscale x 4 x float> %load.1.0 |
| } |