| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 1 (the loop exits once the incremented index equals 1). |
| ; The autogenerated assertions below expect only the scalar loop in the output: |
| ; entry branches straight to for.body and no vector blocks are present. |
| define void @trip1_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip1_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 1 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 3 (the loop exits once the incremented index equals 3). |
| ; The autogenerated assertions below expect only the scalar loop in the output: |
| ; entry branches straight to for.body and no vector blocks are present. |
| define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip3_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 3 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 5 (the loop exits once the incremented index equals 5). |
| ; The autogenerated assertions below expect only the scalar loop in the output: |
| ; entry branches straight to for.body and no vector blocks are present. |
| define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip5_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 5 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 8. The autogenerated assertions below expect a |
| ; vector.body that calls llvm.experimental.get.vector.length with |
| ; (i64 8, i32 4, i1 true), uses vp.load / vp.store on <vscale x 4 x i8> with an |
| ; all-true mask, and branches unconditionally to middle.block (no back edge). |
| ; The original scalar loop is still expected to be emitted behind scalar.ph. |
| define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip8_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 |
| ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP2]] |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true) |
| ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i8> [[VP_OP_LOAD]], splat (i8 1) |
| ; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i8> [[TMP6]], [[VP_OP_LOAD1]] |
| ; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) |
| ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br label [[FOR_END:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 8 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 16. The autogenerated assertions below expect a |
| ; vector.body made of plain fixed-width <16 x i8> loads, shl, add and store that |
| ; branches unconditionally to middle.block (no back edge). The original scalar |
| ; loop is still expected to be emitted behind scalar.ph. |
| define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip16_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1) |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4:%.*]], align 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP2]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP4]], align 1 |
| ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br label [[FOR_END:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 16 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 32. The autogenerated assertions below expect a |
| ; vector.body made of plain fixed-width <32 x i8> loads, shl, add and store that |
| ; branches unconditionally to middle.block (no back edge). The original scalar |
| ; loop is still expected to be emitted behind scalar.ph. |
| define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip32_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1) |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP4:%.*]], align 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <32 x i8> [[TMP2]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP4]], align 1 |
| ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br label [[FOR_END:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 32 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop computing dst[i] = (src[i] << 1) + dst[i] over i8 elements with a |
| ; constant trip count of 24. The autogenerated assertions below expect a real |
| ; vector loop: vector.body operates on fixed-width <8 x i8> values, advances its |
| ; index by 8 per iteration, and exits to middle.block once the index equals 24. |
| ; The original scalar loop is still expected to be emitted behind scalar.ph. |
| define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { |
| ; CHECK-LABEL: @trip24_i8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1) |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i8> [[TMP3]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP4]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 |
| ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br label [[FOR_END:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP8]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP9]] |
| ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = shl i8 %0, 1 |
| %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 |
| %1 = load i8, ptr %arrayidx1, align 1 |
| %add = add i8 %mul, %1 |
| store i8 %add, ptr %arrayidx1, align 1 |
| %inc = add nuw nsw i64 %i.08, 1 |
| %exitcond.not = icmp eq i64 %inc, 24 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| attributes #0 = { "target-features"="+v,+d" vscale_range(2, 1024) } |
| |
| ; This is a non-power-of-2 low trip count, so we will try to tail-fold this. |
| ; However, the reduction is a multiply, which is only legal for fixed-length |
| ; VFs, and fixed-length VFs aren't legal for the default tail-folding style |
| ; data-with-evl. Make sure we gracefully fall back to data-without-lane-mask. |
| |
| ; Multiply reduction over 10 consecutive i8 elements of %a, with the |
| ; accumulator starting at 2; the final product is returned. |
| ; The autogenerated assertions below expect a <16 x i8> vector.body whose lanes |
| ; are predicated by comparing the lane indices `ule` against splat(9): the load |
| ; becomes llvm.masked.load, a select keeps the previous accumulator in inactive |
| ; lanes, and middle.block folds the result with llvm.vector.reduce.mul. |
| define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) { |
| ; CHECK-LABEL: @mul_non_pow_2_low_trip_count( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer |
| ; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ule <16 x i64> [[VEC_IV]], splat (i64 9) |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) |
| ; CHECK-NEXT: [[TMP2]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP2]], <16 x i8> [[VEC_PHI]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 |
| ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> [[TMP3]]) |
| ; CHECK-NEXT: br label [[FOR_END:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 2, [[ENTRY]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[GEP]], align 1 |
| ; CHECK-NEXT: [[MUL]] = mul i8 [[TMP5]], [[RDX]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i8 [ [[MUL]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i8 [[MUL_LCSSA]] |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] |
| %rdx = phi i8 [ 2, %entry ], [ %mul, %for.body ] |
| %gep = getelementptr i8, ptr %a, i64 %iv |
| %0 = load i8, ptr %gep |
| %mul = mul i8 %0, %rdx |
| %iv.next = add i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %iv.next, 10 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: ; preds = %for.body |
| ret i8 %mul |
| } |