[VPlan] Set correct flags when creating and cloning VPWidenCastRecipe.
Make sure that we set the correct wrap flags when creating new
VPWidenCastRecipes for truncs and preserve the flags from the recipe
directly when cloning, to make sure they are not dropped.
Fixes https://github.com/llvm/llvm-project/issues/160396
GitOrigin-RevId: 70a26da63992142ba2221f1034048ea883cdcb3d
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index e64cefd..0822511 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -705,6 +705,9 @@
VPIRFlags(WrapFlagsTy WrapFlags)
: OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
+ VPIRFlags(TruncFlagsTy TruncFlags)
+ : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
+
VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
VPIRFlags(DisjointFlagsTy DisjointFlags)
@@ -1494,9 +1497,10 @@
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
const VPIRFlags &Flags = {},
+ const VPIRMetadata &Metadata = {},
DebugLoc DL = DebugLoc::getUnknown())
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
- VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) {
+ VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
assert(flagsValidForOpcode(Opcode) &&
"Set flags not supported for the provided opcode");
}
@@ -1504,11 +1508,11 @@
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
+ auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
+ *this, getDebugLoc());
if (auto *UV = getUnderlyingValue())
- return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
- *cast<CastInst>(UV));
-
- return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
+ New->setUnderlyingValue(UV);
+ return New;
}
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
diff --git a/lib/Transforms/Vectorize/VPlanRecipes.cpp b/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa3de36..deb64bf 100644
--- a/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2016,13 +2016,13 @@
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+ Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc ||
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
Opcode == VPInstruction::WideIVStep ||
Opcode == VPInstruction::ReductionStartVector ||
Opcode == VPInstruction::ComputeReductionResult;
case OperationType::NonNegOp:
- return Opcode == Instruction::ZExt;
- break;
+ return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
case OperationType::Cmp:
return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
case OperationType::Other:
diff --git a/lib/Transforms/Vectorize/VPlanTransforms.cpp b/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 84f0205..58fab8f 100644
--- a/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2195,7 +2195,8 @@
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
VPWidenCastRecipe *NewOp =
IterIsEmpty
- ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
+ ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy,
+ VPIRFlags::TruncFlagsTy(false, false))
: ProcessedIter->second;
R.setOperand(Idx, NewOp);
if (!IterIsEmpty)
@@ -3566,13 +3567,13 @@
Mul, Ext0, Ext1, Ext)) {
auto *NewExt0 = new VPWidenCastRecipe(
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
- Ext0->getDebugLoc());
+ *Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
VPWidenCastRecipe *NewExt1 = NewExt0;
if (Ext0 != Ext1) {
NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
- Ext->getResultType(), *Ext1,
+ Ext->getResultType(), *Ext1, *Ext1,
Ext1->getDebugLoc());
NewExt1->insertBefore(Ext1);
}
diff --git a/test/Transforms/LoopVectorize/cse-casts.ll b/test/Transforms/LoopVectorize/cse-casts.ll
new file mode 100644
index 0000000..e923560
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cse-casts.ll
@@ -0,0 +1,351 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
+
+define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define i8 @preserve_flags_when_cloning_trunc(
+; CHECK-SAME: i8 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> splat (i8 1), i8 [[START]], i32 0
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i8> [ splat (i8 1), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
+; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP4]], align 2
+; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP5]], align 2
+; CHECK-NEXT: [[TMP6]] = mul <4 x i8> [[VEC_PHI]], splat (i8 3)
+; CHECK-NEXT: [[TMP7]] = mul <4 x i8> [[VEC_PHI1]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 416
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <4 x i8> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> [[BIN_RDX]])
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %red = phi i8 [ %red.next, %loop ], [ %start, %entry ]
+ %l = load i32, ptr %src, align 4
+ %cmp = icmp ne i32 %l, 0
+ %cmp.ext = zext i1 %cmp to i64
+ %cmp.trunc = trunc i64 %cmp.ext to i16
+ %gep.dst = getelementptr i16, ptr %dst, i64 %iv
+ store i16 %cmp.trunc, ptr %gep.dst, align 2
+ %red.next = mul i8 %red, 3
+ %iv.next = add i64 %iv, 1
+ %ec = icmp ult i64 %iv, 416
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret i8 %red.next
+}
+
+
+define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: define void @preserve_flags_narrowing_extends_and_truncs(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8> [[TMP3]], i8 [[TMP5]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ [[TMP3]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP6]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK: [[PRED_LOAD_IF3]]:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP9]], i32 2
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
+; CHECK: [[PRED_LOAD_CONTINUE4]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i8> [ [[TMP7]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP10]], %[[PRED_LOAD_IF3]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]]
+; CHECK: [[PRED_LOAD_IF5]]:
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
+; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP11]], i8 [[TMP13]], i32 3
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
+; CHECK: [[PRED_LOAD_CONTINUE6]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP11]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP14]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
+; CHECK: [[PRED_LOAD_IF7]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> poison, i8 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]]
+; CHECK: [[PRED_LOAD_CONTINUE8]]:
+; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i8> [ poison, %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP18]], %[[PRED_LOAD_IF7]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]]
+; CHECK: [[PRED_LOAD_IF9]]:
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 5
+; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP21]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE10]]
+; CHECK: [[PRED_LOAD_CONTINUE10]]:
+; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i8> [ [[TMP19]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP22]], %[[PRED_LOAD_IF9]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12:.*]]
+; CHECK: [[PRED_LOAD_IF11]]:
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 6
+; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i8> [[TMP23]], i8 [[TMP25]], i32 2
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE12]]
+; CHECK: [[PRED_LOAD_CONTINUE12]]:
+; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ [[TMP23]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP26]], %[[PRED_LOAD_IF11]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF13:.*]], label %[[PRED_LOAD_CONTINUE14:.*]]
+; CHECK: [[PRED_LOAD_IF13]]:
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 7
+; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP29]], i32 3
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE14]]
+; CHECK: [[PRED_LOAD_CONTINUE14]]:
+; CHECK-NEXT: [[TMP31:%.*]] = phi <4 x i8> [ [[TMP27]], %[[PRED_LOAD_CONTINUE12]] ], [ [[TMP30]], %[[PRED_LOAD_IF13]] ]
+; CHECK-NEXT: [[TMP32:%.*]] = zext <4 x i8> [[TMP15]] to <4 x i64>
+; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64>
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0
+; CHECK-NEXT: store i64 [[TMP35]], ptr [[TMP34]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK: [[PRED_STORE_IF15]]:
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[TMP32]], i32 1
+; CHECK-NEXT: store i64 [[TMP37]], ptr [[TMP36]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; CHECK: [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK: [[PRED_STORE_IF17]]:
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP32]], i32 2
+; CHECK-NEXT: store i64 [[TMP39]], ptr [[TMP38]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; CHECK: [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK: [[PRED_STORE_IF19]]:
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 3
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[TMP32]], i32 3
+; CHECK-NEXT: store i64 [[TMP41]], ptr [[TMP40]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; CHECK: [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[TMP33]], i32 0
+; CHECK-NEXT: store i64 [[TMP43]], ptr [[TMP42]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; CHECK: [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK: [[PRED_STORE_IF23]]:
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 5
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[TMP33]], i32 1
+; CHECK-NEXT: store i64 [[TMP45]], ptr [[TMP44]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; CHECK: [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK: [[PRED_STORE_IF25]]:
+; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 6
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i64> [[TMP33]], i32 2
+; CHECK-NEXT: store i64 [[TMP47]], ptr [[TMP46]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; CHECK: [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK: [[PRED_STORE_IF27]]:
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 7
+; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i64> [[TMP33]], i32 3
+; CHECK-NEXT: store i64 [[TMP49]], ptr [[TMP48]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1
+; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2
+; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3
+; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1
+; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2
+; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3
+; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 4
+; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 5
+; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 6
+; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 7
+; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP58]], i32 0
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x ptr> [[TMP62]], ptr [[TMP59]], i32 1
+; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x ptr> [[TMP63]], ptr [[TMP60]], i32 2
+; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; CHECK: [[PRED_LOAD_IF29]]:
+; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
+; CHECK: [[PRED_LOAD_CONTINUE30]]:
+; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i8> [ poison, %[[PRED_STORE_CONTINUE28]] ], [ [[TMP67]], %[[PRED_LOAD_IF29]] ]
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; CHECK: [[PRED_LOAD_IF31]]:
+; CHECK-NEXT: [[TMP69:%.*]] = load i8, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP68]], i8 [[TMP69]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
+; CHECK: [[PRED_LOAD_CONTINUE32]]:
+; CHECK-NEXT: [[TMP71:%.*]] = phi <4 x i8> [ [[TMP68]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP70]], %[[PRED_LOAD_IF31]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; CHECK: [[PRED_LOAD_IF33]]:
+; CHECK-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP52]], align 1
+; CHECK-NEXT: [[TMP73:%.*]] = insertelement <4 x i8> [[TMP71]], i8 [[TMP72]], i32 2
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
+; CHECK: [[PRED_LOAD_CONTINUE34]]:
+; CHECK-NEXT: [[TMP74:%.*]] = phi <4 x i8> [ [[TMP71]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP73]], %[[PRED_LOAD_IF33]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; CHECK: [[PRED_LOAD_IF35]]:
+; CHECK-NEXT: [[TMP75:%.*]] = load i8, ptr [[TMP53]], align 1
+; CHECK-NEXT: [[TMP76:%.*]] = insertelement <4 x i8> [[TMP74]], i8 [[TMP75]], i32 3
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
+; CHECK: [[PRED_LOAD_CONTINUE36]]:
+; CHECK-NEXT: [[TMP77:%.*]] = phi <4 x i8> [ [[TMP74]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP76]], %[[PRED_LOAD_IF35]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; CHECK: [[PRED_LOAD_IF37]]:
+; CHECK-NEXT: [[TMP78:%.*]] = load i8, ptr [[TMP58]], align 1
+; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i8> poison, i8 [[TMP78]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
+; CHECK: [[PRED_LOAD_CONTINUE38]]:
+; CHECK-NEXT: [[TMP80:%.*]] = phi <4 x i8> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP79]], %[[PRED_LOAD_IF37]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; CHECK: [[PRED_LOAD_IF39]]:
+; CHECK-NEXT: [[TMP81:%.*]] = load i8, ptr [[TMP59]], align 1
+; CHECK-NEXT: [[TMP82:%.*]] = insertelement <4 x i8> [[TMP80]], i8 [[TMP81]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
+; CHECK: [[PRED_LOAD_CONTINUE40]]:
+; CHECK-NEXT: [[TMP83:%.*]] = phi <4 x i8> [ [[TMP80]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP82]], %[[PRED_LOAD_IF39]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; CHECK: [[PRED_LOAD_IF41]]:
+; CHECK-NEXT: [[TMP84:%.*]] = load i8, ptr [[TMP60]], align 1
+; CHECK-NEXT: [[TMP85:%.*]] = insertelement <4 x i8> [[TMP83]], i8 [[TMP84]], i32 2
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
+; CHECK: [[PRED_LOAD_CONTINUE42]]:
+; CHECK-NEXT: [[TMP86:%.*]] = phi <4 x i8> [ [[TMP83]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP85]], %[[PRED_LOAD_IF41]] ]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
+; CHECK: [[PRED_LOAD_IF43]]:
+; CHECK-NEXT: [[TMP87:%.*]] = load i8, ptr [[TMP61]], align 1
+; CHECK-NEXT: [[TMP88:%.*]] = insertelement <4 x i8> [[TMP86]], i8 [[TMP87]], i32 3
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
+; CHECK: [[PRED_LOAD_CONTINUE44]]:
+; CHECK-NEXT: [[TMP89:%.*]] = phi <4 x i8> [ [[TMP86]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP88]], %[[PRED_LOAD_IF43]] ]
+; CHECK-NEXT: [[TMP90:%.*]] = trunc <4 x i8> [[TMP77]] to <4 x i1>
+; CHECK-NEXT: [[TMP91:%.*]] = trunc <4 x i8> [[TMP89]] to <4 x i1>
+; CHECK-NEXT: [[TMP92:%.*]] = and <4 x i1> [[TMP90]], splat (i1 true)
+; CHECK-NEXT: [[TMP93:%.*]] = and <4 x i1> [[TMP91]], splat (i1 true)
+; CHECK-NEXT: [[TMP94:%.*]] = select <4 x i1> [[TMP90]], <4 x float> splat (float 1.000000e+00), <4 x float> zeroinitializer
+; CHECK-NEXT: [[TMP95:%.*]] = select <4 x i1> [[TMP91]], <4 x float> splat (float 1.000000e+00), <4 x float> zeroinitializer
+; CHECK-NEXT: [[TMP96:%.*]] = select <4 x i1> [[TMP92]], <4 x float> splat (float 3.000000e+00), <4 x float> [[TMP94]]
+; CHECK-NEXT: [[TMP97:%.*]] = select <4 x i1> [[TMP93]], <4 x float> splat (float 3.000000e+00), <4 x float> [[TMP95]]
+; CHECK-NEXT: [[TMP98:%.*]] = bitcast <4 x float> [[TMP96]] to <4 x i32>
+; CHECK-NEXT: [[TMP99:%.*]] = bitcast <4 x float> [[TMP97]] to <4 x i32>
+; CHECK-NEXT: [[TMP100:%.*]] = trunc <4 x i32> [[TMP98]] to <4 x i8>
+; CHECK-NEXT: [[TMP101:%.*]] = trunc <4 x i32> [[TMP99]] to <4 x i8>
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
+; CHECK: [[PRED_STORE_IF45]]:
+; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0
+; CHECK-NEXT: store i8 [[TMP102]], ptr [[TMP50]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
+; CHECK: [[PRED_STORE_CONTINUE46]]:
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
+; CHECK: [[PRED_STORE_IF47]]:
+; CHECK-NEXT: [[TMP103:%.*]] = extractelement <4 x i8> [[TMP100]], i32 1
+; CHECK-NEXT: store i8 [[TMP103]], ptr [[TMP51]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE48]]
+; CHECK: [[PRED_STORE_CONTINUE48]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF49:.*]], label %[[PRED_STORE_CONTINUE50:.*]]
+; CHECK: [[PRED_STORE_IF49]]:
+; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i8> [[TMP100]], i32 2
+; CHECK-NEXT: store i8 [[TMP104]], ptr [[TMP52]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE50]]
+; CHECK: [[PRED_STORE_CONTINUE50]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF51:.*]], label %[[PRED_STORE_CONTINUE52:.*]]
+; CHECK: [[PRED_STORE_IF51]]:
+; CHECK-NEXT: [[TMP105:%.*]] = extractelement <4 x i8> [[TMP100]], i32 3
+; CHECK-NEXT: store i8 [[TMP105]], ptr [[TMP53]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE52]]
+; CHECK: [[PRED_STORE_CONTINUE52]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF53:.*]], label %[[PRED_STORE_CONTINUE54:.*]]
+; CHECK: [[PRED_STORE_IF53]]:
+; CHECK-NEXT: [[TMP106:%.*]] = extractelement <4 x i8> [[TMP101]], i32 0
+; CHECK-NEXT: store i8 [[TMP106]], ptr [[TMP58]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE54]]
+; CHECK: [[PRED_STORE_CONTINUE54]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF55:.*]], label %[[PRED_STORE_CONTINUE56:.*]]
+; CHECK: [[PRED_STORE_IF55]]:
+; CHECK-NEXT: [[TMP107:%.*]] = extractelement <4 x i8> [[TMP101]], i32 1
+; CHECK-NEXT: store i8 [[TMP107]], ptr [[TMP59]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE56]]
+; CHECK: [[PRED_STORE_CONTINUE56]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF57:.*]], label %[[PRED_STORE_CONTINUE58:.*]]
+; CHECK: [[PRED_STORE_IF57]]:
+; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i8> [[TMP101]], i32 2
+; CHECK-NEXT: store i8 [[TMP108]], ptr [[TMP60]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE58]]
+; CHECK: [[PRED_STORE_CONTINUE58]]:
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
+; CHECK: [[PRED_STORE_IF59]]:
+; CHECK-NEXT: [[TMP109:%.*]] = extractelement <4 x i8> [[TMP101]], i32 3
+; CHECK-NEXT: store i8 [[TMP109]], ptr [[TMP61]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE60]]
+; CHECK: [[PRED_STORE_CONTINUE60]]:
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
+ %l = load i8, ptr %gep.A
+ %l.ext = zext i8 %l to i64
+ %gep.C = getelementptr inbounds i8, ptr %C, i64 %iv
+ store i64 %l.ext, ptr %gep.C
+ %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
+ %l.1 = load i8, ptr %gep.B, align 1
+ %masked = and i8 %l.1, 1
+ %l.1.trunc = trunc i8 %l.1 to i1
+ %sel.0 = select i1 %l.1.trunc, float 1.000000e+00, float 0.000000e+00
+ %masked.trunc = trunc i8 %masked to i1
+ %sel.1 = select i1 %masked.trunc, float 3.000000e+00, float %sel.0
+ %bc = bitcast float %sel.1 to i32
+ %bc.trunc = trunc i32 %bc to i8
+ store i8 %bc.trunc, ptr %gep.B, align 1
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 1
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}