| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^middle.block" --version 5 |
| ; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s |
| |
| ; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds) |
| ; are dropped from instructions in blocks that need predication and are linearized |
| ; and masked after vectorization. We only drop flags from scalar instructions that |
| ; contribute to the address computation of a masked vector load/store. After |
| ; linearizing the control flow and removing their guarding condition, these |
| ; instructions could generate a poison value which would be used as the base address |
| ; of the masked vector load/store (see PR52111). For gather/scatter cases, |
| ; poison-generating flags can be preserved since poison addresses in the vector GEP |
| ; reaching the gather/scatter instruction will be masked-out by the gather/scatter |
| ; instruction itself and won't be used. |
| ; We need AVX512 target features for the loop to be vectorized with masked |
| ; loads/stores instead of scalarized, predicated memory accesses. |
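| ; |
| ; For illustration only (not checked by FileCheck), the first test below contains a |
| ; predicated block of the form: |
| ;   if.then: |
| ;     %i27 = sub nuw nsw i64 %iv, 1 |
| ;     %i29 = getelementptr inbounds float, ptr %input, i64 %i27 |
| ;     %i30 = load float, ptr %i29, align 4 |
| ; After vectorization the block is linearized and the load becomes a masked load, so |
| ; the address computation executes unconditionally and must lose its flags, roughly: |
| ;     %sub = sub i64 %index, 1                           ; nuw/nsw dropped |
| ;     %gep = getelementptr float, ptr %input, i64 %sub   ; inbounds dropped |
| ;     %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep, i32 4, <4 x i1> %mask, <4 x float> poison) |
| ; The value names %sub, %gep, %mask and %v here are illustrative, not checked. |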
| |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-pc-linux-gnu" |
| |
| ; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load. |
| ; Test for PR52111. |
| define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_scalar_nuw_nsw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0:![0-9]+]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sub nuw nsw i64 %iv, 1 |
| %i29 = getelementptr inbounds float, ptr %input, i64 %i27 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Variant with getelementptr nusw. |
| define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_scalar_gep_nusw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw float, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sub nuw nsw i64 %iv, 1 |
| %i29 = getelementptr nusw float, ptr %input, i64 %i27 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr nusw float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Variant with getelementptr nuw. |
| define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_scalar_gep_nuw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nuw float, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sub nuw nsw i64 %iv, 1 |
| %i29 = getelementptr nuw float, ptr %input, i64 %i27 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr nuw float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load. |
| ; In this case, 'sub' and 'getelementptr' are not guarded by the predicate. |
| define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_nonpred_scalar_nuw_nsw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP3]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i27 = sub i64 %iv, 1 |
| %i29 = getelementptr float, ptr %input, i64 %i27 |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather. |
| define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @preserve_vector_nuw_nsw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[TMP2]], splat (i64 2) |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP3]] |
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sub nuw nsw i64 %iv, 1 |
| %i28 = mul nuw nsw i64 %i27, 2 |
| %i29 = getelementptr inbounds float, ptr %input, i64 %i28 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load. |
| define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_vector_nuw_nsw( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1) |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 |
| ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP4]], align 8 |
| ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP7]], i32 4, <4 x i1> [[TMP5]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv |
| %i27 = sub nuw nsw i64 %iv, 1 |
| %i29 = getelementptr inbounds float, ptr %input, i64 %i27 |
| store ptr %i29, ptr %gep |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Preserve poison-generating flags from 'sub', which does not contribute to the address |
| ; computation of any masked load/store/gather/scatter. |
| define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @preserve_nuw_nsw_no_addr( |
| ; CHECK-SAME: ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> [[TMP1]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 |
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sub nuw nsw i64 %iv, 1 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ] |
| %i35 = getelementptr inbounds i64, ptr %output, i64 %iv |
| store i64 %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load. |
| define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @drop_scalar_exact( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = sdiv i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP5]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP7]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i7 = icmp ne i64 %iv, 0 |
| %i8 = and i64 %iv, 1 |
| %i9 = icmp eq i64 %i8, 0 |
| %i10 = and i1 %i7, %i9 |
| br i1 %i10, label %if.end, label %if.then |
| |
| if.then: |
| %i26 = sdiv exact i64 %iv, 1 |
| %i29 = getelementptr inbounds float, ptr %input, i64 %i26 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { |
| ; CHECK-LABEL: define void @drop_zext_nneg( |
| ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> zeroinitializer |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 |
| ; CHECK-NEXT: store double [[TMP5]], ptr [[P1]], align 8 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 |
| ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %body |
| |
| body: |
| %iv = phi i64 [ %next, %else ], [ 0, %entry ] |
| %0 = trunc i64 %iv to i32 |
| %c = icmp eq i32 %0, 0 |
| br i1 %c, label %then, label %else |
| |
| then: |
| %zext = zext nneg i32 %0 to i64 |
| %idx1 = getelementptr double, ptr %p, i64 %zext |
| %idx2 = getelementptr double, ptr %p, i64 %zext |
| %1 = load double, ptr %idx2, align 8 |
| br label %else |
| |
| else: |
| %phi = phi double [ %1, %then ], [ 0.000000e+00, %body ] |
| store double %phi, ptr %p1, align 8 |
| %next = add i64 %iv, 1 |
| %cmp = icmp eq i64 %next, 1024 |
| br i1 %cmp, label %exit, label %body |
| |
| exit: |
| ret void |
| } |
| |
| ; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather. |
| define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @preserve_vector_exact_no_addr( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP5]] |
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]] |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i7 = icmp ne i64 %iv, 0 |
| %i8 = and i64 %iv, 1 |
| %i9 = icmp eq i64 %i8, 0 |
| %i10 = and i1 %i7, %i9 |
| br i1 %i10, label %if.end, label %if.then |
| |
| if.then: |
| %i26 = sdiv exact i64 %iv, 2 |
| %i29 = getelementptr inbounds float, ptr %input, i64 %i26 |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Preserve poison-generating flags from 'sdiv', which does not contribute to the address |
| ; computation of any masked load/store/gather/scatter. |
| define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @preserve_exact_no_addr( |
| ; CHECK-SAME: ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> [[TMP1]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 |
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i27 = sdiv exact i64 %iv, 2 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ] |
| %i35 = getelementptr inbounds i64, ptr %output, i64 %iv |
| store i64 %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| ; Make sure we don't vectorize a loop with a phi feeding a poison value to |
| ; a masked load/gather. |
| define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define void @dont_vectorize_poison_phi( |
| ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[POISON:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[IV_INC:%.*]], %[[IF_END:.*]] ] |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_INC]], %[[IF_END]] ] |
| ; CHECK-NEXT: [[I23:%.*]] = icmp eq i64 [[IV]], 0 |
| ; CHECK-NEXT: br i1 [[I23]], label %[[IF_END]], label %[[IF_THEN:.*]] |
| ; CHECK: [[IF_THEN]]: |
| ; CHECK-NEXT: [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[POISON]] |
| ; CHECK-NEXT: [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load [[META0]] |
| ; CHECK-NEXT: br label %[[IF_END]] |
| ; CHECK: [[IF_END]]: |
| ; CHECK-NEXT: [[I34:%.*]] = phi float [ 0.000000e+00, %[[LOOP_HEADER]] ], [ [[I30]], %[[IF_THEN]] ] |
| ; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[IV]] |
| ; CHECK-NEXT: store float [[I34]], ptr [[I35]], align 4 |
| ; CHECK-NEXT: [[IV_INC]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_EXIT:.*]], label %[[LOOP_HEADER]] |
| ; CHECK: [[LOOP_EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ] |
| %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ] |
| %i23 = icmp eq i64 %iv, 0 |
| br i1 %i23, label %if.end, label %if.then |
| |
| if.then: |
| %i29 = getelementptr inbounds float, ptr %input, i64 %poison |
| %i30 = load float, ptr %i29, align 4, !invariant.load !0 |
| br label %if.end |
| |
| if.end: |
| %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ] |
| %i35 = getelementptr inbounds float, ptr %output, i64 %iv |
| store float %i34, ptr %i35, align 4 |
| %iv.inc = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.inc, 4 |
| br i1 %exitcond, label %loop.exit, label %loop.header |
| |
| loop.exit: |
| ret void |
| } |
| |
| @c = external global [5 x i8] |
| |
| ; Test case for https://github.com/llvm/llvm-project/issues/70590. |
| ; Note that the then block has UB, but I could not find any other way to |
| ; construct a suitable test case. |
| define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { |
| ; CHECK-LABEL: define void @pr70590_recipe_without_underlying_instr( |
| ; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[DST:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] |
| ; CHECK: [[PRED_SREM_IF]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] |
| ; CHECK: [[PRED_SREM_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] |
| ; CHECK: [[PRED_SREM_IF1]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] |
| ; CHECK: [[PRED_SREM_CONTINUE2]]: |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] |
| ; CHECK: [[PRED_SREM_IF3]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] |
| ; CHECK: [[PRED_SREM_CONTINUE4]]: |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] |
| ; CHECK: [[PRED_SREM_IF5]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] |
| ; CHECK: [[PRED_SREM_CONTINUE6]]: |
| ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], poison |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 |
| ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ] |
| %cmp = icmp eq i64 %iv, %n |
| br i1 %cmp, label %loop.latch, label %then |
| |
| then: |
| %rem = srem i64 3, 0 |
| %add3 = add i64 %rem, -3 |
| %add5 = add i64 %iv, %add3 |
| %gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5 |
| %l = load i8, ptr %gep, align 1 |
| br label %loop.latch |
| |
| loop.latch: |
| %sr = phi i8 [ 0, %loop.header ], [ %l , %then ] |
| %gep.dst = getelementptr i8, ptr %dst, i64 %iv |
| store i8 %sr, ptr %gep.dst, align 4 |
| %inc = add i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %inc, 4 |
| br i1 %exitcond.not, label %exit, label %loop.header |
| |
| exit: |
| ret void |
| } |
| |
| ; Variation of the above test with the poison value being used in all lanes. |
| define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst, ptr noalias %aux) { |
| ; CHECK-LABEL: define void @recipe_without_underlying_instr_lanes_used( |
| ; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[AUX:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] |
| ; CHECK: [[PRED_SREM_IF]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] |
| ; CHECK: [[PRED_SREM_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] |
| ; CHECK: [[PRED_SREM_IF1]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] |
| ; CHECK: [[PRED_SREM_CONTINUE2]]: |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] |
| ; CHECK: [[PRED_SREM_IF3]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] |
| ; CHECK: [[PRED_SREM_CONTINUE4]]: |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] |
| ; CHECK: [[PRED_SREM_IF5]]: |
| ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] |
| ; CHECK: [[PRED_SREM_CONTINUE6]]: |
| ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], poison |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] |
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison |
| ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 |
| ; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8 |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 |
| ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ] |
| %cmp = icmp eq i64 %iv, %n |
| br i1 %cmp, label %loop.latch, label %then |
| |
| then: |
| %rem = srem i64 3, 0 |
| %add3 = add i64 %rem, -3 |
| %add5 = add i64 %iv, %add3 |
| %gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5 |
| %l = load i8, ptr %gep, align 1 |
| br label %loop.latch |
| |
| loop.latch: |
| %sr = phi i8 [ 0, %loop.header ], [ %l , %then ] |
| %p = phi i64 [ 0, %loop.header ], [ %rem, %then ] |
| store i64 %p, ptr %aux |
| %gep.dst = getelementptr i8, ptr %dst, i64 %iv |
| store i8 %sr, ptr %gep.dst, align 4 |
| %inc = add i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %inc, 4 |
| br i1 %exitcond.not, label %exit, label %loop.header |
| |
| exit: |
| ret void |
| } |
| |
| ; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds |
| ; version is used unconditionally in the store in the latch. |
| ; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load and store. |
| define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr readonly %C) #0 { |
| ; CHECK-LABEL: define void @Bgep_inbounds_unconditionally_due_to_store( |
| ; CHECK-SAME: ptr noalias [[B:%.*]], ptr readonly [[C:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20) |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 |
| ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 2.000000e+00) |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP5]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0 |
| ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 |
| ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; |
| |
| entry: |
| br label %loop.body |
| |
| loop.body: |
| %iv1 = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %C.gep = getelementptr inbounds i32, ptr %C, i64 %iv1 |
| %C.lv = load i32, ptr %C.gep, align 4 |
| %cmp = icmp eq i32 %C.lv, 20 |
| %B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv1 |
| br i1 %cmp, label %loop.latch, label %else |
| |
| else: |
| %B.lv = load float, ptr %B.gep.0, align 4 |
| %add = fadd float %B.lv, 2.0 |
| br label %loop.latch |
| |
| loop.latch: |
| %add.sink = phi float [ %add, %else ], [ 33.0, %loop.body ] |
| store float %add.sink, ptr %B.gep.0, align 4 |
| %iv.next = add nuw nsw i64 %iv1, 1 |
| %exitcond.not = icmp eq i64 %iv.next, 10000 |
| br i1 %exitcond.not, label %exit, label %loop.body |
| |
| exit: |
| ret void |
| } |
| |
| attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" } |
| |
| !0 = !{} |