| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s |
| ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE |
| |
| ; #include <stdint.h> |
| ; |
| ; int foo(float *A, int n) { |
| ; float sum = 0; |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; sum += 7*A[i*4 ] + |
| ; 7*A[i*4+1] + |
| ; 7*A[i*4+2] + |
| ; 7*A[i*4+3]; |
| ; } |
| ; return sum; |
| ; } |
| |
| ; IR for the C loop above. Each iteration loads A[4*i .. 4*i+3], multiplies |
| ; every element by 7.0 and sums the products with a chain of `fadd fast`. |
| ; Both check prefixes below expect that chain to become one <4 x float> load, |
| ; one vector fmul and a call to @llvm.vector.reduce.fadd.v4f32, with the |
| ; loop-carried sum still added by a scalar `fadd fast`. |
| define i32 @add_red(float* %A, i32 %n) { |
| ; CHECK-LABEL: @add_red( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]] |
| ; CHECK-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]] |
| ; CHECK-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00> |
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.cond.for.end_crit_edge: |
| ; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32 |
| ; CHECK-NEXT: br label [[FOR_END]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| ; STORE-LABEL: @add_red( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 |
| ; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]] |
| ; STORE-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2 |
| ; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]] |
| ; STORE-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3 |
| ; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] |
| ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* |
| ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00> |
| ; STORE-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; STORE: for.cond.for.end_crit_edge: |
| ; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32 |
| ; STORE-NEXT: br label [[FOR_END]] |
| ; STORE: for.end: |
| ; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; STORE-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| entry: |
| %cmp31 = icmp sgt i32 %n, 0 |
| br i1 %cmp31, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| %0 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ] |
| ; %mul is i shifted left by 2 (4*i), so its two low bits are zero and the |
| ; `or` with 1, 2 or 3 below is equivalent to adding that offset. |
| %mul = shl nsw i64 %i.033, 2 |
| %arrayidx = getelementptr inbounds float, float* %A, i64 %mul |
| %1 = load float, float* %arrayidx, align 4 |
| %mul2 = fmul float %1, 7.000000e+00 |
| %add28 = or i64 %mul, 1 |
| %arrayidx4 = getelementptr inbounds float, float* %A, i64 %add28 |
| %2 = load float, float* %arrayidx4, align 4 |
| %mul5 = fmul float %2, 7.000000e+00 |
| %add6 = fadd fast float %mul2, %mul5 |
| %add829 = or i64 %mul, 2 |
| %arrayidx9 = getelementptr inbounds float, float* %A, i64 %add829 |
| %3 = load float, float* %arrayidx9, align 4 |
| %mul10 = fmul float %3, 7.000000e+00 |
| %add11 = fadd fast float %add6, %mul10 |
| %add1330 = or i64 %mul, 3 |
| %arrayidx14 = getelementptr inbounds float, float* %A, i64 %add1330 |
| %4 = load float, float* %arrayidx14, align 4 |
| %mul15 = fmul float %4, 7.000000e+00 |
| %add16 = fadd fast float %add11, %mul15 |
| ; Fold this iteration's four-element sum into the loop-carried accumulator. |
| %add17 = fadd fast float %sum.032, %add16 |
| %inc = add nsw i64 %i.033, 1 |
| %exitcond = icmp eq i64 %inc, %0 |
| br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body |
| |
| for.cond.for.end_crit_edge: |
| ; The float sum is converted to i32 once, on the loop-exit edge. |
| %phitmp = fptosi float %add17 to i32 |
| br label %for.end |
| |
| for.end: |
| %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] |
| ret i32 %sum.0.lcssa |
| } |
| |
| ; int foo(float * restrict A, float * restrict B, int n) { |
| ; float sum = 0; |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; sum *= B[0]*A[i*4 ] + |
| ; B[1]*A[i*4+1] + |
| ; B[2]*A[i*4+2] + |
| ; B[3]*A[i*4+3]; |
| ; } |
| ; return sum; |
| ; } |
| |
| ; IR for the C loop above. The four B elements are loaded once in the loop |
| ; preheader; each iteration multiplies them with A[4*i .. 4*i+3] and sums the |
| ; products with `fadd fast`. Both check prefixes below expect the B loads and |
| ; the A loads to become <4 x float> loads feeding a vector fmul and a call to |
| ; @llvm.vector.reduce.fadd.v4f32, while the final fmul with the loop-carried |
| ; value stays scalar. |
| define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { |
| ; CHECK-LABEL: @mul_red( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] |
| ; CHECK-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]] |
| ; CHECK-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 |
| ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) |
| ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.cond.for.end_crit_edge: |
| ; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32 |
| ; CHECK-NEXT: br label [[FOR_END]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| ; STORE-LABEL: @mul_red( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 |
| ; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2 |
| ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] |
| ; STORE-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2 |
| ; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]] |
| ; STORE-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3 |
| ; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]] |
| ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* |
| ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 |
| ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] |
| ; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) |
| ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; STORE: for.cond.for.end_crit_edge: |
| ; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32 |
| ; STORE-NEXT: br label [[FOR_END]] |
| ; STORE: for.end: |
| ; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; STORE-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| entry: |
| %cmp38 = icmp sgt i32 %n, 0 |
| br i1 %cmp38, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| ; B[0..3] are read once here, before the loop, and reused in every iteration. |
| %0 = load float, float* %B, align 4 |
| %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 |
| %1 = load float, float* %arrayidx4, align 4 |
| %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 |
| %2 = load float, float* %arrayidx9, align 4 |
| %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 |
| %3 = load float, float* %arrayidx15, align 4 |
| %4 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ] |
| ; %mul is i shifted left by 2 (4*i), so the `or` with 1, 2 or 3 below is |
| ; equivalent to adding that offset. |
| %mul = shl nsw i64 %i.040, 2 |
| %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul |
| %5 = load float, float* %arrayidx2, align 4 |
| %mul3 = fmul float %0, %5 |
| %add35 = or i64 %mul, 1 |
| %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add35 |
| %6 = load float, float* %arrayidx6, align 4 |
| %mul7 = fmul float %1, %6 |
| %add8 = fadd fast float %mul3, %mul7 |
| %add1136 = or i64 %mul, 2 |
| %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1136 |
| %7 = load float, float* %arrayidx12, align 4 |
| %mul13 = fmul float %2, %7 |
| %add14 = fadd fast float %add8, %mul13 |
| %add1737 = or i64 %mul, 3 |
| %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1737 |
| %8 = load float, float* %arrayidx18, align 4 |
| %mul19 = fmul float %3, %8 |
| %add20 = fadd fast float %add14, %mul19 |
| ; The loop-carried value is multiplied (not added) by the four-element sum; |
| ; this fmul carries no fast-math flags, unlike the fadd chain above. |
| %mul21 = fmul float %sum.039, %add20 |
| %inc = add nsw i64 %i.040, 1 |
| %exitcond = icmp eq i64 %inc, %4 |
| br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body |
| |
| for.cond.for.end_crit_edge: |
| ; The float result is converted to i32 once, on the loop-exit edge. |
| %phitmp = fptosi float %mul21 to i32 |
| br label %for.end |
| |
| for.end: |
| %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] |
| ret i32 %sum.0.lcssa |
| } |
| |
| ; int foo(float * restrict A, float * restrict B, int n) { |
| ; float sum = 0; |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; sum += B[0]*A[i*6 ] + |
| ; B[1]*A[i*6+1] + |
| ; B[2]*A[i*6+2] + |
| ; B[3]*A[i*6+3] + |
| ; B[4]*A[i*6+4] + |
| ; B[5]*A[i*6+5] + |
| ; B[6]*A[i*6+6] + |
| ; B[7]*A[i*6+7] + |
| ; B[8]*A[i*6+8]; |
| ; } |
| ; return sum; |
| ; } |
| |
| ; IR for the C loop above: nine products B[k]*A[6*i+k] are summed per |
| ; iteration. Both check prefixes below expect the first eight products to be |
| ; computed with <8 x float> loads and a vector `fmul fast`, reduced by |
| ; @llvm.vector.reduce.fadd.v8f32; the ninth product (%mul49) stays scalar and |
| ; is added to the reduction result before the loop-carried sum is added. |
| define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { |
| ; CHECK-LABEL: @long_red( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]] |
| ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]] |
| ; CHECK-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]] |
| ; CHECK-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4 |
| ; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]] |
| ; CHECK-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5 |
| ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]] |
| ; CHECK-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6 |
| ; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] |
| ; CHECK-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7 |
| ; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 |
| ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] |
| ; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 |
| ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 |
| ; CHECK-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]]) |
| ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] |
| ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.cond.for.end_crit_edge: |
| ; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32 |
| ; CHECK-NEXT: br label [[FOR_END]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| ; STORE-LABEL: @long_red( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; STORE-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4 |
| ; STORE-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5 |
| ; STORE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6 |
| ; STORE-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; STORE-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 |
| ; STORE-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4 |
| ; STORE-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6 |
| ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]] |
| ; STORE-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2 |
| ; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]] |
| ; STORE-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3 |
| ; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]] |
| ; STORE-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4 |
| ; STORE-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]] |
| ; STORE-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5 |
| ; STORE-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]] |
| ; STORE-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6 |
| ; STORE-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]] |
| ; STORE-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7 |
| ; STORE-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]] |
| ; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* |
| ; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 |
| ; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] |
| ; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 |
| ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] |
| ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 |
| ; STORE-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] |
| ; STORE-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]]) |
| ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] |
| ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; STORE: for.cond.for.end_crit_edge: |
| ; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32 |
| ; STORE-NEXT: br label [[FOR_END]] |
| ; STORE: for.end: |
| ; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; STORE-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| entry: |
| %cmp81 = icmp sgt i32 %n, 0 |
| br i1 %cmp81, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| ; B[0..8] are read once here, before the loop, and reused in every iteration. |
| %0 = load float, float* %B, align 4 |
| %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 |
| %1 = load float, float* %arrayidx4, align 4 |
| %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 |
| %2 = load float, float* %arrayidx9, align 4 |
| %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 |
| %3 = load float, float* %arrayidx15, align 4 |
| %arrayidx21 = getelementptr inbounds float, float* %B, i64 4 |
| %4 = load float, float* %arrayidx21, align 4 |
| %arrayidx27 = getelementptr inbounds float, float* %B, i64 5 |
| %5 = load float, float* %arrayidx27, align 4 |
| %arrayidx33 = getelementptr inbounds float, float* %B, i64 6 |
| %6 = load float, float* %arrayidx33, align 4 |
| %arrayidx39 = getelementptr inbounds float, float* %B, i64 7 |
| %7 = load float, float* %arrayidx39, align 4 |
| %arrayidx45 = getelementptr inbounds float, float* %B, i64 8 |
| %8 = load float, float* %arrayidx45, align 4 |
| %9 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ] |
| %mul = mul nsw i64 %i.083, 6 |
| %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul |
| %10 = load float, float* %arrayidx2, align 4 |
| %mul3 = fmul fast float %0, %10 |
| ; %mul is a multiple of 6 and therefore even, so `or` with 1 is equivalent to |
| ; adding 1; the larger offsets below use `add nsw` instead. |
| %add80 = or i64 %mul, 1 |
| %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add80 |
| %11 = load float, float* %arrayidx6, align 4 |
| %mul7 = fmul fast float %1, %11 |
| %add8 = fadd fast float %mul3, %mul7 |
| %add11 = add nsw i64 %mul, 2 |
| %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add11 |
| %12 = load float, float* %arrayidx12, align 4 |
| %mul13 = fmul fast float %2, %12 |
| %add14 = fadd fast float %add8, %mul13 |
| %add17 = add nsw i64 %mul, 3 |
| %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add17 |
| %13 = load float, float* %arrayidx18, align 4 |
| %mul19 = fmul fast float %3, %13 |
| %add20 = fadd fast float %add14, %mul19 |
| %add23 = add nsw i64 %mul, 4 |
| %arrayidx24 = getelementptr inbounds float, float* %A, i64 %add23 |
| %14 = load float, float* %arrayidx24, align 4 |
| %mul25 = fmul fast float %4, %14 |
| %add26 = fadd fast float %add20, %mul25 |
| %add29 = add nsw i64 %mul, 5 |
| %arrayidx30 = getelementptr inbounds float, float* %A, i64 %add29 |
| %15 = load float, float* %arrayidx30, align 4 |
| %mul31 = fmul fast float %5, %15 |
| %add32 = fadd fast float %add26, %mul31 |
| %add35 = add nsw i64 %mul, 6 |
| %arrayidx36 = getelementptr inbounds float, float* %A, i64 %add35 |
| %16 = load float, float* %arrayidx36, align 4 |
| %mul37 = fmul fast float %6, %16 |
| %add38 = fadd fast float %add32, %mul37 |
| %add41 = add nsw i64 %mul, 7 |
| %arrayidx42 = getelementptr inbounds float, float* %A, i64 %add41 |
| %17 = load float, float* %arrayidx42, align 4 |
| %mul43 = fmul fast float %7, %17 |
| %add44 = fadd fast float %add38, %mul43 |
| ; Ninth term: one more than the eight lanes the checks expect in the vector, |
| ; so the expected output keeps this load and multiply scalar. |
| %add47 = add nsw i64 %mul, 8 |
| %arrayidx48 = getelementptr inbounds float, float* %A, i64 %add47 |
| %18 = load float, float* %arrayidx48, align 4 |
| %mul49 = fmul fast float %8, %18 |
| %add50 = fadd fast float %add44, %mul49 |
| ; Fold this iteration's nine-term sum into the loop-carried accumulator. |
| %add51 = fadd fast float %sum.082, %add50 |
| %inc = add nsw i64 %i.083, 1 |
| %exitcond = icmp eq i64 %inc, %9 |
| br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body |
| |
| for.cond.for.end_crit_edge: |
| ; The float sum is converted to i32 once, on the loop-exit edge. |
| %phitmp = fptosi float %add51 to i32 |
| br label %for.end |
| |
| for.end: |
| %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] |
| ret i32 %sum.0.lcssa |
| } |
| |
| ; int foo(float * restrict A, float * restrict B, int n) { |
| ; float sum = 0; |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; sum += B[0]*A[i*4 ]; |
| ; sum += B[1]*A[i*4+1]; |
| ; sum += B[2]*A[i*4+2]; |
| ; sum += B[3]*A[i*4+3]; |
| ; } |
| ; return sum; |
| ; } |
| |
| define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { |
| ; CHECK-LABEL: @chain_red( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]] |
| ; CHECK-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]] |
| ; CHECK-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 |
| ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) |
| ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.cond.for.end_crit_edge: |
| ; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32 |
| ; CHECK-NEXT: br label [[FOR_END]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| ; STORE-LABEL: @chain_red( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; STORE-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; STORE-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 |
| ; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 |
| ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]] |
| ; STORE-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2 |
| ; STORE-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]] |
| ; STORE-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3 |
| ; STORE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]] |
| ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* |
| ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 |
| ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] |
| ; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) |
| ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] |
| ; STORE: for.cond.for.end_crit_edge: |
| ; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32 |
| ; STORE-NEXT: br label [[FOR_END]] |
| ; STORE: for.end: |
| ; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] |
| ; STORE-NEXT: ret i32 [[SUM_0_LCSSA]] |
| ; |
| entry: |
| %cmp41 = icmp sgt i32 %n, 0 |
| br i1 %cmp41, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| %0 = load float, float* %B, align 4 |
| %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 |
| %1 = load float, float* %arrayidx4, align 4 |
| %arrayidx10 = getelementptr inbounds float, float* %B, i64 2 |
| %2 = load float, float* %arrayidx10, align 4 |
| %arrayidx16 = getelementptr inbounds float, float* %B, i64 3 |
| %3 = load float, float* %arrayidx16, align 4 |
| %4 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ] |
| %mul = shl nsw i64 %i.043, 2 |
| %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul |
| %5 = load float, float* %arrayidx2, align 4 |
| %mul3 = fmul fast float %0, %5 |
| %add = fadd fast float %sum.042, %mul3 |
| %add638 = or i64 %mul, 1 |
| %arrayidx7 = getelementptr inbounds float, float* %A, i64 %add638 |
| %6 = load float, float* %arrayidx7, align 4 |
| %mul8 = fmul fast float %1, %6 |
| %add9 = fadd fast float %add, %mul8 |
| %add1239 = or i64 %mul, 2 |
| %arrayidx13 = getelementptr inbounds float, float* %A, i64 %add1239 |
| %7 = load float, float* %arrayidx13, align 4 |
| %mul14 = fmul fast float %2, %7 |
| %add15 = fadd fast float %add9, %mul14 |
| %add1840 = or i64 %mul, 3 |
| %arrayidx19 = getelementptr inbounds float, float* %A, i64 %add1840 |
| %8 = load float, float* %arrayidx19, align 4 |
| %mul20 = fmul fast float %3, %8 |
| %add21 = fadd fast float %add15, %mul20 |
| %inc = add nsw i64 %i.043, 1 |
| %exitcond = icmp eq i64 %inc, %4 |
| br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body |
| |
| for.cond.for.end_crit_edge: |
| %phitmp = fptosi float %add21 to i32 |
| br label %for.end |
| |
| for.end: |
| %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] |
| ret i32 %sum.0.lcssa |
| } |
| |
| ; void foo(const float *arg_A, unsigned arg_B, float *array) { |
| ; for (uint32_t i = 0; i < 6; ++i) { |
| ; const float *ptr = arg_A + i; |
| ; float w0 = array[i * 4 + 0]; |
| ; float w1 = array[i * 4 + 1]; |
| ; float w2 = array[i * 4 + 2]; |
| ; float w3 = array[i * 4 + 3]; |
| ; |
| ; for (unsigned j = 0; j < arg_B; ++j) { |
| ; const float x1 = *ptr - (-1.1f * w0) - (1.2f * w1); |
| ; const float x2 = (2.1f * x1) + (-2.2f * w0) + (2.3f * w1); |
| ; const float x3 = x2 - (-3.1f * w2) - (3.2f * w3); |
| ; const float x4 = x3 + (-4.0f * w2) + w3; |
| ; w1 = w0; |
| ; w0 = x1; |
| ; w3 = w2; |
| ; w2 = x3; |
| ; } |
| ; |
| ; array[i * 4 + 0] = w0; |
| ; array[i * 4 + 1] = w1; |
| ; array[i * 4 + 2] = w2; |
| ; array[i * 4 + 3] = w3; |
| ; } |
| ; } |
| |
| ; Negative test: the inner loop carries four float values (w0..w3) through phis |
| ; and updates them with scalar fast-math fmul/fadd chains. The expectations for |
| ; both the CHECK prefix and the STORE prefix below list the same scalar |
| ; instructions as the input, i.e. no vector code is expected for this function. |
| define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) { |
| ; CHECK-LABEL: @foo( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.cond.cleanup: |
| ; CHECK-NEXT: ret void |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 |
| ; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 |
| ; CHECK-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] |
| ; CHECK: for.body16.lr.ph: |
| ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 |
| ; CHECK-NEXT: br label [[FOR_BODY16:%.*]] |
| ; CHECK: for.cond.cleanup15: |
| ; CHECK-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 |
| ; CHECK-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 |
| ; CHECK-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 |
| ; CHECK-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 |
| ; CHECK-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] |
| ; CHECK: for.body16: |
| ; CHECK-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ] |
| ; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 |
| ; CHECK-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 |
| ; CHECK-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] |
| ; CHECK-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] |
| ; CHECK-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 |
| ; CHECK-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 |
| ; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 |
| ; CHECK-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 |
| ; CHECK-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 |
| ; CHECK-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] |
| ; CHECK-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]] |
| ; CHECK-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] |
| ; CHECK-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] |
| ; CHECK-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] |
| ; |
| ; STORE-LABEL: @foo( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.cond.cleanup: |
| ; STORE-NEXT: ret void |
| ; STORE: for.body: |
| ; STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] |
| ; STORE-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 |
| ; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] |
| ; STORE-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 |
| ; STORE-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] |
| ; STORE-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 |
| ; STORE-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 |
| ; STORE-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] |
| ; STORE-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 |
| ; STORE-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 |
| ; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]] |
| ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 |
| ; STORE-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] |
| ; STORE: for.body16.lr.ph: |
| ; STORE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] |
| ; STORE-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 |
| ; STORE-NEXT: br label [[FOR_BODY16:%.*]] |
| ; STORE: for.cond.cleanup15: |
| ; STORE-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 |
| ; STORE-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 |
| ; STORE-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 |
| ; STORE-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 |
| ; STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 |
| ; STORE-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 |
| ; STORE-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] |
| ; STORE: for.body16: |
| ; STORE-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ] |
| ; STORE-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 |
| ; STORE-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 |
| ; STORE-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] |
| ; STORE-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] |
| ; STORE-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 |
| ; STORE-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 |
| ; STORE-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 |
| ; STORE-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 |
| ; STORE-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 |
| ; STORE-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] |
| ; STORE-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]] |
| ; STORE-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] |
| ; STORE-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] |
| ; STORE-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] |
| ; |
| entry: |
| %cmp1495 = icmp eq i32 %arg_B, 0 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.cond.cleanup15 |
| ret void |
| |
| for.body: ; preds = %for.cond.cleanup15, %entry |
| %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond.cleanup15 ] |
| %0 = shl i64 %indvars.iv, 2 |
| %arrayidx = getelementptr inbounds float, float* %array, i64 %0 |
| %1 = load float, float* %arrayidx, align 4 |
| %2 = or i64 %0, 1 |
| %arrayidx4 = getelementptr inbounds float, float* %array, i64 %2 |
| %3 = load float, float* %arrayidx4, align 4 |
| %4 = or i64 %0, 2 |
| %arrayidx8 = getelementptr inbounds float, float* %array, i64 %4 |
| %5 = load float, float* %arrayidx8, align 4 |
| %6 = or i64 %0, 3 |
| %arrayidx12 = getelementptr inbounds float, float* %array, i64 %6 |
| %7 = load float, float* %arrayidx12, align 4 |
| br i1 %cmp1495, label %for.cond.cleanup15, label %for.body16.lr.ph |
| |
| for.body16.lr.ph: ; preds = %for.body |
| %add.ptr = getelementptr inbounds float, float* %arg_A, i64 %indvars.iv |
| %8 = load float, float* %add.ptr, align 4 |
| br label %for.body16 |
| |
| for.cond.cleanup15: ; preds = %for.body16, %for.body |
| %w2.0.lcssa = phi float [ %5, %for.body ], [ %sub28, %for.body16 ] |
| %w3.0.lcssa = phi float [ %7, %for.body ], [ %w2.096, %for.body16 ] |
| %w1.0.lcssa = phi float [ %3, %for.body ], [ %w0.0100, %for.body16 ] |
| %w0.0.lcssa = phi float [ %1, %for.body ], [ %sub19, %for.body16 ] |
| store float %w0.0.lcssa, float* %arrayidx, align 4 |
| store float %w1.0.lcssa, float* %arrayidx4, align 4 |
| store float %w2.0.lcssa, float* %arrayidx8, align 4 |
| store float %w3.0.lcssa, float* %arrayidx12, align 4 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond109 = icmp eq i64 %indvars.iv.next, 6 |
| br i1 %exitcond109, label %for.cond.cleanup, label %for.body |
| |
| for.body16: ; preds = %for.body16, %for.body16.lr.ph |
| %w0.0100 = phi float [ %1, %for.body16.lr.ph ], [ %sub19, %for.body16 ] |
| %w1.099 = phi float [ %3, %for.body16.lr.ph ], [ %w0.0100, %for.body16 ] |
| %j.098 = phi i32 [ 0, %for.body16.lr.ph ], [ %inc, %for.body16 ] |
| %w3.097 = phi float [ %7, %for.body16.lr.ph ], [ %w2.096, %for.body16 ] |
| %w2.096 = phi float [ %5, %for.body16.lr.ph ], [ %sub28, %for.body16 ] |
| ; x1 of the C source: 1.1f*w0 + (-1.2f)*w1 + *ptr. The subtractions appear as |
| ; additions of products with negated constants (floats printed as hex doubles). |
| %mul17 = fmul fast float %w0.0100, 0x3FF19999A0000000 |
| %mul18.neg = fmul fast float %w1.099, 0xBFF3333340000000 |
| %sub92 = fadd fast float %mul17, %mul18.neg |
| %sub19 = fadd fast float %sub92, %8 |
| ; x3 of the C source with x2 expanded in place: a five-term scalar fadd chain |
| ; over products of x1, w0, w1, w2 and w3. No instruction computes x4. |
| %mul20 = fmul fast float %sub19, 0x4000CCCCC0000000 |
| %mul21.neg = fmul fast float %w0.0100, 0xC0019999A0000000 |
| %mul23 = fmul fast float %w1.099, 0x4002666660000000 |
| %mul25 = fmul fast float %w2.096, 0x4008CCCCC0000000 |
| %mul27.neg = fmul fast float %w3.097, 0xC0099999A0000000 |
| %add2293 = fadd fast float %mul27.neg, %mul25 |
| %add24 = fadd fast float %add2293, %mul23 |
| %sub2694 = fadd fast float %add24, %mul21.neg |
| %sub28 = fadd fast float %sub2694, %mul20 |
| %inc = add nuw i32 %j.098, 1 |
| %exitcond = icmp eq i32 %inc, %arg_B |
| br i1 %exitcond, label %for.cond.cleanup15, label %for.body16 |
| } |
| |
| |
| ; void foo(double * restrict A, double * restrict B, double * restrict C, |
| ; int n) { |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; C[i] = B[0] *A[i*4 ] + B[1] *A[i*4+1]; |
| ; } |
| ; } |
| |
| ; Two-term product sum over doubles whose result is stored to C[i]. |
| ; With the plain -slp-vectorizer invocation (CHECK prefix) the loop is expected |
| ; to stay scalar. With -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix) |
| ; the loads of B and A and the fmul are expected as <2 x double>, while the final |
| ; add remains a scalar fadd of the two extracted lanes. |
| define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) { |
| ; CHECK-LABEL: @store_red_double( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[ARRAYIDX2]], align 8 |
| ; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[TMP0]], [[TMP3]] |
| ; CHECK-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX6]], align 8 |
| ; CHECK-NEXT: [[MUL7:%.*]] = fmul fast double [[TMP1]], [[TMP4]] |
| ; CHECK-NEXT: [[ADD8:%.*]] = fadd fast double [[MUL3]], [[MUL7]] |
| ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] |
| ; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @store_red_double( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B]] to <2 x double>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 |
| ; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 |
| ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] |
| ; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>* |
| ; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 |
| ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]] |
| ; STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 |
| ; STORE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 |
| ; STORE-NEXT: [[ADD8:%.*]] = fadd fast double [[TMP6]], [[TMP7]] |
| ; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] |
| ; STORE-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] |
| ; STORE: for.end: |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %cmp17 = icmp sgt i32 %n, 0 |
| br i1 %cmp17, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| ; B[0] and B[1] are loaded once, before the loop. |
| %0 = load double, double* %B, align 8 |
| %arrayidx4 = getelementptr inbounds double, double* %B, i64 1 |
| %1 = load double, double* %arrayidx4, align 8 |
| %2 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %mul = shl nsw i64 %i.018, 2 |
| %arrayidx2 = getelementptr inbounds double, double* %A, i64 %mul |
| %3 = load double, double* %arrayidx2, align 8 |
| %mul3 = fmul fast double %0, %3 |
| %add16 = or i64 %mul, 1 |
| %arrayidx6 = getelementptr inbounds double, double* %A, i64 %add16 |
| %4 = load double, double* %arrayidx6, align 8 |
| %mul7 = fmul fast double %1, %4 |
| ; The single fadd below is the whole reduction; its only user is the store. |
| %add8 = fadd fast double %mul3, %mul7 |
| %arrayidx9 = getelementptr inbounds double, double* %C, i64 %i.018 |
| store double %add8, double* %arrayidx9, align 8 |
| %inc = add nsw i64 %i.018, 1 |
| %exitcond = icmp eq i64 %inc, %2 |
| br i1 %exitcond, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |
| |
| ; int foo(float * restrict A, float * restrict B, float * restrict C, int n) { |
| ; float sum = 0; |
| ; for (intptr_t i=0; i < n; ++i) { |
| ; C[i] = B[0] *A[i*4 ] + |
| ; B[1] *A[i*4+1] + |
| ; B[2] *A[i*4+2] + |
| ; B[3] *A[i*4+3]; |
| ; } |
| ; return sum; |
| ; } |
| |
| ; Four-term product sum over floats whose result is stored through a pointer |
| ; that advances by one element per iteration. |
| ; With the plain -slp-vectorizer invocation (CHECK prefix) the loop is expected |
| ; to stay scalar. With -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix) |
| ; the expectation is <4 x float> loads of B and A, a vector fmul, and a call to |
| ; llvm.vector.reduce.fadd.v4f32 whose result feeds the store. |
| define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) { |
| ; CHECK-LABEL: @store_red( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; CHECK-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; CHECK: for.body.lr.ph: |
| ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B]], align 4 |
| ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: [[MUL3:%.*]] = fmul fast float [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 |
| ; CHECK-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX6]], align 4 |
| ; CHECK-NEXT: [[MUL7:%.*]] = fmul fast float [[TMP3]], [[TMP4]] |
| ; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float [[MUL3]], [[MUL7]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX9]], align 4 |
| ; CHECK-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX12]], align 4 |
| ; CHECK-NEXT: [[MUL13:%.*]] = fmul fast float [[TMP5]], [[TMP6]] |
| ; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], [[MUL13]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX15]], align 4 |
| ; CHECK-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX18]], align 4 |
| ; CHECK-NEXT: [[MUL19:%.*]] = fmul fast float [[TMP7]], [[TMP8]] |
| ; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], [[MUL19]] |
| ; CHECK-NEXT: store float [[ADD20]], float* [[C_ADDR_038]], align 4 |
| ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| ; STORE-LABEL: @store_red( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 |
| ; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] |
| ; STORE: for.body.lr.ph: |
| ; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 |
| ; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 |
| ; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 |
| ; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 |
| ; STORE-NEXT: br label [[FOR_BODY:%.*]] |
| ; STORE: for.body: |
| ; STORE-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] |
| ; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 |
| ; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] |
| ; STORE-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 |
| ; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] |
| ; STORE-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 |
| ; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] |
| ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B]] to <4 x float>* |
| ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; STORE-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 |
| ; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] |
| ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* |
| ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 |
| ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] |
| ; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) |
| ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 |
| ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 |
| ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 |
| ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] |
| ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] |
| ; STORE: for.end: |
| ; STORE-NEXT: ret i32 0 |
| ; |
| entry: |
| %cmp37 = icmp sgt i32 %n, 0 |
| br i1 %cmp37, label %for.body.lr.ph, label %for.end |
| |
| for.body.lr.ph: |
| ; Only the addresses of B[1..3] are formed here; the loads themselves sit in the loop. |
| %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 |
| %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 |
| %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 |
| %0 = sext i32 %n to i64 |
| br label %for.body |
| |
| for.body: |
| %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] |
| ; Each B[k] load is interleaved with the matching A[4*i+k] load and the multiply. |
| %1 = load float, float* %B, align 4 |
| %mul = shl nsw i64 %i.039, 2 |
| %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul |
| %2 = load float, float* %arrayidx2, align 4 |
| %mul3 = fmul fast float %1, %2 |
| %3 = load float, float* %arrayidx4, align 4 |
| %add34 = or i64 %mul, 1 |
| %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add34 |
| %4 = load float, float* %arrayidx6, align 4 |
| %mul7 = fmul fast float %3, %4 |
| %add8 = fadd fast float %mul3, %mul7 |
| %5 = load float, float* %arrayidx9, align 4 |
| %add1135 = or i64 %mul, 2 |
| %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1135 |
| %6 = load float, float* %arrayidx12, align 4 |
| %mul13 = fmul fast float %5, %6 |
| %add14 = fadd fast float %add8, %mul13 |
| %7 = load float, float* %arrayidx15, align 4 |
| %add1736 = or i64 %mul, 3 |
| %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1736 |
| %8 = load float, float* %arrayidx18, align 4 |
| %mul19 = fmul fast float %7, %8 |
| ; End of the linear fadd chain; the sum is used only by the store below. |
| %add20 = fadd fast float %add14, %mul19 |
| store float %add20, float* %C.addr.038, align 4 |
| %incdec.ptr = getelementptr inbounds float, float* %C.addr.038, i64 1 |
| %inc = add nsw i64 %i.039, 1 |
| %exitcond = icmp eq i64 %inc, %0 |
| br i1 %exitcond, label %for.end, label %for.body |
| |
| for.end: |
| ret i32 0 |
| } |
| |
| ; Zero-initialized, 16-byte-aligned 32-element arrays used as reduction inputs. |
| ; @arr_float is read by the float_red_example tests that follow; @arr_i32 is |
| ; presumably read by integer tests elsewhere in the file -- TODO confirm. |
| @arr_i32 = global [32 x i32] zeroinitializer, align 16 |
| @arr_float = global [32 x float] zeroinitializer, align 16 |
| |
| ; Straight-line sum of @arr_float[0..3] stored to %res. |
| ; The CHECK prefix expects the scalar load/fadd chain unchanged. The STORE prefix |
| ; expects a single <4 x float> load followed by llvm.vector.reduce.fadd.v4f32. |
| define void @float_red_example4(float* %res) { |
| ; CHECK-LABEL: @float_red_example4( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: store float [[ADD_2]], float* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @float_red_example4( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP0]]) |
| ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| ; Each fadd takes the freshly loaded element first and the running sum second. |
| %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| %add = fadd fast float %1, %0 |
| %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| %add.1 = fadd fast float %2, %add |
| %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| %add.2 = fadd fast float %3, %add.1 |
| store float %add.2, float* %res, align 16 |
| ret void |
| } |
| |
| ; Straight-line sum of @arr_float[0..7] stored to %res. |
| ; The CHECK prefix expects the scalar load/fadd chain unchanged. The STORE prefix |
| ; expects a single <8 x float> load followed by llvm.vector.reduce.fadd.v8f32. |
| define void @float_red_example8(float* %res) { |
| ; CHECK-LABEL: @float_red_example8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 |
| ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 |
| ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] |
| ; CHECK-NEXT: store float [[ADD_6]], float* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @float_red_example8( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) |
| ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| ; Each fadd takes the freshly loaded element first and the running sum second. |
| %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| %add = fadd fast float %1, %0 |
| %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| %add.1 = fadd fast float %2, %add |
| %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| %add.2 = fadd fast float %3, %add.1 |
| %4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 |
| %add.3 = fadd fast float %4, %add.2 |
| %5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 |
| %add.4 = fadd fast float %5, %add.3 |
| %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 |
| %add.5 = fadd fast float %6, %add.4 |
| %7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 |
| %add.6 = fadd fast float %7, %add.5 |
| store float %add.6, float* %res, align 16 |
| ret void |
| } |
| |
| ; Sums elements 0..15 of @arr_float with a linear chain of `fadd fast` and stores |
| ; the result through %res. |
| ; Default run (CHECK prefix): the scalar load/fadd chain is expected to be left as is. |
| ; Run with -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix): the chain is |
| ; expected to become one <16 x float> load feeding llvm.vector.reduce.fadd.v16f32. |
| define void @float_red_example16(float* %res) { |
| ; CHECK-LABEL: @float_red_example16( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 |
| ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 |
| ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16 |
| ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP8]], [[ADD_6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4 |
| ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP9]], [[ADD_7]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8 |
| ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP10]], [[ADD_8]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4 |
| ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP11]], [[ADD_9]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16 |
| ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP12]], [[ADD_10]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4 |
| ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP13]], [[ADD_11]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8 |
| ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP14]], [[ADD_12]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4 |
| ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP15]], [[ADD_13]] |
| ; CHECK-NEXT: store float [[ADD_14]], float* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @float_red_example16( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]]) |
| ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 |
| %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 |
| %add = fadd fast float %1, %0 |
| %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 |
| %add.1 = fadd fast float %2, %add |
| %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 |
| %add.2 = fadd fast float %3, %add.1 |
| %4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 |
| %add.3 = fadd fast float %4, %add.2 |
| %5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 |
| %add.4 = fadd fast float %5, %add.3 |
| %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 |
| %add.5 = fadd fast float %6, %add.4 |
| %7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 |
| %add.6 = fadd fast float %7, %add.5 |
| %8 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16 |
| %add.7 = fadd fast float %8, %add.6 |
| %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4 |
| %add.8 = fadd fast float %9, %add.7 |
| %10 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8 |
| %add.9 = fadd fast float %10, %add.8 |
| %11 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4 |
| %add.10 = fadd fast float %11, %add.9 |
| %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16 |
| %add.11 = fadd fast float %12, %add.10 |
| %13 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4 |
| %add.12 = fadd fast float %13, %add.11 |
| %14 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8 |
| %add.13 = fadd fast float %14, %add.12 |
| %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4 |
| %add.14 = fadd fast float %15, %add.13 |
| store float %add.14, float* %res, align 16 |
| ret void |
| } |
| |
| ; Sums elements 0..3 of @arr_i32 with a linear chain of `add nsw` and stores the |
| ; result through %res. |
| ; Default run (CHECK prefix): the scalar load/add chain is expected to be left as is. |
| ; Run with -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix): the chain is |
| ; expected to become one <4 x i32> load feeding llvm.vector.reduce.add.v4i32. |
| define void @i32_red_example4(i32* %res) { |
| ; CHECK-LABEL: @i32_red_example4( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_example4( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) |
| ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| store i32 %add.2, i32* %res, align 16 |
| ret void |
| } |
| |
| ; Sums elements 0..7 of @arr_i32 with a linear chain of `add nsw` and stores the |
| ; result through %res. |
| ; Default run (CHECK prefix): the scalar load/add chain is expected to be left as is. |
| ; Run with -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix): the chain is |
| ; expected to become one <8 x i32> load feeding llvm.vector.reduce.add.v8i32. |
| define void @i32_red_example8(i32* %res) { |
| ; CHECK-LABEL: @i32_red_example8( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| ; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| ; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] |
| ; CHECK-NEXT: store i32 [[ADD_6]], i32* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_example8( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) |
| ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| %add.3 = add nsw i32 %4, %add.2 |
| %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| %add.4 = add nsw i32 %5, %add.3 |
| %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| %add.5 = add nsw i32 %6, %add.4 |
| %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| %add.6 = add nsw i32 %7, %add.5 |
| store i32 %add.6, i32* %res, align 16 |
| ret void |
| } |
| |
| ; Sums elements 0..15 of @arr_i32 with a linear chain of `add nsw` and stores the |
| ; result through %res. |
| ; Default run (CHECK prefix): the scalar load/add chain is expected to be left as is. |
| ; Run with -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix): the chain is |
| ; expected to become one <16 x i32> load feeding llvm.vector.reduce.add.v16i32. |
| define void @i32_red_example16(i32* %res) { |
| ; CHECK-LABEL: @i32_red_example16( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| ; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| ; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 |
| ; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 |
| ; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 |
| ; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 |
| ; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 |
| ; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 |
| ; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 |
| ; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 |
| ; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] |
| ; CHECK-NEXT: store i32 [[ADD_14]], i32* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_example16( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP0]]) |
| ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| %add.3 = add nsw i32 %4, %add.2 |
| %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| %add.4 = add nsw i32 %5, %add.3 |
| %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| %add.5 = add nsw i32 %6, %add.4 |
| %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| %add.6 = add nsw i32 %7, %add.5 |
| %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 |
| %add.7 = add nsw i32 %8, %add.6 |
| %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 |
| %add.8 = add nsw i32 %9, %add.7 |
| %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 |
| %add.9 = add nsw i32 %10, %add.8 |
| %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 |
| %add.10 = add nsw i32 %11, %add.9 |
| %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 |
| %add.11 = add nsw i32 %12, %add.10 |
| %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 |
| %add.12 = add nsw i32 %13, %add.11 |
| %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 |
| %add.13 = add nsw i32 %14, %add.12 |
| %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 |
| %add.14 = add nsw i32 %15, %add.13 |
| store i32 %add.14, i32* %res, align 16 |
| ret void |
| } |
| |
| ; Sums all 32 elements (indices 0..31) of @arr_i32 with a linear chain of `add nsw` |
| ; and stores the result through %res. |
| ; Default run (CHECK prefix): the scalar load/add chain is expected to be left as is. |
| ; Run with -slp-vectorize-hor -slp-vectorize-hor-store (STORE prefix): the chain is |
| ; expected to become one <32 x i32> load feeding llvm.vector.reduce.add.v32i32. |
| define void @i32_red_example32(i32* %res) { |
| ; CHECK-LABEL: @i32_red_example32( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| ; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| ; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 |
| ; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 |
| ; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 |
| ; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 |
| ; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 |
| ; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 |
| ; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 |
| ; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 |
| ; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16 |
| ; CHECK-NEXT: [[ADD_15:%.*]] = add nsw i32 [[TMP16]], [[ADD_14]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4 |
| ; CHECK-NEXT: [[ADD_16:%.*]] = add nsw i32 [[TMP17]], [[ADD_15]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8 |
| ; CHECK-NEXT: [[ADD_17:%.*]] = add nsw i32 [[TMP18]], [[ADD_16]] |
| ; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4 |
| ; CHECK-NEXT: [[ADD_18:%.*]] = add nsw i32 [[TMP19]], [[ADD_17]] |
| ; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16 |
| ; CHECK-NEXT: [[ADD_19:%.*]] = add nsw i32 [[TMP20]], [[ADD_18]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4 |
| ; CHECK-NEXT: [[ADD_20:%.*]] = add nsw i32 [[TMP21]], [[ADD_19]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8 |
| ; CHECK-NEXT: [[ADD_21:%.*]] = add nsw i32 [[TMP22]], [[ADD_20]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4 |
| ; CHECK-NEXT: [[ADD_22:%.*]] = add nsw i32 [[TMP23]], [[ADD_21]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16 |
| ; CHECK-NEXT: [[ADD_23:%.*]] = add nsw i32 [[TMP24]], [[ADD_22]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4 |
| ; CHECK-NEXT: [[ADD_24:%.*]] = add nsw i32 [[TMP25]], [[ADD_23]] |
| ; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8 |
| ; CHECK-NEXT: [[ADD_25:%.*]] = add nsw i32 [[TMP26]], [[ADD_24]] |
| ; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4 |
| ; CHECK-NEXT: [[ADD_26:%.*]] = add nsw i32 [[TMP27]], [[ADD_25]] |
| ; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16 |
| ; CHECK-NEXT: [[ADD_27:%.*]] = add nsw i32 [[TMP28]], [[ADD_26]] |
| ; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4 |
| ; CHECK-NEXT: [[ADD_28:%.*]] = add nsw i32 [[TMP29]], [[ADD_27]] |
| ; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8 |
| ; CHECK-NEXT: [[ADD_29:%.*]] = add nsw i32 [[TMP30]], [[ADD_28]] |
| ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4 |
| ; CHECK-NEXT: [[ADD_30:%.*]] = add nsw i32 [[TMP31]], [[ADD_29]] |
| ; CHECK-NEXT: store i32 [[ADD_30]], i32* [[RES:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_example32( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) |
| ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| %add.3 = add nsw i32 %4, %add.2 |
| %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| %add.4 = add nsw i32 %5, %add.3 |
| %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| %add.5 = add nsw i32 %6, %add.4 |
| %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| %add.6 = add nsw i32 %7, %add.5 |
| %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 |
| %add.7 = add nsw i32 %8, %add.6 |
| %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 |
| %add.8 = add nsw i32 %9, %add.7 |
| %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 |
| %add.9 = add nsw i32 %10, %add.8 |
| %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 |
| %add.10 = add nsw i32 %11, %add.9 |
| %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 |
| %add.11 = add nsw i32 %12, %add.10 |
| %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 |
| %add.12 = add nsw i32 %13, %add.11 |
| %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 |
| %add.13 = add nsw i32 %14, %add.12 |
| %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 |
| %add.14 = add nsw i32 %15, %add.13 |
| %16 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16 |
| %add.15 = add nsw i32 %16, %add.14 |
| %17 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4 |
| %add.16 = add nsw i32 %17, %add.15 |
| %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8 |
| %add.17 = add nsw i32 %18, %add.16 |
| %19 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4 |
| %add.18 = add nsw i32 %19, %add.17 |
| %20 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16 |
| %add.19 = add nsw i32 %20, %add.18 |
| %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4 |
| %add.20 = add nsw i32 %21, %add.19 |
| %22 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8 |
| %add.21 = add nsw i32 %22, %add.20 |
| %23 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4 |
| %add.22 = add nsw i32 %23, %add.21 |
| %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16 |
| %add.23 = add nsw i32 %24, %add.22 |
| %25 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4 |
| %add.24 = add nsw i32 %25, %add.23 |
| %26 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8 |
| %add.25 = add nsw i32 %26, %add.24 |
| %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4 |
| %add.26 = add nsw i32 %27, %add.25 |
| %28 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16 |
| %add.27 = add nsw i32 %28, %add.26 |
| %29 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4 |
| %add.28 = add nsw i32 %29, %add.27 |
| %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8 |
| %add.29 = add nsw i32 %30, %add.28 |
| %31 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4 |
| %add.30 = add nsw i32 %31, %add.29 |
| store i32 %add.30, i32* %res, align 16 |
| ret void |
| } |
| |
| ; External function that receives the reduced value in @i32_red_call (via call) and |
| ; @i32_red_invoke (via invoke), so the reduction root is a call argument, not a store. |
| declare i32 @foobar(i32) |
| |
| ; Sums elements 0..7 of @arr_i32 with a linear chain of `add nsw` and passes the |
| ; result to @foobar instead of storing it. The %val parameter is not used by the body. |
| ; Both run configurations (CHECK and STORE prefixes) expect the same output: one |
| ; <8 x i32> load feeding llvm.vector.reduce.add.v8i32, whose result is the call argument. |
| define void @i32_red_call(i32 %val) { |
| ; CHECK-LABEL: @i32_red_call( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_call( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) |
| ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| %add.3 = add nsw i32 %4, %add.2 |
| %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| %add.4 = add nsw i32 %5, %add.3 |
| %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| %add.5 = add nsw i32 %6, %add.4 |
| %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| %add.6 = add nsw i32 %7, %add.5 |
| %res = call i32 @foobar(i32 %add.6) |
| ret void |
| } |
| |
| ; Same eight-element 'add nsw' chain over @arr_i32[0..7] as a call-based test, |
| ; but the total is passed to @foobar through an invoke with a landing pad. |
| ; Both check prefixes expect one <8 x i32> load plus |
| ; @llvm.vector.reduce.add.v8i32 feeding the invoke, with the exception/normal |
| ; blocks left in place. The %val argument is not used by the body. |
| define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 { |
| ; CHECK-LABEL: @i32_red_invoke( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) |
| ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) |
| ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] |
| ; CHECK: exception: |
| ; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 |
| ; CHECK-NEXT: cleanup |
| ; CHECK-NEXT: br label [[NORMAL]] |
| ; CHECK: normal: |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @i32_red_invoke( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 |
| ; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) |
| ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) |
| ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] |
| ; STORE: exception: |
| ; STORE-NEXT: [[CLEANUP:%.*]] = landingpad i8 |
| ; STORE-NEXT: cleanup |
| ; STORE-NEXT: br label [[NORMAL]] |
| ; STORE: normal: |
| ; STORE-NEXT: ret void |
| ; |
| entry: |
| ; Linear add chain over elements 0..7; each add consumes the previous partial sum. |
| %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 |
| %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 |
| %add = add nsw i32 %1, %0 |
| %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 |
| %add.1 = add nsw i32 %2, %add |
| %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 |
| %add.2 = add nsw i32 %3, %add.1 |
| %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 |
| %add.3 = add nsw i32 %4, %add.2 |
| %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 |
| %add.4 = add nsw i32 %5, %add.3 |
| %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 |
| %add.5 = add nsw i32 %6, %add.4 |
| %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 |
| %add.6 = add nsw i32 %7, %add.5 |
| ; The final partial sum is used only as the argument of a block-terminating invoke. |
| %res = invoke i32 @foobar(i32 %add.6) to label %normal unwind label %exception |
| exception: |
| ; Cleanup-only landing pad that falls through to the normal return path. |
| %cleanup = landingpad i8 cleanup |
| br label %normal |
| normal: |
| ret void |
| } |
| |
| ; Test case from PR47670. Reduction result is used as incoming value in phi. |
| ; In %bb, four consecutive i32 values starting at %data are loaded and summed |
| ; with a chain of plain 'add'. The only user of the final sum is the phi in |
| ; %exit. Both check prefixes expect a <4 x i32> load and |
| ; @llvm.vector.reduce.add.v4i32 in %bb, with the phi rewired to the reduction |
| ; result; the three scalar getelementptrs remain in the expected output. |
| define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) { |
| ; CHECK-LABEL: @reduction_result_used_in_phi( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] |
| ; CHECK: bb: |
| ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 |
| ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 |
| ; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) |
| ; CHECK-NEXT: br label [[EXIT]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_1]] |
| ; |
| ; STORE-LABEL: @reduction_result_used_in_phi( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] |
| ; STORE: bb: |
| ; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 |
| ; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 |
| ; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 |
| ; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) |
| ; STORE-NEXT: br label [[EXIT]] |
| ; STORE: exit: |
| ; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] |
| ; STORE-NEXT: ret i32 [[SUM_1]] |
| ; |
| entry: |
| br i1 %b, label %bb, label %exit |
| |
| bb: |
| ; Scalar sum of data[0..3]; each add consumes the previous partial sum. |
| %l.0 = load i32, i32* %data, align 4 |
| %idx.1 = getelementptr inbounds i32, i32* %data, i64 1 |
| %l.1 = load i32, i32* %idx.1, align 4 |
| %add.1 = add i32 %l.1, %l.0 |
| %idx.2 = getelementptr inbounds i32, i32* %data, i64 2 |
| %l.2 = load i32, i32* %idx.2, align 4 |
| %add.2 = add i32 %l.2, %add.1 |
| %idx.3 = getelementptr inbounds i32, i32* %data, i64 3 |
| %l.3 = load i32, i32* %idx.3, align 4 |
| %add.3 = add i32 %l.3, %add.2 |
| br label %exit |
| |
| exit: |
| ; The reduction root %add.3 has no user other than this phi. |
| %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb] |
| ret i32 %sum.1 |
| } |
| |
| ; In %bb, four consecutive i32 values starting at %data are loaded and summed |
| ; with a chain of plain 'add'; the final sum is the %bb incoming value of the |
| ; phi in %exit. Both check prefixes expect a <4 x i32> load and |
| ; @llvm.vector.reduce.add.v4i32, with the phi using the reduction result. |
| ; NOTE(review): despite the "_loop" suffix, the IR below contains no back edge |
| ; (%bb branches only to %exit) and is line-for-line the same shape as |
| ; @reduction_result_used_in_phi -- confirm whether a loop-carried phi was intended. |
| define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) { |
| ; CHECK-LABEL: @reduction_result_used_in_phi_loop( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] |
| ; CHECK: bb: |
| ; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 |
| ; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 |
| ; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) |
| ; CHECK-NEXT: br label [[EXIT]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_1]] |
| ; |
| ; STORE-LABEL: @reduction_result_used_in_phi_loop( |
| ; STORE-NEXT: entry: |
| ; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] |
| ; STORE: bb: |
| ; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1 |
| ; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 |
| ; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 |
| ; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>* |
| ; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 |
| ; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) |
| ; STORE-NEXT: br label [[EXIT]] |
| ; STORE: exit: |
| ; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] |
| ; STORE-NEXT: ret i32 [[SUM_1]] |
| ; |
| entry: |
| br i1 %b, label %bb, label %exit |
| |
| bb: |
| ; Scalar sum of data[0..3]; each add consumes the previous partial sum. |
| %l.0 = load i32, i32* %data, align 4 |
| %idx.1 = getelementptr inbounds i32, i32* %data, i64 1 |
| %l.1 = load i32, i32* %idx.1, align 4 |
| %add.1 = add i32 %l.1, %l.0 |
| %idx.2 = getelementptr inbounds i32, i32* %data, i64 2 |
| %l.2 = load i32, i32* %idx.2, align 4 |
| %add.2 = add i32 %l.2, %add.1 |
| %idx.3 = getelementptr inbounds i32, i32* %data, i64 3 |
| %l.3 = load i32, i32* %idx.3, align 4 |
| %add.3 = add i32 %l.3, %add.2 |
| br label %exit |
| |
| exit: |
| ; The reduction root %add.3 has no user other than this phi. |
| %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb] |
| ret i32 %sum.1 |
| } |
| |
| ; Make sure we neither crash nor loop forever on a self-referential instruction in an unreachable block. |
| |
| ; %bb.0 branches straight to %bb.1, so nothing ever branches to %dead. Inside |
| ; %dead, %t0 is defined in terms of itself and is also an incoming value of the |
| ; i16 phi in %bb.1. Both check prefixes expect the function to come out of the |
| ; pass with the same instructions it went in with. |
| define void @unreachable_block() { |
| ; CHECK-LABEL: @unreachable_block( |
| ; CHECK-NEXT: bb.0: |
| ; CHECK-NEXT: br label [[BB_1:%.*]] |
| ; CHECK: dead: |
| ; CHECK-NEXT: [[T0:%.*]] = add i16 [[T0]], undef |
| ; CHECK-NEXT: br label [[BB_1]] |
| ; CHECK: bb.1: |
| ; CHECK-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ] |
| ; CHECK-NEXT: ret void |
| ; |
| ; STORE-LABEL: @unreachable_block( |
| ; STORE-NEXT: bb.0: |
| ; STORE-NEXT: br label [[BB_1:%.*]] |
| ; STORE: dead: |
| ; STORE-NEXT: [[T0:%.*]] = add i16 [[T0]], undef |
| ; STORE-NEXT: br label [[BB_1]] |
| ; STORE: bb.1: |
| ; STORE-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ] |
| ; STORE-NEXT: ret void |
| ; |
| bb.0: |
| br label %bb.1 |
| |
| ; No block in this function branches here. |
| dead: |
| %t0 = add i16 %t0, undef ; unreachable IR may depend on itself |
| br label %bb.1 |
| |
| bb.1: |
| ; The phi result is unused; it exists so that %t0 has a user outside %dead. |
| %t1 = phi i16 [ undef, %bb.0 ], [ %t0, %dead ] |
| ret void |
| } |
| |
| ; The FMF on the reduction should match the incoming insts. |
| |
| ; Loads p[0..3] and sums them with three 'fadd reassoc nsz' instructions. |
| ; Both check prefixes expect a <4 x float> load and a call to |
| ; @llvm.vector.reduce.fadd.v4f32 with start value -0.0 that carries exactly the |
| ; same two flags, 'reassoc nsz'. |
| define float @fadd_v4f32_fmf(float* %p) { |
| ; CHECK-LABEL: @fadd_v4f32_fmf( |
| ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 |
| ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 |
| ; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 |
| ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) |
| ; CHECK-NEXT: ret float [[TMP3]] |
| ; |
| ; STORE-LABEL: @fadd_v4f32_fmf( |
| ; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 |
| ; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 |
| ; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 |
| ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* |
| ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; STORE-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) |
| ; STORE-NEXT: ret float [[TMP3]] |
| ; |
| %p1 = getelementptr inbounds float, float* %p, i64 1 |
| %p2 = getelementptr inbounds float, float* %p, i64 2 |
| %p3 = getelementptr inbounds float, float* %p, i64 3 |
| %t0 = load float, float* %p, align 4 |
| %t1 = load float, float* %p1, align 4 |
| %t2 = load float, float* %p2, align 4 |
| %t3 = load float, float* %p3, align 4 |
| ; All three adds carry the identical flag set 'reassoc nsz'. |
| %add1 = fadd reassoc nsz float %t1, %t0 |
| %add2 = fadd reassoc nsz float %t2, %add1 |
| %add3 = fadd reassoc nsz float %t3, %add2 |
| ret float %add3 |
| } |
| |
| ; The minimal FMF for fadd reduction are "reassoc nsz". |
| ; Only the common FMF of all operations in the reduction propagate to the result. |
| ; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags. |
| |
| ; Loads p[0..3] and sums them with three fadds whose fast-math flags differ: |
| ; every add has 'ninf reassoc nsz', while 'nnan', 'arcp' and 'contract' each |
| ; appear on only some of them. Both check prefixes expect the resulting |
| ; @llvm.vector.reduce.fadd.v4f32 call (start value -0.0) to carry only the |
| ; flags shared by all three adds: 'reassoc ninf nsz'. |
| define float @fadd_v4f32_fmf_intersect(float* %p) { |
| ; CHECK-LABEL: @fadd_v4f32_fmf_intersect( |
| ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 |
| ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 |
| ; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 |
| ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) |
| ; CHECK-NEXT: ret float [[TMP3]] |
| ; |
| ; STORE-LABEL: @fadd_v4f32_fmf_intersect( |
| ; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 |
| ; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 |
| ; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 |
| ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* |
| ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 |
| ; STORE-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) |
| ; STORE-NEXT: ret float [[TMP3]] |
| ; |
| %p1 = getelementptr inbounds float, float* %p, i64 1 |
| %p2 = getelementptr inbounds float, float* %p, i64 2 |
| %p3 = getelementptr inbounds float, float* %p, i64 3 |
| %t0 = load float, float* %p, align 4 |
| %t1 = load float, float* %p1, align 4 |
| %t2 = load float, float* %p2, align 4 |
| %t3 = load float, float* %p3, align 4 |
| ; Flag sets differ per add: 'nnan' on the first two, 'arcp' only on the second, |
| ; 'contract' only on the third. |
| %add1 = fadd ninf reassoc nsz nnan float %t1, %t0 |
| %add2 = fadd ninf reassoc nsz nnan arcp float %t2, %add1 |
| %add3 = fadd ninf reassoc nsz contract float %t3, %add2 |
| ret float %add3 |
| } |
| |
| ; Personality routine referenced by the 'personality' clause of @i32_red_invoke. |
| declare i32 @__gxx_personality_v0(...) |