| ; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=slp-vectorizer %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s |
| |
| ; The command above runs only the SLP vectorizer over this IR for wasm32 with |
| ; simd128 enabled, lowers the result with llc, and matches the emitted |
| ; assembly against the check lines in this file. |
| ; |
| ; TwoBytes/FourBytes are the element types used to index the i8 inputs %x and |
| ; %y; TwoFloats/FourFloats are the sret result types holding the accumulators. |
| %struct.TwoBytes = type { i8, i8 } |
| %struct.FourBytes = type { i8, i8, i8, i8 } |
| %struct.TwoFloats = type { float, float } |
| %struct.FourFloats = type { float, float, float, float } |
| |
| ; Two-lane multiply-accumulate written with llvm.fmuladd: for each i in [0, %n) |
| ; the two signed bytes of x[i] and y[i] are converted to float, multiplied |
| ; pairwise, and added into the two float accumulators stored in %agg.result. |
| ; The test expects no v128.load to appear in the code emitted for this function. |
| ; CHECK-LABEL: mac_2d_f32_i8_fmuladd: |
| ; CHECK-NOT: v128.load |
| define hidden void @mac_2d_f32_i8_fmuladd(ptr dead_on_unwind noalias writable sret(%struct.TwoFloats) align 4 captures(none) %agg.result, ptr noundef readonly captures(none) %x, ptr noundef readonly captures(none) %y, i32 noundef %n) { |
| entry: |
| ; The first accumulator is loaded up front; when %n is 0 the loop is skipped |
| ; and this value is stored back unchanged in for.cond.cleanup. |
| %agg.result.promoted = load float, ptr %agg.result, align 4 |
| %cmp18.not = icmp eq i32 %n, 0 |
| br i1 %cmp18.not, label %for.cond.cleanup, label %for.body.lr.ph |
| |
| for.body.lr.ph: |
| ; The second accumulator lives at byte offset 4 of the result. |
| %b10 = getelementptr inbounds nuw i8, ptr %agg.result, i32 4 |
| %b10.promoted = load float, ptr %b10, align 4 |
| br label %for.body |
| |
| for.cond.for.cond.cleanup_crit_edge: |
| ; Loop exit: write back the second accumulator here; the first one is written |
| ; in for.cond.cleanup through the phi below. |
| store float %7, ptr %b10, align 4 |
| br label %for.cond.cleanup |
| |
| for.cond.cleanup: |
| %.lcssa = phi float [ %4, %for.cond.for.cond.cleanup_crit_edge ], [ %agg.result.promoted, %entry ] |
| store float %.lcssa, ptr %agg.result, align 4 |
| ret void |
| |
| for.body: |
| ; %0 carries the second accumulator and %1 the first across iterations. |
| %0 = phi float [ %b10.promoted, %for.body.lr.ph ], [ %7, %for.body ] |
| %i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %1 = phi float [ %agg.result.promoted, %for.body.lr.ph ], [ %4, %for.body ] |
| ; Byte 0 of x[i] and y[i]: signed convert to float, then fmuladd into %1. |
| %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %x, i32 %i.019 |
| %2 = load i8, ptr %arrayidx, align 1 |
| %conv = sitofp i8 %2 to float |
| %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %y, i32 %i.019 |
| %3 = load i8, ptr %arrayidx1, align 1 |
| %conv3 = sitofp i8 %3 to float |
| %4 = tail call float @llvm.fmuladd.f32(float %conv, float %conv3, float %1) |
| ; Byte 1 of x[i] and y[i]: signed convert to float, then fmuladd into %0. |
| %b = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 |
| %5 = load i8, ptr %b, align 1 |
| %conv6 = sitofp i8 %5 to float |
| %b8 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 |
| %6 = load i8, ptr %b8, align 1 |
| %conv9 = sitofp i8 %6 to float |
| %7 = tail call float @llvm.fmuladd.f32(float %conv6, float %conv9, float %0) |
| %inc = add nuw i32 %i.019, 1 |
| %exitcond.not = icmp eq i32 %inc, %n |
| br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body |
| } |
| |
| ; Two-lane multiply-accumulate written with separate fmul and fadd: for each i |
| ; in [0, %n) the two signed bytes of x[i] and y[i] are converted to float, |
| ; multiplied pairwise, and added into the two float accumulators stored in |
| ; %agg.result. |
| ; The test expects no v128.load to appear in the code emitted for this function. |
| ; CHECK-LABEL: mac_2d_f32_i8: |
| ; CHECK-NOT: v128.load |
| define hidden void @mac_2d_f32_i8(ptr dead_on_unwind noalias writable sret(%struct.TwoFloats) align 4 captures(none) %agg.result, ptr noundef readonly captures(none) %x, ptr noundef readonly captures(none) %y, i32 noundef %n) { |
| entry: |
| ; The first accumulator is loaded up front; when %n is 0 the loop is skipped |
| ; and this value is stored back unchanged in for.cond.cleanup. |
| %agg.result.promoted = load float, ptr %agg.result, align 4 |
| %cmp18.not = icmp eq i32 %n, 0 |
| br i1 %cmp18.not, label %for.cond.cleanup, label %for.body.lr.ph |
| |
| for.body.lr.ph: |
| ; The second accumulator lives at byte offset 4 of the result. |
| %b10 = getelementptr inbounds nuw i8, ptr %agg.result, i32 4 |
| %b10.promoted = load float, ptr %b10, align 4 |
| br label %for.body |
| |
| for.cond.for.cond.cleanup_crit_edge: |
| ; Loop exit: write back the second accumulator here; the first one is written |
| ; in for.cond.cleanup through the phi below. |
| store float %7, ptr %b10, align 4 |
| br label %for.cond.cleanup |
| |
| for.cond.cleanup: |
| %.lcssa = phi float [ %4, %for.cond.for.cond.cleanup_crit_edge ], [ %agg.result.promoted, %entry ] |
| store float %.lcssa, ptr %agg.result, align 4 |
| ret void |
| |
| for.body: |
| ; %0 carries the second accumulator and %1 the first across iterations. |
| %0 = phi float [ %b10.promoted, %for.body.lr.ph ], [ %7, %for.body ] |
| %i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %1 = phi float [ %agg.result.promoted, %for.body.lr.ph ], [ %4, %for.body ] |
| ; Byte 0 of x[i] and y[i]: signed convert to float, multiply, add into %1. |
| %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %x, i32 %i.019 |
| %2 = load i8, ptr %arrayidx, align 1 |
| %conv = sitofp i8 %2 to float |
| %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %y, i32 %i.019 |
| %3 = load i8, ptr %arrayidx1, align 1 |
| %conv3 = sitofp i8 %3 to float |
| %fmul = fmul float %conv, %conv3 |
| %4 = fadd float %fmul, %1 |
| ; Byte 1 of x[i] and y[i]: signed convert to float, multiply, add into %0. |
| %b = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 |
| %5 = load i8, ptr %b, align 1 |
| %conv6 = sitofp i8 %5 to float |
| %b8 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 |
| %6 = load i8, ptr %b8, align 1 |
| %conv9 = sitofp i8 %6 to float |
| %fmul.1 = fmul float %conv6, %conv9 |
| %7 = fadd float %fmul.1, %0 |
| %inc = add nuw i32 %i.019, 1 |
| %exitcond.not = icmp eq i32 %inc, %n |
| br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body |
| } |
| |
| ; Multiply-add intrinsic called by the *_fmuladd test functions in this file. |
| declare float @llvm.fmuladd.f32(float, float, float) |
| |
| ; Four-lane multiply-accumulate written with llvm.fmuladd: for each i in |
| ; [0, %n) the four signed bytes of x[i] and y[i] are converted to float, |
| ; multiplied pairwise, and added into the four float accumulators stored in |
| ; %agg.result. |
| ; The checks below expect, after a loop marker, two sequences of |
| ; v128.load32_zero, signed i8->i16 and i16->i32 low extensions and an |
| ; i32x4->f32x4 conversion, followed by f32x4.mul and f32x4.add. |
| ; CHECK-LABEL: mac_4d_f32_i8_fmuladd: |
| ; CHECK: loop |
| ; CHECK: v128.load32_zero |
| ; CHECK: i16x8.extend_low_i8x16_s |
| ; CHECK: i32x4.extend_low_i16x8_s |
| ; CHECK: f32x4.convert_i32x4_s |
| ; CHECK: v128.load32_zero |
| ; CHECK: i16x8.extend_low_i8x16_s |
| ; CHECK: i32x4.extend_low_i16x8_s |
| ; CHECK: f32x4.convert_i32x4_s |
| ; CHECK: f32x4.mul |
| ; CHECK: f32x4.add |
| define hidden void @mac_4d_f32_i8_fmuladd(ptr dead_on_unwind noalias writable sret(%struct.FourFloats) align 4 captures(none) %agg.result, ptr noundef readonly captures(none) %x, ptr noundef readonly captures(none) %y, i32 noundef %n) { |
| entry: |
| ; The first accumulator is loaded up front; when %n is 0 the loop is skipped |
| ; and this value is stored back unchanged in for.cond.cleanup. |
| %agg.result.promoted = load float, ptr %agg.result, align 4 |
| %cmp38.not = icmp eq i32 %n, 0 |
| br i1 %cmp38.not, label %for.cond.cleanup, label %for.body.lr.ph |
| |
| for.body.lr.ph: |
| ; The remaining three accumulators live at byte offsets 4, 8 and 12. |
| %b10 = getelementptr inbounds nuw i8, ptr %agg.result, i32 4 |
| %c16 = getelementptr inbounds nuw i8, ptr %agg.result, i32 8 |
| %d22 = getelementptr inbounds nuw i8, ptr %agg.result, i32 12 |
| %b10.promoted = load float, ptr %b10, align 4 |
| %c16.promoted = load float, ptr %c16, align 4 |
| %d22.promoted = load float, ptr %d22, align 4 |
| br label %for.body |
| |
| for.cond.for.cond.cleanup_crit_edge: |
| ; Loop exit: write back accumulators 1..3 here; accumulator 0 is written in |
| ; for.cond.cleanup through the phi below. |
| store float %9, ptr %b10, align 4 |
| store float %12, ptr %c16, align 4 |
| store float %15, ptr %d22, align 4 |
| br label %for.cond.cleanup |
| |
| for.cond.cleanup: |
| %.lcssa = phi float [ %6, %for.cond.for.cond.cleanup_crit_edge ], [ %agg.result.promoted, %entry ] |
| store float %.lcssa, ptr %agg.result, align 4 |
| ret void |
| |
| for.body: |
| ; Loop-carried accumulators: %3 (offset 0), %2 (offset 4), %1 (offset 8), |
| ; %0 (offset 12). |
| %0 = phi float [ %d22.promoted, %for.body.lr.ph ], [ %15, %for.body ] |
| %1 = phi float [ %c16.promoted, %for.body.lr.ph ], [ %12, %for.body ] |
| %2 = phi float [ %b10.promoted, %for.body.lr.ph ], [ %9, %for.body ] |
| %i.039 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %3 = phi float [ %agg.result.promoted, %for.body.lr.ph ], [ %6, %for.body ] |
| ; Byte 0 of x[i] and y[i]: signed convert to float, then fmuladd into %3. |
| %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %x, i32 %i.039 |
| %4 = load i8, ptr %arrayidx, align 1 |
| %conv = sitofp i8 %4 to float |
| %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %y, i32 %i.039 |
| %5 = load i8, ptr %arrayidx1, align 1 |
| %conv3 = sitofp i8 %5 to float |
| %6 = tail call float @llvm.fmuladd.f32(float %conv, float %conv3, float %3) |
| ; Byte 1: fmuladd into %2. |
| %b = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 |
| %7 = load i8, ptr %b, align 1 |
| %conv6 = sitofp i8 %7 to float |
| %b8 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 |
| %8 = load i8, ptr %b8, align 1 |
| %conv9 = sitofp i8 %8 to float |
| %9 = tail call float @llvm.fmuladd.f32(float %conv6, float %conv9, float %2) |
| ; Byte 2: fmuladd into %1. |
| %c = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 |
| %10 = load i8, ptr %c, align 1 |
| %conv12 = sitofp i8 %10 to float |
| %c14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 |
| %11 = load i8, ptr %c14, align 1 |
| %conv15 = sitofp i8 %11 to float |
| %12 = tail call float @llvm.fmuladd.f32(float %conv12, float %conv15, float %1) |
| ; Byte 3: fmuladd into %0. |
| %d = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 |
| %13 = load i8, ptr %d, align 1 |
| %conv18 = sitofp i8 %13 to float |
| %d20 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 |
| %14 = load i8, ptr %d20, align 1 |
| %conv21 = sitofp i8 %14 to float |
| %15 = tail call float @llvm.fmuladd.f32(float %conv18, float %conv21, float %0) |
| %inc = add nuw i32 %i.039, 1 |
| %exitcond.not = icmp eq i32 %inc, %n |
| br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body |
| } |
| |
| ; Four-lane multiply-accumulate written with separate fmul and fadd: for each |
| ; i in [0, %n) the four signed bytes of x[i] and y[i] are converted to float, |
| ; multiplied pairwise, and added into the four float accumulators stored in |
| ; %agg.result. |
| ; The checks below expect, after a loop marker, two sequences of |
| ; v128.load32_zero, signed i8->i16 and i16->i32 low extensions and an |
| ; i32x4->f32x4 conversion, followed by f32x4.mul and f32x4.add. |
| ; CHECK-LABEL: mac_4d_f32_i8: |
| ; CHECK: loop |
| ; CHECK: v128.load32_zero |
| ; CHECK: i16x8.extend_low_i8x16_s |
| ; CHECK: i32x4.extend_low_i16x8_s |
| ; CHECK: f32x4.convert_i32x4_s |
| ; CHECK: v128.load32_zero |
| ; CHECK: i16x8.extend_low_i8x16_s |
| ; CHECK: i32x4.extend_low_i16x8_s |
| ; CHECK: f32x4.convert_i32x4_s |
| ; CHECK: f32x4.mul |
| ; CHECK: f32x4.add |
| define hidden void @mac_4d_f32_i8(ptr dead_on_unwind noalias writable sret(%struct.FourFloats) align 4 captures(none) %agg.result, ptr noundef readonly captures(none) %x, ptr noundef readonly captures(none) %y, i32 noundef %n) { |
| entry: |
| ; The first accumulator is loaded up front; when %n is 0 the loop is skipped |
| ; and this value is stored back unchanged in for.cond.cleanup. |
| %agg.result.promoted = load float, ptr %agg.result, align 4 |
| %cmp38.not = icmp eq i32 %n, 0 |
| br i1 %cmp38.not, label %for.cond.cleanup, label %for.body.lr.ph |
| |
| for.body.lr.ph: |
| ; The remaining three accumulators live at byte offsets 4, 8 and 12. |
| %b10 = getelementptr inbounds nuw i8, ptr %agg.result, i32 4 |
| %c16 = getelementptr inbounds nuw i8, ptr %agg.result, i32 8 |
| %d22 = getelementptr inbounds nuw i8, ptr %agg.result, i32 12 |
| %b10.promoted = load float, ptr %b10, align 4 |
| %c16.promoted = load float, ptr %c16, align 4 |
| %d22.promoted = load float, ptr %d22, align 4 |
| br label %for.body |
| |
| for.cond.for.cond.cleanup_crit_edge: |
| ; Loop exit: write back accumulators 1..3 here; accumulator 0 is written in |
| ; for.cond.cleanup through the phi below. |
| store float %9, ptr %b10, align 4 |
| store float %12, ptr %c16, align 4 |
| store float %15, ptr %d22, align 4 |
| br label %for.cond.cleanup |
| |
| for.cond.cleanup: |
| %.lcssa = phi float [ %6, %for.cond.for.cond.cleanup_crit_edge ], [ %agg.result.promoted, %entry ] |
| store float %.lcssa, ptr %agg.result, align 4 |
| ret void |
| |
| for.body: |
| ; Loop-carried accumulators: %3 (offset 0), %2 (offset 4), %1 (offset 8), |
| ; %0 (offset 12). |
| %0 = phi float [ %d22.promoted, %for.body.lr.ph ], [ %15, %for.body ] |
| %1 = phi float [ %c16.promoted, %for.body.lr.ph ], [ %12, %for.body ] |
| %2 = phi float [ %b10.promoted, %for.body.lr.ph ], [ %9, %for.body ] |
| %i.039 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] |
| %3 = phi float [ %agg.result.promoted, %for.body.lr.ph ], [ %6, %for.body ] |
| ; Byte 0 of x[i] and y[i]: signed convert to float, multiply, add into %3. |
| %arrayidx = getelementptr inbounds nuw %struct.FourBytes, ptr %x, i32 %i.039 |
| %4 = load i8, ptr %arrayidx, align 1 |
| %conv = sitofp i8 %4 to float |
| %arrayidx1 = getelementptr inbounds nuw %struct.FourBytes, ptr %y, i32 %i.039 |
| %5 = load i8, ptr %arrayidx1, align 1 |
| %conv3 = sitofp i8 %5 to float |
| %fmul = fmul float %conv, %conv3 |
| %6 = fadd float %fmul, %3 |
| ; Byte 1: multiply, add into %2. |
| %b = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1 |
| %7 = load i8, ptr %b, align 1 |
| %conv6 = sitofp i8 %7 to float |
| %b8 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1 |
| %8 = load i8, ptr %b8, align 1 |
| %conv9 = sitofp i8 %8 to float |
| %fmul.1 = fmul float %conv6, %conv9 |
| %9 = fadd float %fmul.1, %2 |
| ; Byte 2: multiply, add into %1. |
| %c = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2 |
| %10 = load i8, ptr %c, align 1 |
| %conv12 = sitofp i8 %10 to float |
| %c14 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2 |
| %11 = load i8, ptr %c14, align 1 |
| %conv15 = sitofp i8 %11 to float |
| %fmul.2 = fmul float %conv12, %conv15 |
| %12 = fadd float %fmul.2, %1 |
| ; Byte 3: multiply, add into %0. |
| %d = getelementptr inbounds nuw i8, ptr %arrayidx, i32 3 |
| %13 = load i8, ptr %d, align 1 |
| %conv18 = sitofp i8 %13 to float |
| %d20 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 3 |
| %14 = load i8, ptr %d20, align 1 |
| %conv21 = sitofp i8 %14 to float |
| %fmul.3 = fmul float %conv18, %conv21 |
| %15 = fadd float %fmul.3, %0 |
| %inc = add nuw i32 %i.039, 1 |
| %exitcond.not = icmp eq i32 %inc, %n |
| br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body |
| } |