blob: ab3c87cca56ea286a88aa9000040c169ef970319 [file] [log] [blame]
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32
omp.declare_reduction @add_reduction_f32 : f32 init {
^bb0(%arg0: f32):
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
omp.yield(%0 : f32)
} combiner {
^bb0(%arg0: f32, %arg1: f32):
%0 = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath<contract>} : f32
omp.yield(%0 : f32)
}
llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) {
%0 = llvm.mlir.constant(0.000000e+00 : f32) : f32
%1 = llvm.mlir.constant(1 : i32) : i32
%2 = llvm.mlir.constant(1024 : i32) : i32
%3 = llvm.mlir.constant(1 : i64) : i64
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
llvm.store %0, %arg1 : f32, !llvm.ptr
omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(@add_reduction_f32 %arg1 -> %arg3 : !llvm.ptr) {
omp.loop_nest (%arg4) : i32 = (%1) to (%2) inclusive step (%1) {
llvm.store %arg4, %arg2 : i32, !llvm.ptr
%5 = llvm.load %arg3 : !llvm.ptr -> f32
%6 = llvm.load %arg2 : !llvm.ptr -> i32
%7 = llvm.sext %6 : i32 to i64
%8 = llvm.sub %7, %3 overflow<nsw> : i64
%9 = llvm.getelementptr %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%10 = llvm.load %9 : !llvm.ptr -> f32
%11 = llvm.fadd %5, %10 {fastmathFlags = #llvm.fastmath<contract>} : f32
llvm.store %11, %arg3 : f32, !llvm.ptr
omp.yield
}
}
llvm.return
}
// CHECK-LABEL: define void @_QPsimd_reduction(
// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4
// CHECK: store float 0.000000e+00, ptr %[[ORIG_SUM:.*]], align 4
// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4
// CHECK: %[[RED_VAR:.*]] = alloca float, align 4
// CHECK: br label %[[VAL_4:.*]]
// CHECK: omp.region.after_alloca: ; preds = %[[VAL_5:.*]]
// CHECK: br label %[[VAL_6:.*]]
// CHECK: entry: ; preds = %[[VAL_4]]
// CHECK: br label %[[VAL_7:.*]]
// CHECK: omp.private.init: ; preds = %[[VAL_6]]
// CHECK: br label %[[VAL_8:.*]]
// CHECK: omp.reduction.init: ; preds = %[[VAL_7]]
// CHECK: store float 0.000000e+00, ptr %[[RED_VAR]], align 4
// CHECK: br label %[[VAL_9:.*]]
// CHECK: omp.simd.region: ; preds = %[[VAL_8]]
// CHECK: br label %[[VAL_10:.*]]
// CHECK: omp_loop.preheader: ; preds = %[[VAL_9]]
// CHECK: br label %[[VAL_11:.*]]
// CHECK: omp_loop.header: ; preds = %[[VAL_12:.*]], %[[VAL_10]]
// CHECK: %[[VAL_13:.*]] = phi i32 [ 0, %[[VAL_10]] ], [ %[[VAL_14:.*]], %[[VAL_12]] ]
// CHECK: br label %[[VAL_15:.*]]
// CHECK: omp_loop.cond: ; preds = %[[VAL_11]]
// CHECK: %[[VAL_16:.*]] = icmp ult i32 %[[VAL_13]], 1024
// CHECK: br i1 %[[VAL_16]], label %[[VAL_17:.*]], label %[[VAL_18:.*]]
// CHECK: omp_loop.body: ; preds = %[[VAL_15]]
// CHECK: %[[VAL_19:.*]] = mul i32 %[[VAL_13]], 1
// CHECK: %[[VAL_20:.*]] = add i32 %[[VAL_19]], 1
// CHECK: br label %[[VAL_21:.*]]
// CHECK: omp.loop_nest.region: ; preds = %[[VAL_17]]
// CHECK: store i32 %[[VAL_20]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]]
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
// CHECK: %[[VAL_23:.*]] = load i32, ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
// CHECK: %[[VAL_24:.*]] = sext i32 %[[VAL_23]] to i64
// CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_24]], 1
// CHECK: %[[VAL_26:.*]] = getelementptr float, ptr %[[VAL_27:.*]], i64 %[[VAL_25]]
// CHECK: %[[VAL_28:.*]] = load float, ptr %[[VAL_26]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
// CHECK: %[[VAL_29:.*]] = fadd contract float %[[RED_VAL]], %[[VAL_28]]
// CHECK: store float %[[VAL_29]], ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]]
// CHECK: br label %[[VAL_30:.*]]
// CHECK: omp.region.cont1: ; preds = %[[VAL_21]]
// CHECK: br label %[[VAL_12]]
// CHECK: omp_loop.inc: ; preds = %[[VAL_30]]
// CHECK: %[[VAL_14]] = add nuw i32 %[[VAL_13]], 1
// CHECK: br label %[[VAL_11]], !llvm.loop ![[LOOP:.*]]
// CHECK: omp_loop.exit: ; preds = %[[VAL_15]]
// CHECK: br label %[[VAL_31:.*]]
// CHECK: omp_loop.after: ; preds = %[[VAL_18]]
// CHECK: br label %[[VAL_32:.*]]
// CHECK: omp.region.cont: ; preds = %[[VAL_31]]
// CHECK: %[[SUM_VAL:.*]] = load float, ptr %[[ORIG_SUM]], align 4
// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4
// CHECK: %[[COMBINED_VAL:.*]] = fadd contract float %[[SUM_VAL]], %[[RED_VAL]]
// CHECK: store float %[[COMBINED_VAL]], ptr %[[ORIG_SUM]], align 4
// CHECK: ret void
// CHECK: ![[ACCESS_GROUP]] = distinct !{}
// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]}
// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]}
// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}