blob: 50afb68c4ce993f65dd00ac9383a84c6109919e4 [file] [edit]
// RUN: split-file %s %t
// RUN: mlir-translate --mlir-to-llvmir %t/host.mlir | FileCheck %s --check-prefix=CHECK
// RUN: mlir-translate --mlir-to-llvmir %t/target.mlir | FileCheck %s --check-prefix=TARGET
//--- host.mlir
// --------------------------------------------------------------------
// Affinity clause
// --------------------------------------------------------------------
// Task whose affinity clause is driven by a single iterator with two
// induction variables (i = 1..4, j = 1..6), flattened to 4 * 6 = 24 entries.
llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) {
  %one = llvm.mlir.constant(1 : i64) : i64
  %four = llvm.mlir.constant(4 : i64) : i64
  %six = llvm.mlir.constant(6 : i64) : i64
  %size = llvm.mlir.constant(4 : i64) : i64
  omp.parallel {
    omp.single {
      // Every iteration yields the same (base address, length) pair.
      %iter = omp.iterator(%i: i64, %j: i64) =
          (%one to %four step %one, %one to %six step %one) {
        %aff = omp.affinity_entry %arr, %size
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      omp.task affinity(%iter : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_1d
// Preheader -> Header
// CHECK: omp_iterator.preheader:
// CHECK: br label %omp_iterator.header
//
// Header has the IV phi and branches to cond
// CHECK: omp_iterator.header:
// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
// CHECK: br label %omp_iterator.cond
//
// Cond: IV < 24 and branches to body or exit
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 24
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
//
// Exit -> After -> continuation
// CHECK: omp_iterator.exit:
// CHECK: br label %omp_iterator.after
// CHECK: omp_iterator.after:
// CHECK: br label %omp.it.cont
//
// Body: store into affinity_list[IV] then branch to inc
// CHECK: omp_iterator.body:
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP]]
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: br label %omp_iterator.inc
//
// CHECK: omp_iterator.inc:
// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
// CHECK: br label %omp_iterator.header
// Task whose affinity clause is driven by a three-variable iterator.
llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) {
  %one = llvm.mlir.constant(1 : i64) : i64
  %two = llvm.mlir.constant(2 : i64) : i64
  %four = llvm.mlir.constant(4 : i64) : i64
  %six = llvm.mlir.constant(6 : i64) : i64
  %size = llvm.mlir.constant(4 : i64) : i64
  omp.parallel {
    omp.single {
      // Iteration space: i = 1..4, j = 1..6, k = 1..2, so 4 * 6 * 2 = 48
      // trips in total.
      %iter = omp.iterator(%i: i64, %j: i64, %k: i64) =
          (%one to %four step %one, %one to %six step %one, %one to %two step %one) {
        %aff = omp.affinity_entry %arr, %size
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      omp.task affinity(%iter : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_3d
// Preheader -> Header
// CHECK: omp_iterator.preheader:
// CHECK: br label %omp_iterator.header
//
// Header has the IV phi and branches to cond
// CHECK: omp_iterator.header:
// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
// CHECK: br label %omp_iterator.cond
//
// Cond: IV < 48 and branches to body or exit
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 48
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
//
// Exit -> After -> continuation
// CHECK: omp_iterator.exit:
// CHECK: br label %omp_iterator.after
// CHECK: omp_iterator.after:
// CHECK: br label %omp.it.cont
//
// Body: store into affinity_list[IV] then branch to inc
// CHECK: omp_iterator.body:
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP]]
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: br label %omp_iterator.inc
//
// CHECK: omp_iterator.inc:
// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
// CHECK: br label %omp_iterator.header
// Task whose affinity clause takes two independent iterators.
llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
  %one = llvm.mlir.constant(1 : i64) : i64
  %three = llvm.mlir.constant(3 : i64) : i64
  %four = llvm.mlir.constant(4 : i64) : i64
  %six = llvm.mlir.constant(6 : i64) : i64
  %size = llvm.mlir.constant(4 : i64) : i64
  omp.parallel {
    omp.single {
      // First iterator: two variables, 4 * 6 = 24 trips.
      %first = omp.iterator(%i: i64, %j: i64) =
          (%one to %four step %one, %one to %six step %one) {
        %aff0 = omp.affinity_entry %arr, %size
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff0 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      // Second iterator: one variable, 3 trips.
      %second = omp.iterator(%k: i64) = (%one to %three step %one) {
        %aff1 = omp.affinity_entry %arr, %size
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      // Both iterators feed one affinity clause (24 + 3 = 27 entries).
      omp.task affinity(%first : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>,
                        %second : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_multiple
// CHECK-DAG: [[AFFLIST0:%.*]] = alloca { i64, i64, i32 }, i64 24, align 8
// CHECK-DAG: [[AFFLIST1:%.*]] = alloca { i64, i64, i32 }, i64 3, align 8
// CHECK-DAG: [[AFFINITY_LIST:%.*]] = alloca { i64, i64, i32 }, i32 27, align 8
// First iterator header
// CHECK: omp_iterator.preheader:
// CHECK: br label %[[HEADER0:.+]]
// CHECK: [[HEADER0]]:
// CHECK: [[IV0:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT0:%.*]], %[[INC0:.+]] ]
// CHECK: br label %[[COND0:.+]]
// CHECK: [[COND0]]:
// CHECK: [[CMP0:%.*]] = icmp ult i64 [[IV0]], 24
// CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit
// Second iterator header
// CHECK: omp_iterator.preheader{{.*}}:
// CHECK: [[HEADER1:.+]]:
// CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ]
// CHECK: br label %omp_iterator.cond{{.*}}
// CHECK: omp_iterator.cond{{.*}}:
// CHECK: [[CMP1:%.*]] = icmp ult i64 [[IV1]], 3
// CHECK: br i1 [[CMP1]], label %[[BODY1:.+]], label %omp_iterator.exit{{.*}}
// CHECK: [[AFFINITY_LIST_1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 0
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_1]], ptr align 1 [[AFFLIST0]], i64 480, i1 false)
// CHECK: [[AFFINITY_LIST_2:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 24
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_2]], ptr align 1 [[AFFLIST1]], i64 60, i1 false)
// CHECK: codeRepl:
// CHECK: call ptr @__kmpc_omp_task_alloc
// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 27{{.*}}ptr [[AFFINITY_LIST]]
// CHECK: call i32 @__kmpc_omp_task
// Second iterator body
// CHECK: [[BODY1]]:
// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST1]]
// CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0
// CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]]
// CHECK: [[LENGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP1]]
// CHECK: [[FLAGGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP1]]
// CHECK: br label %[[INC1]]
// CHECK: [[INC1]]:
// CHECK: [[NEXT1]] = add nuw i64 [[IV1]], 1
// CHECK: br label %[[HEADER1]]
// First iterator body
// CHECK: [[BODY0]]:
// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST0]], i64 [[IV0]]
// CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0
// CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]]
// CHECK: [[LENGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP0]]
// CHECK: [[FLAGGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP0]]
// CHECK: br label %[[INC0]]
// CHECK: [[INC0]]:
// CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1
// CHECK: br label %[[HEADER0]]
// Make sure the affinity list is only allocated after the dynamic trip count
// has been computed.
// Affinity iterator whose bounds, step and entry length are all function
// arguments, so the trip count is only known at run time.
llvm.func @task_affinity_iterator_dynamic_tripcount(
    %arr: !llvm.ptr {llvm.nocapture}, %lb: i64, %ub: i64, %step: i64,
    %len: i64) {
  omp.parallel {
    omp.single {
      %iter = omp.iterator(%i: i64) = (%lb to %ub step %step) {
        %aff = omp.affinity_entry %arr, %len
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      omp.task affinity(%iter : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_dynamic_tripcount
// CHECK: [[DIFF:%.*]] = sub i64 {{.*}}, {{.*}}
// CHECK: [[DIV:%.*]] = sdiv i64 [[DIFF]], {{.*}}
// CHECK: [[TRIPS:%.*]] = add i64 [[DIV]], 1
// CHECK: [[SCALED:%.*]] = mul i64 1, [[TRIPS]]
// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 [[SCALED]]
// Affinity iterator that counts from 4 down to 1 with a step of -1 and uses
// the iterator variable itself as the length of each affinity entry.
llvm.func @task_affinity_iterator_negative_step(%arr: !llvm.ptr {llvm.nocapture}) {
  %start = llvm.mlir.constant(4 : i64) : i64
  %stop = llvm.mlir.constant(1 : i64) : i64
  %minus_one = llvm.mlir.constant(-1 : i64) : i64
  omp.parallel {
    omp.single {
      %iter = omp.iterator(%i: i64) = (%start to %stop step %minus_one) {
        // The length operand is the iterator variable, not a constant.
        %aff = omp.affinity_entry %arr, %i
            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
        omp.yield(%aff : !omp.affinity_entry_ty<!llvm.ptr, i64>)
      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
      omp.task affinity(%iter : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_negative_step
// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 4, align 8
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 %omp_iterator.iv, 4
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
// CHECK: omp_iterator.body:
// CHECK: [[IDX:%.*]] = urem i64 %omp_iterator.iv, 4
// CHECK: [[STEPMUL:%.*]] = mul i64 [[IDX]], -1
// CHECK: [[PHYSIV:%.*]] = add i64 4, [[STEPMUL]]
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 %omp_iterator.iv
// CHECK: [[LENPTR:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 [[PHYSIV]], ptr [[LENPTR]]
// --------------------------------------------------------------------
// Depend clause
// --------------------------------------------------------------------
// Task with a single iterator-based "in" dependence: the iterator runs over
// 1..10 with step 1 and yields the same address on every trip.
llvm.func @omp_task_depend_iterator_simple(%addr : !llvm.ptr) {
  %lower = llvm.mlir.constant(1 : i64) : i64
  %upper = llvm.mlir.constant(10 : i64) : i64
  %stride = llvm.mlir.constant(1 : i64) : i64
  %deps = omp.iterator(%idx: i64) = (%lower to %upper step %stride) {
    omp.yield(%addr : !llvm.ptr)
  } -> !omp.iterated<!llvm.ptr>
  omp.task depend(taskdependin -> %deps : !omp.iterated<!llvm.ptr>) {
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define void @omp_task_depend_iterator_simple
// CHECK-SAME: (ptr %[[ADDR:[0-9]+]])
// CHECK: %[[DEP_ARR:.*]] = tail call ptr @malloc(i64 %mallocsize)
//
// Iterator loop: preheader -> header -> cond -> body -> inc -> header...
// CHECK: omp_dep_iterator.header:
// CHECK: %[[IV:.*]] = phi i64 [ 0, %omp_dep_iterator.preheader ], [ %[[NEXT:.*]], %omp_dep_iterator.inc ]
// CHECK: omp_dep_iterator.cond:
// CHECK: %[[CMP:.*]] = icmp ult i64 %[[IV]], 10
// CHECK: br i1 %[[CMP]], label %omp_dep_iterator.body, label %omp_dep_iterator.exit
//
// Body: store kmp_dep_info at depArray[0 + linearIV]
// CHECK: omp_dep_iterator.body:
// CHECK: %[[IDX:.*]] = add i64 0, %[[IV]]
// CHECK: %[[ENTRY:.*]] = getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR]], i64 %[[IDX]]
// CHECK: %[[BASE_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 0
// CHECK: %[[PTRINT:.*]] = ptrtoint ptr %[[ADDR]] to i64
// CHECK: store i64 %[[PTRINT]], ptr %[[BASE_GEP]]
// CHECK: %[[LEN_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 1
// CHECK: store i64 8, ptr %[[LEN_GEP]]
// CHECK: %[[FLAGS_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 2
// depKind = 1 (DepIn)
// CHECK: store i8 1, ptr %[[FLAGS_GEP]]
//
// CHECK: omp_dep_iterator.inc:
// CHECK: %[[NEXT]] = add nuw i64 %[[IV]], 1
//
// Task creation with deps, then free
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.*}}, i32 %{{.*}}, ptr %{{.*}}, i32 10, ptr %[[DEP_ARR]], i32 0, ptr null)
// CHECK: tail call void @free(ptr %[[DEP_ARR]])
// Task that combines a plain "out" dependence on %plain with an
// iterator-based "in" dependence over 1..10 (step 1) on %addr.
llvm.func @omp_task_depend_iterator_mixed(%addr : !llvm.ptr, %plain : !llvm.ptr) {
  %lower = llvm.mlir.constant(1 : i64) : i64
  %upper = llvm.mlir.constant(10 : i64) : i64
  %stride = llvm.mlir.constant(1 : i64) : i64
  %deps = omp.iterator(%idx: i64) = (%lower to %upper step %stride) {
    omp.yield(%addr : !llvm.ptr)
  } -> !omp.iterated<!llvm.ptr>
  // The plain dependence is listed first, the iterated one second.
  omp.task depend(taskdependout -> %plain : !llvm.ptr, taskdependin -> %deps : !omp.iterated<!llvm.ptr>) {
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define void @omp_task_depend_iterator_mixed
// CHECK-SAME: (ptr %[[ADDR2:[0-9]+]], ptr %[[PLAIN:[0-9]+]])
// CHECK: %[[DEP_ARR2:.*]] = tail call ptr @malloc(i64 %mallocsize)
//
// Plain entry at index 0
// CHECK: %[[PLAIN_ENTRY:.*]] = getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR2]], i64 0
// CHECK: getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[PLAIN_ENTRY]], i32 0, i32 0
// CHECK: %[[PLAIN_PTRINT:.*]] = ptrtoint ptr %[[PLAIN]] to i64
// CHECK: store i64 %[[PLAIN_PTRINT]], ptr
// depKind = 3 (DepInOut; this is what the taskdependout entry is stored as)
// CHECK: store i8 3, ptr
//
// Iterator loop for iterated entry starting at offset 1
// CHECK: omp_dep_iterator.body:
// startIdx(1) + linearIV
// CHECK: add i64 1, %omp_dep_iterator.iv
// CHECK: getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR2]]
// depKind = 1 (DepIn)
// CHECK: store i8 1, ptr
//
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.*}}, i32 %{{.*}}, ptr %{{.*}}, i32 11, ptr %[[DEP_ARR2]], i32 0, ptr null)
// CHECK: tail call void @free(ptr %[[DEP_ARR2]])
// Dynamic bounds: iterator bounds are function arguments, so the trip count
// and dep-array size are computed at runtime. The dep-array allocation (a
// malloc call in the checks below) must be placed after the trip-count
// computation (not hoisted to the entry block) to avoid
// "instruction does not dominate all uses" errors.
// Task with an iterator-based "in" dependence whose lower bound, upper bound
// and step all come from function arguments.
llvm.func @omp_task_depend_iterator_dynamic(
    %addr : !llvm.ptr, %lb : i64, %ub : i64, %step : i64) {
  %deps = omp.iterator(%idx: i64) = (%lb to %ub step %step) {
    omp.yield(%addr : !llvm.ptr)
  } -> !omp.iterated<!llvm.ptr>
  omp.task depend(taskdependin -> %deps : !omp.iterated<!llvm.ptr>) {
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define void @omp_task_depend_iterator_dynamic
//
// Tripcount computation from dynamic bounds
// CHECK: %[[DIFF:.*]] = sub i64 %{{.*}}, %{{.*}}
// CHECK: %[[DIV:.*]] = sdiv i64 %[[DIFF]], %{{.*}}
// CHECK: %[[TRIPS:.*]] = add i64 %[[DIV]], 1
// CHECK: %[[SCALED:.*]] = mul i64 1, %[[TRIPS]]
// Dynamic total = 0 + scaled trip count
// CHECK: %[[TOTAL:.*]] = add i64 0, %[[SCALED]]
//
// Malloc with dynamic size
// CHECK: %[[DEP_ARR:.*]] = tail call ptr @malloc(i64 %mallocsize)
// CHECK: omp_dep_iterator.body:
// CHECK: getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR]]
// NumDeps is truncated to i32 for the runtime call
// CHECK: %[[NDEPS:.*]] = trunc i64 %[[TOTAL]] to i32
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.*}}, i32 %{{.*}}, ptr %{{.*}}, i32 %[[NDEPS]], ptr %[[DEP_ARR]], i32 0, ptr null)
// CHECK: tail call void @free(ptr %[[DEP_ARR]])
// Dynamic bounds with mixed plain + iterated depends.
// Task that combines a plain "out" dependence on %plain with an
// iterator-based "in" dependence whose bounds and step are function arguments.
llvm.func @omp_task_depend_iterator_dynamic_mixed(
    %addr : !llvm.ptr, %plain : !llvm.ptr, %lb : i64, %ub : i64, %step : i64) {
  %deps = omp.iterator(%idx: i64) = (%lb to %ub step %step) {
    omp.yield(%addr : !llvm.ptr)
  } -> !omp.iterated<!llvm.ptr>
  // The plain dependence is listed first, the iterated one second.
  omp.task depend(taskdependout -> %plain : !llvm.ptr, taskdependin -> %deps : !omp.iterated<!llvm.ptr>) {
    omp.terminator
  }
  llvm.return
}
// CHECK-LABEL: define void @omp_task_depend_iterator_dynamic_mixed
// CHECK: %[[TRIPS2:.*]] = mul i64 1, %{{.*}}
// total = 1 (plain) + dynamic trip count
// CHECK: %[[TOTAL2:.*]] = add i64 1, %[[TRIPS2]]
// CHECK: %[[DEP_ARR2:.*]] = tail call ptr @malloc(i64 %mallocsize)
// Plain entry at index 0
// CHECK: %[[PLAIN_ENTRY:.*]] = getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR2]], i64 0
// CHECK: store i8 3, ptr
// Iterator loop
// CHECK: omp_dep_iterator.body:
// CHECK: add i64 1, %omp_dep_iterator.iv
// CHECK: %[[NDEPS2:.*]] = trunc i64 %[[TOTAL2]] to i32
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.*}}, i32 %{{.*}}, ptr %{{.*}}, i32 %[[NDEPS2]], ptr %[[DEP_ARR2]], i32 0, ptr null)
// CHECK: tail call void @free(ptr %[[DEP_ARR2]])
//--- target.mlir
// --------------------------------------------------------------------
// Depend clause on target construct
// --------------------------------------------------------------------
// Target construct with iterator-based depend clause.
// The iterator(i=1:10) should allocate a kmp_dep_info[10] array, fill it via
// a dep_iterator loop, then emit __kmpc_omp_wait_deps with ndeps=10.
// Host-side module (omp.is_target_device = false) with an AMDGPU offload
// triple; it holds one function with an iterator-dependent target region.
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
  llvm.func @omp_target_depend_iterator(%addr: !llvm.ptr) {
    %lower = llvm.mlir.constant(1 : i64) : i64
    %upper = llvm.mlir.constant(10 : i64) : i64
    %stride = llvm.mlir.constant(1 : i64) : i64
    // Iterator over 1..10 (step 1) that yields %addr on every trip.
    %deps = omp.iterator(%idx: i64) = (%lower to %upper step %stride) {
      omp.yield(%addr : !llvm.ptr)
    } -> !omp.iterated<!llvm.ptr>
    // %addr is additionally mapped "to" the target region, captured by reference.
    %mapped = omp.map.info var_ptr(%addr : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "data"}
    omp.target depend(taskdependin -> %deps : !omp.iterated<!llvm.ptr>) map_entries(%mapped -> %region_arg : !llvm.ptr) {
      omp.terminator
    }
    llvm.return
  }
}
// TARGET-LABEL: define void @omp_target_depend_iterator
// TARGET-SAME: (ptr %[[ADDR:[0-9]+]])
// TARGET-DAG: %[[DEP_ARR:.*]] = tail call ptr @malloc(i64 %mallocsize)
//
// Iterator loop: preheader -> header -> cond -> body -> inc -> header...
// TARGET: omp_dep_iterator.header:
// TARGET: %[[IV:.*]] = phi i64 [ 0, %omp_dep_iterator.preheader ], [ %[[NEXT:.*]], %omp_dep_iterator.inc ]
// TARGET: omp_dep_iterator.cond:
// TARGET: %[[CMP:.*]] = icmp ult i64 %[[IV]], 10
// TARGET: br i1 %[[CMP]], label %omp_dep_iterator.body, label %omp_dep_iterator.exit
//
// Body: store kmp_dep_info at depArray[0 + linearIV]
// TARGET: omp_dep_iterator.body:
// TARGET: %[[IDX:.*]] = add i64 0, %[[IV]]
// TARGET: %[[ENTRY:.*]] = getelementptr inbounds %struct.kmp_dep_info, ptr %[[DEP_ARR]], i64 %[[IDX]]
// TARGET: %[[BASE_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 0
// TARGET: %[[PTRINT:.*]] = ptrtoint ptr %[[ADDR]] to i64
// TARGET: store i64 %[[PTRINT]], ptr %[[BASE_GEP]]
// TARGET: %[[LEN_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 1
// TARGET: store i64 8, ptr %[[LEN_GEP]]
// TARGET: %[[FLAGS_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[ENTRY]], i32 0, i32 2
// depKind = 1 (DepIn)
// TARGET: store i8 1, ptr %[[FLAGS_GEP]]
//
// TARGET: omp_dep_iterator.inc:
// TARGET: %[[NEXT]] = add nuw i64 %[[IV]], 1
//
// Target task: wait_deps with ndeps=10, then begin_if0/proxy/complete_if0, then free
// TARGET: call void @__kmpc_omp_wait_deps(ptr @{{.*}}, i32 %{{.*}}, i32 10, ptr %[[DEP_ARR]], i32 0, ptr null)
// TARGET: call void @__kmpc_omp_task_begin_if0
// TARGET: call void @.omp_target_task_proxy_func
// TARGET: call void @__kmpc_omp_task_complete_if0
// TARGET: tail call void @free(ptr %[[DEP_ARR]])