// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
// expected-no-diagnostics
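// Checks amdgcn device codegen: kernel locals are allocated in addrspace(5) and addrspacecast to generic pointers, and each kernel body is framed by __kmpc_target_init / __kmpc_target_deinit.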
#ifndef HEADER
#define HEADER

#define N 1000

int test_amdgcn_target_tid_threads() {
  int arr[N];
#pragma omp target
  for (int i = 0; i < N; i++) {
    arr[i] = 1;
  }
  return arr[0];
}

int test_amdgcn_target_tid_threads_simd() {
  int arr[N];
#pragma omp target simd
  for (int i = 0; i < N; i++) {
    arr[i] = 1;
  }
  return arr[0];
}

#endif
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: br label [[FOR_COND:%.*]]
// CHECK: for.cond:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK: for.body:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: br label [[FOR_INC:%.*]]
// CHECK: for.inc:
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK: for.end:
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK: omp.inner.for.end:
// CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
//