| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ |
| // REQUIRES: amdgpu-registered-target |
| |
| // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc |
| // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s |
| // expected-no-diagnostics |
| #ifndef HEADER |
| #define HEADER |
| |
| #define N 1000 |
| |
| int test_amdgcn_target_tid_threads() { |
| int arr[N]; |
| #pragma omp target |
| for (int i = 0; i < N; i++) { |
| arr[i] = 1; |
| } |
| return arr[0]; |
| } |
| |
| int test_amdgcn_target_tid_threads_simd() { |
| int arr[N]; |
| #pragma omp target simd |
| for (int i = 0; i < N; i++) { |
| arr[i] = 1; |
| } |
| return arr[0]; |
| } |
| |
| #endif |
| // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14 |
| // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr |
| // CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr |
| // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr |
| // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]]) |
| // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 |
| // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] |
| // CHECK: user_code.entry: |
| // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: br label [[FOR_COND:%.*]] |
| // CHECK: for.cond: |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000 |
| // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] |
| // CHECK: for.body: |
| // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] |
| // CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: br label [[FOR_INC:%.*]] |
| // CHECK: for.inc: |
| // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] |
| // CHECK: worker.exit: |
| // CHECK-NEXT: ret void |
| // CHECK: for.end: |
| // CHECK-NEXT: call void @__kmpc_target_deinit() |
| // CHECK-NEXT: ret void |
| // |
| // |
| // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23 |
| // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] { |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr |
| // CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr |
| // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr |
| // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr |
| // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr |
| // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]]) |
| // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 |
| // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] |
| // CHECK: user_code.entry: |
| // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4 |
| // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] |
| // CHECK: omp.inner.for.cond: |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] |
| // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000 |
| // CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] |
| // CHECK: omp.inner.for.body: |
| // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 |
| // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] |
| // CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] |
| // CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] |
| // CHECK: omp.body.continue: |
| // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] |
| // CHECK: omp.inner.for.inc: |
| // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 |
| // CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] |
| // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] |
| // CHECK: worker.exit: |
| // CHECK-NEXT: ret void |
| // CHECK: omp.inner.for.end: |
| // CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4 |
| // CHECK-NEXT: call void @__kmpc_target_deinit() |
| // CHECK-NEXT: ret void |
| // |