| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --prefix-filecheck-ir-name _ --global-value-regex "llvm.compiler.used" "_[0-9a-zA-Z]+A[0-9a-zA-Z]+pi[0-9a-zA-Z]+" "_[0-9a-zA-Z]+anotherPi" --version 2 |
| // REQUIRES: amdgpu-registered-target |
| |
| |
| // Test target codegen - host bc file has to be created first. |
| // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-amd.bc |
| // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-target-debug -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-amd.bc -o - | FileCheck %s --check-prefix=CHECK-AMD |
| |
| // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-nvidia.bc |
| // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-unknown-unknown -emit-llvm %s -fopenmp-target-debug -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-nvidia.bc -o - | FileCheck %s --check-prefix=CHECK-NVIDIA |
| |
| // expected-no-diagnostics |
| |
| #ifndef HEADER |
| #define HEADER |
| |
// Allocator handle enumeration declared directly in this test; no header is
// included by this file, so this is the only declaration of these names here.
// The predefined handles are numbered 0 through 8 in the order listed.
// omp_pteam_mem_alloc (7) is the handle named by the `allocator` clause of the
// `omp allocate` directive inside main().
| typedef enum omp_allocator_handle_t { |
| omp_null_allocator = 0, |
| omp_default_mem_alloc = 1, |
| omp_large_cap_mem_alloc = 2, |
| omp_const_mem_alloc = 3, |
| omp_high_bw_mem_alloc = 4, |
| omp_low_lat_mem_alloc = 5, |
| omp_cgroup_mem_alloc = 6, |
| omp_pteam_mem_alloc = 7, |
| omp_thread_mem_alloc = 8, |
// Largest value of the target's uintptr_t; because it is an enumerator, the
// enum's underlying type has to be wide enough to represent it.
// NOTE(review): presumably mirrors the runtime's <omp.h> declaration - confirm.
| KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ |
| } omp_allocator_handle_t; |
| |
// Test input for the device codegen runs at the top of this file (those
// command lines only emit LLVM bitcode/IR; nothing here is executed by them).
//
// Allocates a 10000-element int array, maps it to and from the device for the
// duration of a `target data` region, and runs one combined
// `target teams distribute parallel for` loop over it. Each iteration declares
// a 10-element local array that is subject to `omp allocate` with the
// omp_pteam_mem_alloc handle, fills it from `a`, and stores the first element
// back into a[i].
//
// Returns a[17].
//
// NOTE(review): the elements of `a` are never given initial values and the
// array is never released in this function. Rewriting the body would change
// the IR this test produces, so the code is left exactly as written.
| int main() |
| { |
| int N = 10000; |
// Heap array with no initializer; its contents are whatever new[] leaves.
| int *a = new int[N]; |
// Map all N elements to the device on entry and copy them back on exit.
| #pragma omp target data map(tofrom:a[:N]) |
| { |
| #pragma omp target teams distribute parallel for |
| for(int i = 0; i < N; i++) |
| { |
// The directive below must follow the declaration of the variable it names;
// it requests the omp_pteam_mem_alloc allocator for local_a.
| int local_a[10]; |
| #pragma omp allocate(local_a) allocator(omp_pteam_mem_alloc) |
// Copy ten consecutive elements starting at index i, wrapping around at N.
// NOTE(review): these reads cover elements that other iterations write.
| for(int j = 0; j < 10; j++) |
| local_a[j] = a[(i + j) % N]; |
// local_a[0] was loaded from a[i % N], and i < N here, so this stores back
// the value that was read from a[i].
| a[i] = local_a[0]; |
| } |
| } |
| return a[17]; |
| } |
| |
| #endif |
| //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| // CHECK-AMD: {{.*}} |
| // CHECK-NVIDIA: {{.*}} |