| // RUN: %libomptarget-compile-generic && %libomptarget-run-generic |
| // RUN: %libomptarget-compileopt-generic && %libomptarget-run-generic |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| int main() { |
| long unsigned *DP = 0; |
| int N = 32; |
| int Threads = 64; |
| int Teams = 10; |
| |
| // Allocate ~55MB on the device. |
| #pragma omp target map(from : DP) |
| DP = (long unsigned *)malloc(sizeof(long unsigned) * N * Threads * Teams); |
| |
| #pragma omp target teams distribute parallel for num_teams(Teams) \ |
| thread_limit(Threads) is_device_ptr(DP) |
| for (int i = 0; i < Threads * Teams; ++i) { |
| for (int j = 0; j < N; ++j) { |
| DP[i * N + j] = i + j; |
| } |
| } |
| |
| long unsigned s = 0; |
| #pragma omp target teams distribute parallel for num_teams(Teams) \ |
| thread_limit(Threads) reduction(+ : s) |
| for (int i = 0; i < Threads * Teams; ++i) { |
| for (int j = 0; j < N; ++j) { |
| s += DP[i * N + j]; |
| } |
| } |
| |
| // CHECK: Sum: 6860800 |
| printf("Sum: %li\n", s); |
| return 0; |
| } |