clang/test/Driver/fat_archive_nvptx.cpp - llvm-project - Git at Google

 // REQUIRES: clang-driver
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target

 // See the steps to create a fat archive are given at the end of the file.

 // Given a FatArchive, clang-offload-bundler should be called to create a
 // device specific archive, which should be passed to clang-nvlink-wrapper.
 // RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
 // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
 // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
 // expected-no-diagnostics

 #ifndef HEADER
 #define HEADER

 #define N 10

 #pragma omp declare target
 // Functions defined in Fat Archive.
 extern "C" void func_present(float *, float *, unsigned);

 #ifdef MISSING
 // Function not defined in the fat archive.
 extern "C" void func_missing(float *, float *, unsigned);
 #endif

 #pragma omp end declare target

 int main() {
   float in[N], out[N], sum = 0;
   unsigned i;

 #pragma omp parallel for
   for (i = 0; i < N; ++i) {
     in[i] = i;
   }

   func_present(in, out, N); // Returns out[i] = a[i] * 0

 #ifdef MISSING
   func_missing(in, out, N); // Should throw an error here
 #endif

 #pragma omp parallel for reduction(+ \
                                    : sum)
   for (i = 0; i < N; ++i)
     sum += out[i];

   if (!sum)
     return 0;
   return sum;
 }

 #endif

 /***********************************************
    Steps to create Fat Archive (libFatArchive.a)
 ************************************************
 ***************** File: func_1.c ***************
 void func_present(float* in, float* out, unsigned n){
   unsigned i;
   #pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
   for(i=0; i<n; ++i){
     out[i] = in[i] * 0;
   }
 }
 *************************************************
 1. Compile source file(s) to generate object file(s)
     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
     clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
     clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o

 2. Create a fat archive by combining all the object file(s)
     llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
 ************************************************/
	// REQUIRES: clang-driver
	// REQUIRES: x86-registered-target
	// REQUIRES: nvptx-registered-target

	// See the steps to create a fat archive are given at the end of the file.

	// Given a FatArchive, clang-offload-bundler should be called to create a
	// device specific archive, which should be passed to clang-nvlink-wrapper.
	// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 \| FileCheck %s
	// CHECK: clang{{.}}"-cc1"{{.}}"-triple" "nvptx64"{{.}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.}}"-o" "[[HOSTBC:..s]]" "-x" "c++"{{.}}.cpp
	// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:..a]]" "-allow-missing-bundles"
	// CHECK: clang-nvlink-wrapper{{.}}"-o" "{{.}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
	// expected-no-diagnostics

	#ifndef HEADER
	#define HEADER

	#define N 10

	#pragma omp declare target
	// Functions defined in Fat Archive.
	extern "C" void func_present(float , float , unsigned);

	#ifdef MISSING
	// Function not defined in the fat archive.
	extern "C" void func_missing(float , float , unsigned);
	#endif

	#pragma omp end declare target

	int main() {
	float in[N], out[N], sum = 0;
	unsigned i;

	#pragma omp parallel for
	for (i = 0; i < N; ++i) {
	in[i] = i;
	}

	func_present(in, out, N); // Returns out[i] = a[i] * 0

	#ifdef MISSING
	func_missing(in, out, N); // Should throw an error here
	#endif

	#pragma omp parallel for reduction(+ \
	: sum)
	for (i = 0; i < N; ++i)
	sum += out[i];

	if (!sum)
	return 0;
	return sum;
	}

	#endif

	/***********************************************
	Steps to create Fat Archive (libFatArchive.a)
	************************************************
	*************** File: func_1.c *************
	void func_present(float* in, float* out, unsigned n){
	unsigned i;
	#pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
	for(i=0; i<n; ++i){
	out[i] = in[i] * 0;
	}
	}
	*************************************************
	1. Compile source file(s) to generate object file(s)
	clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
	clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
	clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
	clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
	clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
	clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o

	2. Create a fat archive by combining all the object file(s)
	llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
	************************************************/