blob: 6a40c18728bf539e7ee3e942dd9a606c7497f5c3 [file] [log] [blame]
//===------- target_impl.h - AMDGCN OpenMP GPU implementation ----- HIP -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Declarations and definitions of target specific functions and constants
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_AMDGCN_TARGET_IMPL_H
#define OMPTARGET_AMDGCN_TARGET_IMPL_H
#ifndef __AMDGCN__
#error "amdgcn target_impl.h expects to be compiled under __AMDGCN__"
#endif
#include "amdgcn_interface.h"
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#define DEVICE
#define INLINE inline DEVICE
#define NOINLINE __attribute__((noinline)) DEVICE
#define ALIGN(N) __attribute__((aligned(N)))
////////////////////////////////////////////////////////////////////////////////
// Kernel options
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// The following def must match the absolute limit hardwired in the host RTL
// max number of threads per team
#define MAX_THREADS_PER_TEAM 1024
#define WARPSIZE 64
// Maximum number of preallocated arguments to an outlined parallel/simd
// function. Anything more requires dynamic memory allocation.
#define MAX_SHARED_ARGS 20
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#define OMP_STATE_COUNT 32
#define MAX_SM 64
#define OMP_ACTIVE_PARALLEL_LEVEL 128
// Data sharing related quantities, need to match what is used in the compiler.
enum DATA_SHARING_SIZES {
// The maximum number of workers in a kernel.
DS_Max_Worker_Threads = 960,
// The size reserved for data in a shared memory slot.
DS_Slot_Size = 256,
// The slot size that should be reserved for a working warp.
DS_Worker_Warp_Slot_Size = WARPSIZE * DS_Slot_Size,
// The maximum number of warps in use
DS_Max_Warp_Number = 16,
};
enum : __kmpc_impl_lanemask_t {
__kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
};
EXTERN int printf(const char *, ...);
#endif