// blob: 991041e9c4dca7fc676c2d9cd0e0dd20a0b87c57 [file] [log] [blame]
/*
* Copyright (c) 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
//
// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved.
//
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
#include "pipes.h"
#include "../workgroup/wg.h"
/*
 * Per-work-item read reservation for pipes with a compile-time packet size.
 *
 * Expands to __reserve_read_pipe_internal_<SIZE>(p, num_packets):
 * snapshot the producer index (write_idx), then claim num_packets on the
 * consumer index (read_idx) via reserve() (declared in pipes.h).  When the
 * claimed range reaches the snapshotted write_idx the pipe has just been
 * drained, so both indices are rewound to 0 to keep them from growing
 * without bound; write_idx is reset with release ordering.  Returns the
 * starting packet index of the reservation.
 *
 * NOTE(review): reserve()'s exact contract (including its failure/sentinel
 * convention) is defined in pipes.h -- confirm there; this comment only
 * describes what is visible here.
 */
#define __RESERVE_READ_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__reserve_read_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
size_t wi = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device); \
size_t rid = reserve(&p->read_idx, wi, num_packets); \
\
if (rid + num_packets == wi) { \
atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device); \
atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device); \
} \
\
return rid; \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__RESERVE_READ_PIPE_INTERNAL_SIZE)
/*
 * Per-work-item read reservation for pipes with a runtime packet size
 * (the size argument distinguishes this entry point from the fixed-size
 * variants; it is not needed for index bookkeeping).
 * Returns the starting packet index handed back by reserve().
 */
__attribute__((weak, always_inline)) size_t
__reserve_read_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    // Snapshot the producer index, then claim num_packets on the consumer side.
    size_t widx = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device);
    size_t res = reserve(&p->read_idx, widx, num_packets);

    // This reservation drains the pipe: rewind both indices so they stay
    // bounded.  The write_idx reset uses release ordering.
    if (res + num_packets == widx) {
        atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device);
        atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device);
    }

    return res;
}
/*
 * Per-work-item write reservation for pipes with a compile-time packet size.
 *
 * Expands to __reserve_write_pipe_internal_<SIZE>(p, num_packets): claims
 * num_packets on write_idx, bounded above by read_idx + end_idx.
 * NOTE(review): end_idx presumably holds the pipe capacity in packets --
 * confirm against struct pipeimp in pipes.h.  Returns the starting packet
 * index of the reservation.
 */
#define __RESERVE_WRITE_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__reserve_write_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
size_t ri = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device); \
size_t ei = p->end_idx; \
return reserve(&p->write_idx, ri + ei, num_packets); \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__RESERVE_WRITE_PIPE_INTERNAL_SIZE)
/*
 * Per-work-item write reservation for pipes with a runtime packet size.
 * Writers may advance write_idx up to read_idx + end_idx (end_idx
 * presumably the capacity in packets -- see pipes.h).  Returns the
 * starting packet index handed back by reserve().
 */
__attribute__((weak, always_inline)) size_t
__reserve_write_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    size_t limit = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device)
                 + p->end_idx;
    return reserve(&p->write_idx, limit, num_packets);
}
// Work group functions
/*
 * Work-group read reservation for pipes with a compile-time packet size.
 *
 * Work-item 0 performs the same snapshot/reserve/rewind sequence as the
 * per-work-item variant and publishes the result through a slot in
 * __wg_scratch (local memory, declared in ../workgroup/wg.h).  A barrier
 * with a local-memory fence then lets every work-item read the shared
 * result, so all work-items return the same starting packet index.
 * All work-items in the group must reach the barrier.
 */
#define __WORK_GROUP_RESERVE_READ_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__work_group_reserve_read_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
__local size_t *t = (__local size_t *)__wg_scratch; \
\
if ((int)get_local_linear_id() == 0) { \
size_t wi = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device); \
size_t rid = reserve(&p->read_idx, wi, num_packets); \
\
if (rid + num_packets == wi) { \
atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device); \
atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device); \
} \
\
*t = rid; \
} \
\
work_group_barrier(CLK_LOCAL_MEM_FENCE); \
\
return *t; \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__WORK_GROUP_RESERVE_READ_PIPE_INTERNAL_SIZE)
/*
 * Work-group read reservation for pipes with a runtime packet size.
 * Work-item 0 makes the reservation on behalf of the whole group and
 * shares the result through local memory; all work-items return the same
 * starting packet index.  Must be reached by every work-item (barrier).
 */
__attribute__((weak, always_inline)) size_t
__work_group_reserve_read_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    // Slot in the shared work-group scratch area used to publish the result.
    __local size_t *slot = (__local size_t *)__wg_scratch;

    if ((int)get_local_linear_id() == 0) {
        // Snapshot producer index, then claim num_packets on the consumer side.
        size_t widx = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device);
        size_t res = reserve(&p->read_idx, widx, num_packets);

        // Reservation drains the pipe: rewind both indices to keep them bounded.
        if (res + num_packets == widx) {
            atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device);
            atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device);
        }

        *slot = res;
    }

    // Make the leader's store visible to the whole group, then read it back.
    work_group_barrier(CLK_LOCAL_MEM_FENCE);
    return *slot;
}
/*
 * Work-group write reservation for pipes with a compile-time packet size.
 *
 * Work-item 0 claims num_packets on write_idx (bounded by read_idx +
 * end_idx, presumably the capacity -- see pipes.h) and publishes the
 * result through a __wg_scratch local-memory slot; after the barrier all
 * work-items return the same starting packet index.  All work-items in
 * the group must reach the barrier.
 */
#define __WORK_GROUP_RESERVE_WRITE_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__work_group_reserve_write_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
__local size_t *t = (__local size_t *)__wg_scratch; \
\
if ((int)get_local_linear_id() == 0) { \
size_t ri = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device); \
size_t ei = p->end_idx; \
*t = reserve(&p->write_idx, ri + ei, num_packets); \
} \
\
work_group_barrier(CLK_LOCAL_MEM_FENCE); \
\
return *t; \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__WORK_GROUP_RESERVE_WRITE_PIPE_INTERNAL_SIZE)
/*
 * Work-group write reservation for pipes with a runtime packet size.
 * Work-item 0 reserves for the whole group; the result is shared through
 * local memory so every work-item returns the same starting packet index.
 * Must be reached by every work-item (barrier).
 */
__attribute__((weak, always_inline)) size_t
__work_group_reserve_write_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    // Slot in the shared work-group scratch area used to publish the result.
    __local size_t *slot = (__local size_t *)__wg_scratch;

    if ((int)get_local_linear_id() == 0) {
        // Writers may advance up to read_idx + end_idx (capacity, per pipes.h).
        size_t limit = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device)
                     + p->end_idx;
        *slot = reserve(&p->write_idx, limit, num_packets);
    }

    // Make the leader's store visible to the whole group, then read it back.
    work_group_barrier(CLK_LOCAL_MEM_FENCE);
    return *slot;
}
// sub group functions
/*
 * Sub-group read reservation for pipes with a compile-time packet size
 * (requires cl_khr_subgroups).
 *
 * Lane 0 performs the snapshot/reserve/rewind sequence; the result is
 * then distributed with sub_group_broadcast, so every lane returns lane
 * 0's value.  rid's ~0 initializer on the other lanes is only a
 * placeholder -- it never escapes, since the broadcast overwrites it.
 * All work-items in the sub-group must reach the broadcast.
 */
#define __SUB_GROUP_RESERVE_READ_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__sub_group_reserve_read_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
size_t rid = ~(size_t)0; \
\
if (get_sub_group_local_id() == 0) { \
size_t wi = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device); \
rid = reserve(&p->read_idx, wi, num_packets); \
\
if (rid + num_packets == wi) { \
atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device); \
atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device); \
} \
} \
\
return sub_group_broadcast(rid, 0); \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__SUB_GROUP_RESERVE_READ_PIPE_INTERNAL_SIZE)
/*
 * Sub-group read reservation for pipes with a runtime packet size
 * (requires cl_khr_subgroups).  Lane 0 makes the reservation; every lane
 * returns lane 0's result via sub_group_broadcast.  Must be reached by
 * every work-item in the sub-group.
 */
__attribute__((weak, always_inline)) size_t
__sub_group_reserve_read_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    // Placeholder on non-leader lanes; the broadcast below overwrites it.
    size_t res = (size_t)-1;

    if (get_sub_group_local_id() == 0) {
        // Snapshot producer index, then claim num_packets on the consumer side.
        size_t widx = atomic_load_explicit(&p->write_idx, memory_order_relaxed, memory_scope_device);
        res = reserve(&p->read_idx, widx, num_packets);

        // Reservation drains the pipe: rewind both indices to keep them bounded.
        if (res + num_packets == widx) {
            atomic_store_explicit(&p->write_idx, 0, memory_order_release, memory_scope_device);
            atomic_store_explicit(&p->read_idx, 0, memory_order_relaxed, memory_scope_device);
        }
    }

    return sub_group_broadcast(res, 0);
}
/*
 * Sub-group write reservation for pipes with a compile-time packet size
 * (requires cl_khr_subgroups).
 *
 * Lane 0 claims num_packets on write_idx (bounded by read_idx + end_idx,
 * presumably the capacity -- see pipes.h); the result is distributed with
 * sub_group_broadcast so every lane returns lane 0's value.  rid's ~0
 * initializer on the other lanes never escapes -- the broadcast
 * overwrites it.  All work-items in the sub-group must reach the
 * broadcast.
 */
#define __SUB_GROUP_RESERVE_WRITE_PIPE_INTERNAL_SIZE(SIZE, STYPE) \
__attribute__((weak, always_inline)) size_t \
__sub_group_reserve_write_pipe_internal_##SIZE(__global struct pipeimp *p, uint num_packets) \
{ \
size_t rid = ~(size_t)0; \
\
if (get_sub_group_local_id() == 0) { \
size_t ri = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device); \
size_t ei = p->end_idx; \
rid = reserve(&p->write_idx, ri + ei, num_packets); \
} \
\
return sub_group_broadcast(rid, 0); \
}
/* Instantiate one function per supported fixed packet size. */
DO_PIPE_INTERNAL_SIZE(__SUB_GROUP_RESERVE_WRITE_PIPE_INTERNAL_SIZE)
/*
 * Sub-group write reservation for pipes with a runtime packet size
 * (requires cl_khr_subgroups).  Lane 0 reserves; every lane returns lane
 * 0's result via sub_group_broadcast.  Must be reached by every
 * work-item in the sub-group.
 */
__attribute__((weak, always_inline)) size_t
__sub_group_reserve_write_pipe_internal_user(__global struct pipeimp *p, uint num_packets, size_t size)
{
    // Placeholder on non-leader lanes; the broadcast below overwrites it.
    size_t res = (size_t)-1;

    if (get_sub_group_local_id() == 0) {
        // Writers may advance up to read_idx + end_idx (capacity, per pipes.h).
        size_t limit = atomic_load_explicit(&p->read_idx, memory_order_relaxed, memory_scope_device)
                     + p->end_idx;
        res = reserve(&p->write_idx, limit, num_packets);
    }

    return sub_group_broadcast(res, 0);
}