| #include <utils.h> |
| |
| #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ |
| DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ |
| return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ |
| return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ |
| return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ |
| return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ |
| return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| } |
| |
| #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ |
| DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \ |
| return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \ |
| return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ |
| FUNCTION(x.z, y.z)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \ |
| return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \ |
| return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \ |
| return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| } |
| |
| #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ |
| DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \ |
| return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \ |
| return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \ |
| FUNCTION(x, y.z)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \ |
| return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \ |
| return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \ |
| return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| } \ |
| \ |
| |
| #define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \ |
| DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \ |
| return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \ |
| return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \ |
| FUNCTION(x.z, y.z, z.z)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \ |
| return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \ |
| return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \ |
| return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ |
| } |
| |
| #define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
| ARG2_TYPE, ARG3_TYPE) \ |
| DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \ |
| return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \ |
| return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \ |
| FUNCTION(x, y, z.z)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \ |
| return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \ |
| return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ |
| } \ |
| \ |
| DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \ |
| return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ |
| } |
| |
| #define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
| ADDR_SPACE, ARG2_TYPE) \ |
| DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ |
| FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ |
| ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ |
| return (__CLC_XCONCAT(RET_TYPE, 2))( \ |
| FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
| FUNCTION(x.y, \ |
| (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \ |
| } \ |
| \ |
| DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ |
| FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ |
| ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ |
| return (__CLC_XCONCAT(RET_TYPE, 3))( \ |
| FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
| FUNCTION(x.y, \ |
| (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \ |
| FUNCTION(x.z, \ |
| (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| } \ |
| \ |
| DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ |
| FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ |
| ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ |
| return (__CLC_XCONCAT(RET_TYPE, 4))( \ |
| FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \ |
| FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
| ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| } \ |
| \ |
| DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ |
| FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ |
| ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ |
| return (__CLC_XCONCAT(RET_TYPE, 8))( \ |
| FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \ |
| FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
| ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \ |
| } \ |
| \ |
| DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ |
| FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ |
| ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ |
| return (__CLC_XCONCAT(RET_TYPE, 16))( \ |
| FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \ |
| FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
| ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \ |
| } |
| |
| #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ |
| _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ |
| return BUILTIN(x, y); \ |
| } \ |
| _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) |
| |
| #define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ |
| _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ |
| _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) |
| |
| #define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \ |
| _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \ |
| return BUILTIN(x); \ |
| } \ |
| _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE) |
| |
| #ifdef cl_khr_fp16 |
| |
| #pragma OPENCL EXTENSION cl_khr_fp16 : enable |
| |
| #define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \ |
| _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \ |
| return (half)FUNCTION((float)x); \ |
| } \ |
| _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half) |
| |
| #define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \ |
| _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \ |
| return (half)FUNCTION((float)x, (float)y); \ |
| } \ |
| _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half) |
| |
| #else |
| |
| #define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) |
| #define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) |
| |
| #endif |