From: Francisco Jerez Date: Fri, 20 Apr 2012 14:56:19 +0000 (+0200) Subject: clover: Import OpenCL state tracker. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c6db1b3396384186aab5b685fe1fd540e17b3a62;p=mesa.git clover: Import OpenCL state tracker. --- diff --git a/configure.ac b/configure.ac index aac0a1a434c..07ecb1f00e1 100644 --- a/configure.ac +++ b/configure.ac @@ -616,7 +616,11 @@ AC_ARG_ENABLE([va], [enable va library @<:@default=auto@:>@])], [enable_va="$enableval"], [enable_va=auto]) - +AC_ARG_ENABLE([opencl], + [AS_HELP_STRING([--enable-opencl], + [enable OpenCL library @<:@default=no@:>@])], + [enable_opencl="$enableval"], + [enable_opencl=no]) AC_ARG_ENABLE([xlib_glx], [AS_HELP_STRING([--enable-xlib-glx], [make GLX library Xlib-based instead of DRI-based @<:@default=disable@:>@])], @@ -676,7 +680,8 @@ if test "x$enable_opengl" = xno -a \ "x$enable_d3d1x" = xno -a \ "x$enable_xvmc" = xno -a \ "x$enable_vdpau" = xno -a \ - "x$enable_va" = xno; then + "x$enable_va" = xno -a \ + "x$enable_opencl" = xno; then AC_MSG_ERROR([at least one API should be enabled]) fi @@ -1603,6 +1608,18 @@ if test "x$enable_va" = xyes; then HAVE_ST_VA="yes" fi +dnl +dnl OpenCL configuration +dnl + +if test "x$enable_opencl" = xyes; then + if test "x$with_gallium_drivers" = x; then + AC_MSG_ERROR([cannot enable OpenCL without Gallium]) + fi + GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS clover" + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS opencl" +fi + dnl dnl GLU configuration dnl @@ -1851,6 +1868,14 @@ AC_ARG_WITH([va-libdir], [VA_LIB_INSTALL_DIR='${libdir}/va']) AC_SUBST([VA_LIB_INSTALL_DIR]) +dnl Directory for OpenCL libs +AC_ARG_WITH([opencl-libdir], + [AS_HELP_STRING([--with-opencl-libdir=DIR], + [directory for the OpenCL libraries @<:@default=${libdir}/opencl@:>@])], + [OPENCL_LIB_INSTALL_DIR="$withval"], + [OPENCL_LIB_INSTALL_DIR='${libdir}/opencl']) +AC_SUBST([OPENCL_LIB_INSTALL_DIR]) + dnl dnl Gallium helper functions dnl @@ -2039,9 +2064,11 @@ 
CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS" dnl Substitute the config AC_CONFIG_FILES([configs/autoconf src/gallium/auxiliary/pipe-loader/Makefile + src/gallium/state_trackers/clover/Makefile src/gallium/drivers/Makefile src/gallium/drivers/r300/Makefile src/gallium/drivers/r600/Makefile + src/gallium/targets/opencl/Makefile src/gbm/Makefile src/gbm/main/gbm.pc src/egl/drivers/Makefile diff --git a/include/CL/cl.h b/include/CL/cl.h new file mode 100644 index 00000000000..4f21afe55bb --- /dev/null +++ b/include/CL/cl.h @@ -0,0 +1,998 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#ifdef __APPLE__ +#include <OpenCL/cl_platform.h> +#else +#include <CL/cl_platform.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint 
cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 
+#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 + +/* OpenCL Version */ +#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define 
CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 
+#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 + +/* cl_context_info + cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 + +/* cl_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE + +/* cl_mem_object_type */ +#define 
CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define 
CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +/********************************************************************************************************/ + +/* 
Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* 
param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS +#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! +/* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. 
+ */ +extern CL_API_ENTRY cl_int CL_API_CALL +clSetCommandQueueProperty(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats 
*/, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback( cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t 
* /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern 
CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback( cl_event /* event */, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event 
/* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* cb */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_origin */, + const size_t * /* host_origin */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* cb */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_origin */, + const size_t * /* host_origin */, + const size_t * /* 
region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * 
/* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* 
num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (*user_func)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) 
CL_API_SUFFIX__VERSION_1_0; + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ + diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp new file mode 100644 index 00000000000..99b86a66563 --- /dev/null +++ b/include/CL/cl.hpp @@ -0,0 +1,4011 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! 
\file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) + * \author Benedict R. Gaster and Laurent Morichetti + * + * Additions and fixes from Brian Cole, March 3rd 2010. + * + * \version 1.1 + * \date June 2010 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained within a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * For detailed documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.1 (revision 04) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * descriptions of these features. 
+ * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include + * #else + * #include + * #endif + * #include + * #include + * #include + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector devices = context.getInfo(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +#ifdef _WIN32 +#include +#include +#if defined(USE_DX_INTEROP) +#include +#endif +#endif // _WIN32 + +// +#if defined(USE_CL_DEVICE_FISSION) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#include +#else +#include +#include +#endif // !__APPLE__ + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include + +#if !defined(__NO_STD_VECTOR) +#include +#endif + +#if !defined(__NO_STD_STRING) +#include +#endif + +#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) +# include +#endif // linux + 
+#include + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +/*! \class Error + * \brief Exception class + */ +class Error : public std::exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! Create a new CL error exception for a given error code + * and corresponding message. + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + const cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + +//! 
\cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +/* Each macro passes the name of an OpenCL C entry point to __ERR_STR, which + * stringifies it when __CL_ENABLE_EXCEPTIONS is defined and yields NULL + * otherwise. The names must match the real API spelling exactly. + */ +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfilingInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define 
__SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnmapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + 
+#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) + +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) + +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. + * + * The object owns a heap buffer of size_ + 1 characters (NUL terminated), + * or no buffer at all (str_ == NULL, size_ == 0) when it is empty. + */ +class string +{ +private: + ::size_t size_; + char * str_; +public: + string(void) : size_(0), str_(NULL) + { + } + + string(char * str, ::size_t size) : + size_(size), + str_(NULL) + { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + + string(char * str) : + str_(NULL) + { + size_= ::strlen(str); + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + /* Free the buffer this object already owns; without this every + * re-assignment leaks the previous contents. + */ + delete[] str_; + str_ = NULL; + size_ = 0; + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + size_ = 0; + str_ = NULL; + } + else { + size_ = rhs.size_; + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + /* Members start out empty so that operator= never deletes an + * indeterminate pointer. + */ + string(const string& rhs) : + size_(0), + str_(NULL) + { + *this = rhs; + } + + ~string() + { + if (str_ != NULL) { + delete[] str_; + } + } + + ::size_t size(void) const { return size_; } + ::size_t length(void) const { return size(); } + + const char * c_str(void) const { return (str_) ? 
str_ : "";} +}; + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +#include +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) +typedef cl::string STRING_CLASS; +#endif + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#include +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector +#endif + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * std::vector functionality. + */ +template +class vector +{ +private: + T data_[N]; + unsigned int size_; + bool empty_; +public: + vector() : + size_(-1), + empty_(true) + {} + + ~vector() {} + + unsigned int size(void) const + { + return size_ + 1; + } + + void clear() + { + size_ = -1; + empty_ = true; + } + + void push_back (const T& x) + { + if (size() < N) { + size_++; + data_[size_] = x; + empty_ = false; + } + } + + void pop_back(void) + { + if (!empty_) { + data_[size_].~T(); + size_--; + if (size_ == -1) { + empty_ = true; + } + } + } + + vector(const vector& vec) : + size_(vec.size_), + empty_(vec.empty_) + { + if (!empty_) { + memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); + } + } + + vector(unsigned int size, const T& val = T()) : + size_(-1), + empty_(true) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + vector& operator=(const vector& rhs) + { + if (this == &rhs) { + return *this; + } + + size_ = rhs.size_; + empty_ = rhs.empty_; + + if (!empty_) { + memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); + } + + return *this; + } + + bool operator==(vector &vec) + { + if (empty_ && vec.empty_) { + return true; + } + + if (size() != vec.size()) { + return false; + } + + return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? 
true : false; + } + + operator T* () { return data_; } + operator const T* () const { return data_; } + + bool empty (void) const + { + return empty_; + } + + unsigned int max_size (void) const + { + return N; + } + + unsigned int capacity () const + { + return sizeof(T) * N; + } + + T& operator[](int index) + { + return data_[index]; + } + + T operator[](int index) const + { + return data_[index]; + } + + template + void assign(I start, I end) + { + clear(); + while(start < end) { + push_back(*start); + start++; + } + } + + /*! \class iterator + * \brief Iterator class for vectors + * + * Holds its own copy of the vector plus an index into it; an index of + * -1 is used for both begin() and end() of an empty vector. + */ + class iterator + { + private: + vector vec_; + int index_; + bool initialized_; + public: + iterator(void) : + index_(-1), + initialized_(false) + { + index_ = -1; + initialized_ = false; + } + + ~iterator(void) {} + + static iterator begin(vector &vec) + { + iterator i; + + if (!vec.empty()) { + i.index_ = 0; + } + + i.vec_ = vec; + i.initialized_ = true; + return i; + } + + static iterator end(vector &vec) + { + iterator i; + + if (!vec.empty()) { + i.index_ = vec.size(); + } + i.vec_ = vec; + i.initialized_ = true; + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_) && + (initialized_ == i.initialized_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + void operator++() + { + index_++; + } + + /* Postfix form: the int parameter only selects this overload (the + * compiler passes 0), so it must not be used as the step. + */ + void operator++(int) + { + index_++; + } + + void operator--() + { + index_--; + } + + /* Postfix form: see operator++(int). */ + void operator--(int) + { + index_--; + } + + T operator *() + { + return vec_[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_]; + } +}; + +/*! 
+ * \brief size_t class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, who's + * size is known statically. + */ +template +struct size_t : public cl::vector< ::size_t, N> { }; + +namespace detail { + +// GetInfo help struct +template +struct GetInfoHelper +{ + static cl_int + get(Functor f, cl_uint name, T* param) + { + return f(name, sizeof(T), param, NULL); + } +}; + +// Specialized GetInfoHelper for VECTOR_CLASS params +template +struct GetInfoHelper > +{ + static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) + { + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; + } +}; + +// Specialized for getInfo +template +struct GetInfoHelper > +{ + static cl_int + get(Func f, cl_uint name, VECTOR_CLASS* param) + { + cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + if (err != CL_SUCCESS) { + return err; + } + + return CL_SUCCESS; + } +}; + +// Specialized GetInfoHelper for STRING_CLASS params +template +struct GetInfoHelper +{ + static cl_int get(Func f, cl_uint name, STRING_CLASS* param) + { + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; + } +}; + +#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ +namespace detail { \ +template \ +struct GetInfoHelper \ +{ \ + static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ + { \ + cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ + if (err != CL_SUCCESS) { \ + return err; \ + } \ + \ + return ReferenceHandler::retain((*param)()); \ + } \ +}; \ +} + + 
+/* Table of (info enum type, query token, C++ result type) entries for the + * OpenCL 1.0 queries; the macro argument F is applied to every entry. The + * result type must have the size of the value the query writes, otherwise + * part of the returned object is left uninitialised. + */ +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + 
F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ + \ + F(cl_profiling_info, 
CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + /* Sampler queries: each token is paired with the type named after it. */ \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + 
F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, 
cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) +#endif // USE_CL_DEVICE_FISSION + +template +struct param_traits {}; + +#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#undef __DECLARE_PARAM_TRAITS + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return GetInfoHelper::get(f, name, param); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return GetInfoHelper, T> + ::get(f0, name, param); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return GetInfoHelper, T> + ::get(f0, name, param); +} + +template +struct ReferenceHandler +{ }; + +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). 
+ static cl_int retain(cl_device_id) + { return CL_INVALID_DEVICE; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_INVALID_DEVICE; } +}; + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_INVALID_PLATFORM; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_INVALID_PLATFORM; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + +template +class Wrapper +{ 
+public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + if (object_ != NULL) { retain(); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { release(); } + object_ = rhs.object_; + if (object_ != NULL) { retain(); } + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + + cl_int retain() const + { + return ReferenceHandler::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler::release(object_); + } +}; + +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) throw(Error) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief ImageFormat interface fro cl_image_format. + */ +struct ImageFormat : public cl_image_format +{ + ImageFormat(){} + + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \class Device + * \brief Device interface for cl_device_id. 
+ */ +class Device : public detail::Wrapper +{ +public: + Device(cl_device_id device) { object_ = device; } + + Device() : detail::Wrapper() { } + + Device(const Device& device) : detail::Wrapper(device) { } + + Device& operator = (const Device& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif +}; + +/*! \class Platform + * \brief Platform interface. 
+ */ +class Platform : public detail::Wrapper +{ +public: + static const Platform null(); + + Platform(cl_platform_id platform) { object_ = platform; } + + Platform() : detail::Wrapper() { } + + Platform(const Platform& platform) : detail::Wrapper(platform) { } + + Platform& operator = (const Platform& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS* devices) const + { + cl_uint n = 0; + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. 
+ * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + static cl_int get( + VECTOR_CLASS* platforms) + { + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + 
return CL_SUCCESS; + } +}; + +static inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} + +class Context : public detail::Wrapper +{ +public: + Context( + const VECTOR_CLASS& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateContext( + properties, (cl_uint) devices.size(), + (cl_device_id*) &devices.front(), + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + Context() : detail::Wrapper() { } + + Context(const Context& context) : detail::Wrapper(context) { } + + Context& operator = (const Context& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS* formats) const + { + cl_uint numEntries; + cl_int err = ::clGetSupportedImageFormats( + object_, + 
flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*) value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + return CL_SUCCESS; + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Context) + +/*! \class Event + * \brief Event interface for cl_event. + */ +class Event : public detail::Wrapper +{ +public: + Event() : detail::Wrapper() { } + + Event(const Event& event) : detail::Wrapper(event) { } + + Event& operator = (const Event& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + 
__WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + static cl_int + waitForEvents(const VECTOR_CLASS& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Event) + +#if defined(CL_VERSION_1_1) +/*! \class UserEvent + * \brief User event interface for cl_event. + */ +class UserEvent : public Event +{ +public: + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + UserEvent() : Event() { } + + UserEvent(const UserEvent& event) : Event(event) { } + + UserEvent& operator = (const UserEvent& rhs) + { + if (this != &rhs) { + Event::operator=(rhs); + } + return *this; + } + + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +inline static cl_int +WaitForEvents(const VECTOR_CLASS& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \class Memory + * \brief Memory interface for cl_mem. 
+ */ +class Memory : public detail::Wrapper +{ +public: + Memory() : detail::Wrapper() { } + + Memory(const Memory& memory) : detail::Wrapper(memory) { } + + Memory& operator = (const Memory& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_1) + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif + +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) + +/*! \class Buffer + * \brief Memory buffer interface. 
+ */ +class Buffer : public Memory +{ +public: + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + Buffer() : Memory() { } + + Buffer(const Buffer& buffer) : Memory(buffer) { } + + Buffer& operator = (const Buffer& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + +#if defined(CL_VERSION_1_1) + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferD3D10() : Buffer() { } + + BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + + BufferD3D10& operator = (const BufferD3D10& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } +}; +#endif + +/*! 
\class BufferGL + * \brief Memory buffer interface for GL interop. + */ +class BufferGL : public Buffer +{ +public: + BufferGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferGL() : Buffer() { } + + BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + + BufferGL& operator = (const BufferGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \class BufferRenderGL + * \brief Memory buffer interface for GL interop with renderbuffer. + */ +class BufferRenderGL : public Buffer +{ +public: + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferRenderGL() : Buffer() { } + + BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + + BufferRenderGL& operator = (const BufferRenderGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \class Image + * \brief Base class interface for all images. 
+ */ +class Image : public Memory +{ +protected: + Image() : Memory() { } + + Image(const Image& image) : Memory(image) { } + + Image& operator = (const Image& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } +public: + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +/*! \class Image2D + * \brief Image interface for 2D images. + */ +class Image2D : public Image +{ +public: + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2D() { } + + Image2D(const Image2D& image2D) : Image(image2D) { } + + Image2D& operator = (const Image2D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } +}; + +/*! \class Image2DGL + * \brief 2D image interface for GL interop. 
+ */ +class Image2DGL : public Image2D +{ +public: + Image2DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DGL() : Image2D() { } + + Image2DGL(const Image2DGL& image) : Image2D(image) { } + + Image2DGL& operator = (const Image2DGL& rhs) + { + if (this != &rhs) { + Image2D::operator=(rhs); + } + return *this; + } +}; + +/*! \class Image3D + * \brief Image interface for 3D images. + */ +class Image3D : public Image +{ +public: + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image3D() { } + + Image3D(const Image3D& image3D) : Image(image3D) { } + + Image3D& operator = (const Image3D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } +}; + +/*! \class Image2DGL + * \brief 2D image interface for GL interop. 
+ */ +class Image3DGL : public Image3D +{ +public: + Image3DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + Image3DGL() : Image3D() { } + + Image3DGL(const Image3DGL& image) : Image3D(image) { } + + Image3DGL& operator = (const Image3DGL& rhs) + { + if (this != &rhs) { + Image3D::operator=(rhs); + } + return *this; + } +}; + +/*! \class Sampler + * \brief Sampler interface for cl_sampler. + */ +class Sampler : public detail::Wrapper +{ +public: + Sampler() { } + + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } + + Sampler& operator = (const Sampler& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) + +class Program; +class CommandQueue; +class Kernel; + +/*! 
\class NDRange + * \brief NDRange interface + */ +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + NDRange() + : dimensions_(0) + { } + + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_.push_back(size0); + } + + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_.push_back(size0); + sizes_.push_back(size1); + } + + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_.push_back(size0); + sizes_.push_back(size1); + sizes_.push_back(size2); + } + + operator const ::size_t*() const { return (const ::size_t*) sizes_; } + ::size_t dimensions() const { return dimensions_; } +}; + +static const NDRange NullRange; + +/*! + * \struct LocalSpaceArg + * \brief Local address raper for use with Kernel::setArg + */ +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static T* ptr(T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static void* ptr(LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +class KernelFunctor; + +/*! 
\class Kernel + * \brief Kernel interface that implements cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + Kernel() { } + + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } + + Kernel& operator = (const Kernel& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int setArg(cl_uint index, T value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } + + KernelFunctor bind( + const CommandQueue& queue, + const NDRange& offset, + const 
NDRange& global, + const NDRange& local); + + KernelFunctor bind( + const CommandQueue& queue, + const NDRange& global, + const NDRange& local); +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: + typedef VECTOR_CLASS > Binaries; + typedef VECTOR_CLASS > Sources; + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const VECTOR_CLASS& devices, + const Binaries& binaries, + VECTOR_CLASS* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + const ::size_t n = binaries.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); + + for (::size_t i = 0; i < n; ++i) { + images[i] = (const unsigned char*)binaries[(int)i].first; + lengths[i] = binaries[(int)i].second; + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + (cl_device_id*)&devices.front(), + lengths, images, binaryStatus != NULL + ? 
(cl_int*) &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + Program() { } + + Program(const Program& program) : detail::Wrapper(program) { } + + Program& operator = (const Program& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + cl_int build( + const VECTOR_CLASS& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + (cl_device_id*)&devices.front(), + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, 
__CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Program) + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +public: + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + CommandQueue() { } + + CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue& operator = (const CommandQueue& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + 
::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_READ_BUFFER_ERR); + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_WRITE_BUFFER_ERR); + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQEUE_COPY_BUFFER_ERR); + } + +#if defined(CL_VERSION_1_1) + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_READ_BUFFER_RECT_ERR); + } + + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQEUE_COPY_BUFFER_RECT_ERR); + } +#endif + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_READ_IMAGE_ERR); + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? 
(cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_WRITE_IMAGE_ERR); + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_COPY_IMAGE_ERR); + } + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? (const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_NDRANGE_KERNEL_ERR); + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_TASK_ERR); + } + + cl_int enqueueNativeKernel( + void (*userFptr)(void *), + std::pair args, + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* mem_locs = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + return detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_NATIVE_KERNEL); + } + + cl_int enqueueMarker(Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueMarker(object_, (cl_event*) event), + __ENQUEUE_MARKER_ERR); + } + + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + (const cl_event*) &events.front()), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_ACQUIRE_GL_ERR); + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_RELEASE_GL_ERR); + } + +#if defined (USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); + + return detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_ACQUIRE_GL_ERR); + } + + cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); + + return detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event), + __ENQUEUE_RELEASE_GL_ERR); + } +#endif + + cl_int enqueueBarrier() const + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) + +/*! \class KernelFunctor + * \brief Kernel functor interface + * + * \note Currently only functors of zero to ten arguments are supported. It + * is straightforward to add more and a more general solution, similar to + * Boost.Lambda could be followed if required in the future. + */ +class KernelFunctor +{ +private: + Kernel kernel_; + CommandQueue queue_; + NDRange offset_; + NDRange global_; + NDRange local_; + + cl_int err_; +public: + KernelFunctor() { } + + KernelFunctor( + const Kernel& kernel, + const CommandQueue& queue, + const NDRange& offset, + const NDRange& global, + const NDRange& local) : + kernel_(kernel), + queue_(queue), + offset_(offset), + global_(global), + local_(local), + err_(CL_SUCCESS) + {} + + KernelFunctor& operator=(const KernelFunctor& rhs); + + KernelFunctor(const KernelFunctor& rhs); + + cl_int getError() { return err_; } + + inline Event operator()(const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& 
a5, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const 
A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const A14& a14, + const VECTOR_CLASS* events = NULL); + + template + inline Event operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const A14& a14, + const A15& a15, + const VECTOR_CLASS* events = NULL); +}; + +inline KernelFunctor Kernel::bind( + const CommandQueue& queue, + const NDRange& offset, + const NDRange& global, + const NDRange& local) +{ + return KernelFunctor(*this,queue,offset,global,local); +} + +inline KernelFunctor Kernel::bind( + const CommandQueue& queue, + const NDRange& global, + const NDRange& local) +{ + return KernelFunctor(*this,queue,NullRange,global,local); +} + +inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) +{ + if (this == &rhs) { + return *this; + } + + kernel_ = rhs.kernel_; + queue_ = rhs.queue_; + offset_ = rhs.offset_; + global_ = rhs.global_; + local_ = rhs.local_; + + return *this; +} + +inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : + kernel_(rhs.kernel_), + queue_(rhs.queue_), + offset_(rhs.offset_), + global_(rhs.global_), + local_(rhs.local_) +{ +} + +Event KernelFunctor::operator()(const VECTOR_CLASS* events) +{ + Event event; + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + kernel_.setArg(10,a11); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + kernel_.setArg(10,a11); + kernel_.setArg(11,a12); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + kernel_.setArg(10,a11); + kernel_.setArg(11,a12); + kernel_.setArg(12,a13); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const A14& a14, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + kernel_.setArg(10,a11); + kernel_.setArg(11,a12); + kernel_.setArg(12,a13); + kernel_.setArg(13,a14); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? 
+ &event); + + return event; +} + +template +Event KernelFunctor::operator()( + const A1& a1, + const A2& a2, + const A3& a3, + const A4& a4, + const A5& a5, + const A6& a6, + const A7& a7, + const A8& a8, + const A9& a9, + const A10& a10, + const A11& a11, + const A12& a12, + const A13& a13, + const A14& a14, + const A15& a15, + const VECTOR_CLASS* events) +{ + Event event; + + kernel_.setArg(0,a1); + kernel_.setArg(1,a2); + kernel_.setArg(2,a3); + kernel_.setArg(3,a4); + kernel_.setArg(4,a5); + kernel_.setArg(5,a6); + kernel_.setArg(6,a7); + kernel_.setArg(7,a8); + kernel_.setArg(8,a9); + kernel_.setArg(9,a10); + kernel_.setArg(10,a11); + kernel_.setArg(11,a12); + kernel_.setArg(12,a13); + kernel_.setArg(13,a14); + kernel_.setArg(14,a15); + + err_ = queue_.enqueueNDRangeKernel( + kernel_, + offset_, + global_, + local_, + NULL, // bgaster_fixme - do we want to allow wait event lists? + &event); + + return event; +} + +#undef __ERR_STR +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef 
__CREATE_PROGRAM_WITH_BINARY_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __GET_INFO_HELPER_WITH_RETAIN + +// Extensions +#undef __INIT_CL_EXT_FCN_PTR +#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +} // namespace cl + +#endif // CL_HPP_ diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h new file mode 100644 index 00000000000..4e92c7e634b --- /dev/null +++ b/include/CL/cl_ext.h @@ -0,0 +1,213 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. 
*/ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include + #include +#else + #include +#endif + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, + void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). 
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + * + * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( + cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define
CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 + + +#ifdef CL_VERSION_1_1 + /*********************************** + * cl_ext_device_fission extension * + ***********************************/ + #define cl_ext_device_fission 1 + + extern CL_API_ENTRY cl_int CL_API_CALL + clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + extern CL_API_ENTRY cl_int CL_API_CALL + clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef cl_ulong cl_device_partition_property_ext; + extern CL_API_ENTRY cl_int CL_API_CALL + clCreateSubDevicesEXT( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + /* cl_device_partition_property_ext */ + #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 + #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 + #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 + #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + + /* clDeviceGetInfo selectors */ + #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 + #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 + 
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 + #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 + #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + + /* error codes */ + #define CL_DEVICE_PARTITION_FAILED_EXT -1057 + #define CL_INVALID_PARTITION_COUNT_EXT -1058 + #define CL_INVALID_PARTITION_NAME_EXT -1059 + + /* CL_AFFINITY_DOMAINs */ + #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 + #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 + #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 + #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 + #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 + #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + + /* cl_device_partition_property_ext list terminators */ + #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + + +#endif /* CL_VERSION_1_1 */ + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h new file mode 100644 index 00000000000..3b4fe0690e5 --- /dev/null +++ b/include/CL/cl_gl.h @@ -0,0 +1,155 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +/* + * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have + * OpenGL dependencies. The application is responsible for #including + * OpenGL or OpenGL ES headers before #including cl_gl.h. + */ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#ifdef __APPLE__ +#include +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* bufobj */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* renderbuffer */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type */, + cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define 
CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h new file mode 100644 index 00000000000..26e47821f9e --- /dev/null +++ b/include/CL/cl_gl_ext.h @@ -0,0 +1,69 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ +/* OpenGL dependencies. */ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include +#else + #include +#endif + +/* + * For each extension, follow this template + * /* cl_VEN_extname extension */ +/* #define cl_VEN_extname 1 + * ... define new types, if any + * ... define new tokens, if any + * ... define new APIs, if any + * + * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header + * This allows us to avoid having to decide whether to include GL headers or GLES here. + */ + +/* + * cl_khr_gl_event extension + * See section 9.9 in the OpenCL 1.1 spec for more information + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context /* context */, + cl_GLsync /* cl_GLsync */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h new file mode 100644 index 00000000000..043b0489df7 --- /dev/null +++ b/include/CL/cl_platform.h @@ -0,0 +1,1198 @@ +/********************************************************************************** + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +#ifdef __APPLE__ + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#else + #define CL_EXTENSION_WEAK_LINK + #define CL_API_SUFFIX__VERSION_1_0 + #define CL_EXT_SUFFIX__VERSION_1_0 + #define CL_API_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define 
CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 
0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define 
CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. 
Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ + typedef vector unsigned char __cl_uchar16; + typedef vector signed char __cl_char16; + typedef vector unsigned short __cl_ushort8; + typedef vector signed short __cl_short8; + typedef vector unsigned int __cl_uint4; + typedef vector signed int __cl_int4; + typedef vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if 
defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some 
method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y; }; + __extension__ struct{ cl_char s0, s1; }; + __extension__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w; }; + __extension__ struct{ cl_char s0, s1, s2, s3; }; + __extension__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w; }; + __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y; }; + __extension__ struct{ cl_uchar s0, s1; }; + __extension__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) /* key is upper-case, matching every other __CL_*__ guard */ + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3; }; + __extension__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if defined( __GNUC__) && !
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y; }; + __extension__ struct{ cl_short s0, s1; }; + __extension__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w; }; + __extension__ struct{ cl_short s0, s1, s2, s3; }; + __extension__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w; }; + __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y; }; + __extension__ struct{ cl_ushort s0, s1; }; + __extension__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3; }; + __extension__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y; }; + __extension__ struct{ cl_int s0, s1; }; + __extension__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w; }; + __extension__ struct{ cl_int s0, s1, s2, s3; }; + __extension__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w; }; + __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y; }; + __extension__ struct{ cl_uint s0, s1; }; + __extension__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w; }; + __extension__ struct{ cl_uint s0, s1, s2, s3; }; + __extension__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w; }; + __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y; }; + __extension__ struct{ cl_long s0, s1; }; + __extension__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w; }; + __extension__ struct{ cl_long s0, s1, s2, s3; }; + __extension__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w; }; + __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y; }; + __extension__ struct{ cl_ulong s0, s1; }; + __extension__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3; }; + __extension__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y; }; + __extension__ struct{ cl_float s0, s1; }; + __extension__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w; }; + __extension__ struct{ cl_float s0, s1, s2, s3; }; + __extension__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w; }; + __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y; }; + __extension__ struct{ cl_double s0, s1; }; + __extension__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w; }; + __extension__ struct{ cl_double s0, s1, s2, s3; }; + __extension__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w; }; + __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __extension__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) + __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __extension__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_BEGIN \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_BEGIN "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/include/CL/opencl.h b/include/CL/opencl.h new file mode 100644 index 00000000000..26a63899758 --- /dev/null +++ b/include/CL/opencl.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + +#include <OpenCL/cl.h> +#include <OpenCL/cl_gl.h> +#include <OpenCL/cl_gl_ext.h> +#include <OpenCL/cl_ext.h> + +#else + +#include <CL/cl.h> +#include <CL/cl_gl.h> +#include <CL/cl_gl_ext.h> +#include <CL/cl_ext.h> + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ + diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile index 0900efc664f..d5162c17507 100644 --- a/src/gallium/state_trackers/Makefile +++ b/src/gallium/state_trackers/Makefile @@ -17,7 +17,7 @@ subdirs: clean: - rm -f `find . -name \*.[oa]` + rm -f `find . -regex '.*\.l?[oa]'` rm -f `find .
-name depend` diff --git a/src/gallium/state_trackers/clover/Doxyfile b/src/gallium/state_trackers/clover/Doxyfile new file mode 100644 index 00000000000..50250e75672 --- /dev/null +++ b/src/gallium/state_trackers/clover/Doxyfile @@ -0,0 +1,1716 @@ +# Doxyfile 1.7.4 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" "). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = Clover + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or icon that is +# included in the documentation.
The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. 
+ +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. 
The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. 
+ +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. 
Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. 
This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. 
When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. 
If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. 
This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. 
+ +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = NO + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = api/ core/ + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. 
Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring.
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied.
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. 
+ +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output.
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. 
+ +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. 
This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file.
+ +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. For more information please see +# http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file.
+ +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. 
+ +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want formulas to look prettier in the HTML +# output. When enabled you also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. 
For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the +# mathjax.org site, so you can quickly see the result without installing +# MathJax, but it is strongly recommended to install a local copy of MathJax +# before deployment. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. 
If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). 
The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. 
+ +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. 
+ +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. 
the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. 
+ +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called Helvetica to the output +# directory and reference it in all dot files that doxygen generates. +# When you want a differently looking font you can specify the font name +# using DOT_FONTNAME. You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. 
+ +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. 
+ +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. 
This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am new file mode 100644 index 00000000000..da9f3bb92da --- /dev/null +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -0,0 +1,71 @@ +AUTOMAKE_OPTIONS = subdir-objects + +AM_CPPFLAGS = \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ + -DMESA_VERSION=\"$(MESA_VERSION)\" \ + -DPIPE_SEARCH_DIR=\"$(OPENCL_LIB_INSTALL_DIR)\" \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(srcdir) + +noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la + +libcltgsi_la_CXXFLAGS = \ + -std=c++0x + +libcltgsi_la_SOURCES = \ + tgsi/compiler.cpp + +libclllvm_la_CXXFLAGS = \ + -std=c++98 + +libclllvm_la_SOURCES = \ + llvm/invocation.cpp + +libclover_la_CXXFLAGS = \ + -std=c++0x + +libclover_la_LIBADD = \ + libcltgsi.la libclllvm.la + +libclover_la_SOURCES = \ + core/base.hpp \ 
+ core/compat.hpp \ + core/compiler.hpp \ + core/geometry.hpp \ + core/device.hpp \ + core/device.cpp \ + core/context.hpp \ + core/context.cpp \ + core/queue.hpp \ + core/queue.cpp \ + core/format.hpp \ + core/format.cpp \ + core/memory.hpp \ + core/memory.cpp \ + core/resource.hpp \ + core/resource.cpp \ + core/sampler.hpp \ + core/sampler.cpp \ + core/event.hpp \ + core/event.cpp \ + core/program.hpp \ + core/program.cpp \ + core/kernel.hpp \ + core/kernel.cpp \ + core/module.hpp \ + core/module.cpp \ + api/util.hpp \ + api/platform.cpp \ + api/device.cpp \ + api/context.cpp \ + api/queue.cpp \ + api/memory.cpp \ + api/transfer.cpp \ + api/sampler.cpp \ + api/event.cpp \ + api/program.cpp \ + api/kernel.cpp diff --git a/src/gallium/state_trackers/clover/api/context.cpp b/src/gallium/state_trackers/clover/api/context.cpp new file mode 100644 index 00000000000..c8d668933e5 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/context.cpp @@ -0,0 +1,120 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/context.hpp" + +using namespace clover; + +PUBLIC cl_context +clCreateContext(const cl_context_properties *props, cl_uint num_devs, + const cl_device_id *devs, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, + size_t, void *), + void *user_data, cl_int *errcode_ret) try { + auto mprops = property_map(props); + + if (!devs || !num_devs || + (!pfn_notify && user_data)) + throw error(CL_INVALID_VALUE); + + if (any_of(is_zero(), devs, devs + num_devs)) + throw error(CL_INVALID_DEVICE); + + for (auto p : mprops) { // only CL_CONTEXT_PLATFORM with a NULL platform is accepted + if (!(p.first == CL_CONTEXT_PLATFORM && + (cl_platform_id)p.second == NULL)) + throw error(CL_INVALID_PROPERTY); + } + + ret_error(errcode_ret, CL_SUCCESS); + return new context( + property_vector(mprops), + std::vector<cl_device_id>(devs, devs + num_devs)); + +} catch(error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_context +clCreateContextFromType(const cl_context_properties *props, + cl_device_type type, + void (CL_CALLBACK *pfn_notify)( + const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) { + cl_device_id dev; + cl_int ret; + + ret = clGetDeviceIDs(0, type, 1, &dev, 0); // request a single device of the given type + if (ret) { + ret_error(errcode_ret, ret); + return NULL; + } + + return clCreateContext(props, 1, &dev, pfn_notify, user_data, errcode_ret); +} + +PUBLIC cl_int +clRetainContext(cl_context ctx) { + if (!ctx) + return CL_INVALID_CONTEXT; + + ctx->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseContext(cl_context ctx) { + if (!ctx) + return CL_INVALID_CONTEXT; + + if (ctx->release()) // destroy the context once release() reports true + delete ctx; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetContextInfo(cl_context ctx, cl_context_info param, + size_t size, void *buf, 
size_t *size_ret) { + if (!ctx) + return CL_INVALID_CONTEXT; + + switch (param) { + case CL_CONTEXT_REFERENCE_COUNT: + return scalar_property(buf, size, size_ret, ctx->ref_count()); + + case CL_CONTEXT_NUM_DEVICES: + return scalar_property(buf, size, size_ret, ctx->devs.size()); + + case CL_CONTEXT_DEVICES: + return vector_property(buf, size, size_ret, ctx->devs); + + case CL_CONTEXT_PROPERTIES: + return vector_property(buf, size, size_ret, + ctx->props()); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp new file mode 100644 index 00000000000..03767519aaf --- /dev/null +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -0,0 +1,262 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+//
+
+#include "api/util.hpp"
+#include "core/device.hpp"
+
+using namespace clover;
+
+static device_registry registry;
+
+///
+/// Enumerate the devices in the registry that match \a device_type.
+///
+/// Only the null platform is accepted.  The first registered device
+/// is the one returned for CL_DEVICE_TYPE_DEFAULT.  At most
+/// \a num_entries devices are written to \a devices, while
+/// \a num_devices receives the total number of matches.
+///
+PUBLIC cl_int
+clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type,
+               cl_uint num_entries, cl_device_id *devices,
+               cl_uint *num_devices) {
+   std::vector<cl_device_id> devs;
+
+   if (platform != NULL)
+      return CL_INVALID_PLATFORM;
+
+   if ((!num_entries && devices) ||
+       (!num_devices && !devices))
+      return CL_INVALID_VALUE;
+
+   // Collect matching devices
+   for (device &dev : registry) {
+      if (((device_type & CL_DEVICE_TYPE_DEFAULT) &&
+           &dev == &registry.front()) ||
+          (device_type & dev.type()))
+         devs.push_back(&dev);
+   }
+
+   if (devs.empty())
+      return CL_DEVICE_NOT_FOUND;
+
+   // ...and return the requested data.
+   if (num_devices)
+      *num_devices = devs.size();
+   if (devices)
+      std::copy_n(devs.begin(),
+                  std::min((cl_uint)devs.size(), num_entries),
+                  devices);
+
+   return CL_SUCCESS;
+}
+
+///
+/// Query a property of \a dev.
+///
+/// The explicit template argument of each *_property call selects
+/// the type the value is returned as.  Several properties are
+/// reported as hard-coded constants rather than queried from the
+/// device object.  Unknown \a param values yield CL_INVALID_VALUE.
+///
+PUBLIC cl_int
+clGetDeviceInfo(cl_device_id dev, cl_device_info param,
+                size_t size, void *buf, size_t *size_ret) {
+   if (!dev)
+      return CL_INVALID_DEVICE;
+
+   switch (param) {
+   case CL_DEVICE_TYPE:
+      return scalar_property<cl_device_type>(buf, size, size_ret, dev->type());
+
+   case CL_DEVICE_VENDOR_ID:
+      return scalar_property<cl_uint>(buf, size, size_ret, dev->vendor_id());
+
+   case CL_DEVICE_MAX_COMPUTE_UNITS:
+      return scalar_property<cl_uint>(buf, size, size_ret, 1);
+
+   case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_block_size().size());
+
+   case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+      return vector_property<size_t>(buf, size, size_ret,
+                                     dev->max_block_size());
+
+   case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret, SIZE_MAX);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+      return scalar_property<cl_uint>(buf, size, size_ret, 16);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 8);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_ADDRESS_BITS:
+      return scalar_property<cl_uint>(buf, size, size_ret, 32);
+
+   case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_images_read());
+
+   case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_images_write());
+
+   case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+   case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     1 << dev->max_image_levels_2d());
+
+   case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+   case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+   case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     1 << dev->max_image_levels_3d());
+
+   case CL_DEVICE_IMAGE_SUPPORT:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_MAX_PARAMETER_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     dev->max_mem_input());
+
+   case CL_DEVICE_MAX_SAMPLERS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_samplers());
+
+   case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
+   case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 128);
+
+   case CL_DEVICE_SINGLE_FP_CONFIG:
+      return scalar_property<cl_device_fp_config>(buf, size, size_ret,
+         CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
+      return scalar_property<cl_device_mem_cache_type>(buf, size, size_ret,
+                                                       CL_NONE);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_GLOBAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_mem_global());
+
+   case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_const_buffer_size());
+
+   case CL_DEVICE_MAX_CONSTANT_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_const_buffers());
+
+   case CL_DEVICE_LOCAL_MEM_TYPE:
+      return scalar_property<cl_device_local_mem_type>(buf, size, size_ret,
+                                                       CL_LOCAL);
+
+   case CL_DEVICE_LOCAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_mem_local());
+
+   case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_FALSE);
+
+   case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
+      return scalar_property<size_t>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_ENDIAN_LITTLE:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_AVAILABLE:
+   case CL_DEVICE_COMPILER_AVAILABLE:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_EXECUTION_CAPABILITIES:
+      return scalar_property<cl_device_exec_capabilities>(buf, size, size_ret,
+                                                          CL_EXEC_KERNEL);
+
+   case CL_DEVICE_QUEUE_PROPERTIES:
+      return scalar_property<cl_command_queue_properties>(buf, size, size_ret,
+         CL_QUEUE_PROFILING_ENABLE);
+
+   case CL_DEVICE_NAME:
+      return string_property(buf, size, size_ret, dev->device_name());
+
+   case CL_DEVICE_VENDOR:
+      return string_property(buf, size, size_ret, dev->vendor_name());
+
+   case CL_DRIVER_VERSION:
+      return string_property(buf, size, size_ret, MESA_VERSION);
+
+   case CL_DEVICE_PROFILE:
+      return string_property(buf, size, size_ret, "FULL_PROFILE");
+
+   case CL_DEVICE_VERSION:
+      return string_property(buf, size, size_ret, "OpenCL 1.1 MESA " MESA_VERSION);
+
+   case CL_DEVICE_EXTENSIONS:
+      return string_property(buf, size, size_ret, "");
+
+   case CL_DEVICE_PLATFORM:
+      return scalar_property<cl_platform_id>(buf, size, size_ret, NULL);
+
+   case CL_DEVICE_HOST_UNIFIED_MEMORY:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
+      return scalar_property<cl_uint>(buf, size, size_ret, 16);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 8);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_OPENCL_C_VERSION:
+      return string_property(buf, size, size_ret, "OpenCL C 1.1");
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/event.cpp b/src/gallium/state_trackers/clover/api/event.cpp
new file mode 100644
index 00000000000..d6c37f6aef2
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/event.cpp
@@ -0,0 +1,239 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/event.hpp"
+
+using namespace clover;
+
+///
+/// Create a user event (a soft event with no dependencies) in
+/// \a ctx.
+///
+PUBLIC cl_event
+clCreateUserEvent(cl_context ctx, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new soft_event(*ctx, {}, false);
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Set the status of a user event: zero triggers it, a negative
+/// value aborts it with that status.  Positive values are rejected,
+/// and so are events that are not soft events or whose status is
+/// already <= 0.
+///
+PUBLIC cl_int
+clSetUserEventStatus(cl_event ev, cl_int status) {
+   if (!dynamic_cast<soft_event *>(ev))
+      return CL_INVALID_EVENT;
+
+   if (status > 0)
+      return CL_INVALID_VALUE;
+
+   if (ev->status() <= 0)
+      return CL_INVALID_OPERATION;
+
+   if (status)
+      ev->abort(status);
+   else
+      ev->trigger();
+
+   return CL_SUCCESS;
+}
+
+///
+/// Block until every event in \a evs has completed.  All events must
+/// belong to the same context as the first one.
+///
+PUBLIC cl_int
+clWaitForEvents(cl_uint num_evs, const cl_event *evs) try {
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &evs[0]->ctx)
+            throw error(CL_INVALID_CONTEXT);
+
+         if (ev->status() < 0)
+            throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
+      });
+
+   // Create a temporary soft event that depends on all the events in
+   // the wait list
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(evs[0]->ctx, { evs, evs + num_evs }, true));
+
+   // ...and wait on it.
+   sev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Query a property of \a ev.  Unknown \a param values yield
+/// CL_INVALID_VALUE.
+///
+PUBLIC cl_int
+clGetEventInfo(cl_event ev, cl_event_info param,
+               size_t size, void *buf, size_t *size_ret) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   switch (param) {
+   case CL_EVENT_COMMAND_QUEUE:
+      return scalar_property<cl_command_queue>(buf, size, size_ret, ev->queue());
+
+   case CL_EVENT_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &ev->ctx);
+
+   case CL_EVENT_COMMAND_TYPE:
+      return scalar_property<cl_command_type>(buf, size, size_ret, ev->command());
+
+   case CL_EVENT_COMMAND_EXECUTION_STATUS:
+      return scalar_property<cl_int>(buf, size, size_ret, ev->status());
+
+   case CL_EVENT_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, ev->ref_count());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Register \a pfn_event_notify to be called for \a ev.  Only the
+/// CL_COMPLETE callback type is accepted.
+///
+PUBLIC cl_int
+clSetEventCallback(cl_event ev, cl_int type,
+                   void (CL_CALLBACK *pfn_event_notify)(cl_event, cl_int,
+                                                        void *),
+                   void *user_data) try {
+   if (!ev)
+      throw error(CL_INVALID_EVENT);
+
+   if (!pfn_event_notify || type != CL_COMPLETE)
+      throw error(CL_INVALID_VALUE);
+
+   // Create a temporary soft event that depends on ev, with
+   // pfn_event_notify as completion action.
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(ev->ctx, { ev }, true,
+                     [=](event &) {
+                        ev->wait();
+                        pfn_event_notify(ev, ev->status(), user_data);
+                     }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Take an additional reference on \a ev.
+///
+PUBLIC cl_int
+clRetainEvent(cl_event ev) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   ev->retain();
+   return CL_SUCCESS;
+}
+
+///
+/// Drop a reference on \a ev, deleting it when release() reports
+/// that it was the last one.
+///
+PUBLIC cl_int
+clReleaseEvent(cl_event ev) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   if (ev->release())
+      delete ev;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Enqueue a marker on \a q and return its event through \a ev.
+///
+PUBLIC cl_int
+clEnqueueMarker(cl_command_queue q, cl_event *ev) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!ev)
+      throw error(CL_INVALID_VALUE);
+
+   *ev = new hard_event(*q, CL_COMMAND_MARKER, {});
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Enqueue a barrier on \a q.
+///
+PUBLIC cl_int
+clEnqueueBarrier(cl_command_queue q) {
+   if (!q)
+      return CL_INVALID_COMMAND_QUEUE;
+
+   // No need to do anything, q preserves data ordering strictly.
+   return CL_SUCCESS;
+}
+
+///
+/// Make subsequent commands on \a q wait for the events in \a evs,
+/// all of which must belong to the context of \a q.
+///
+PUBLIC cl_int
+clEnqueueWaitForEvents(cl_command_queue q, cl_uint num_evs,
+                       const cl_event *evs) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &q->ctx)
+            throw error(CL_INVALID_CONTEXT);
+      });
+
+   // Create a hard event that depends on the events in the wait list:
+   // subsequent commands in the same queue will be implicitly
+   // serialized with respect to it -- hard events always are.
+   ref_ptr<hard_event> hev = transfer(
+      new hard_event(*q, 0, { evs, evs + num_evs }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Profiling queries are not implemented: always reports
+/// CL_PROFILING_INFO_NOT_AVAILABLE.
+///
+PUBLIC cl_int
+clGetEventProfilingInfo(cl_event ev, cl_profiling_info param,
+                        size_t size, void *buf, size_t *size_ret) {
+   return CL_PROFILING_INFO_NOT_AVAILABLE;
+}
+
+///
+/// Block until all commands previously enqueued on \a q have
+/// completed.
+///
+PUBLIC cl_int
+clFinish(cl_command_queue q) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   // Create a temporary hard event -- it implicitly depends on all
+   // the previously queued hard events.
+   ref_ptr<hard_event> hev = transfer(new hard_event(*q, 0, { }));
+
+   // And wait on it.
+   hev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp
new file mode 100644
index 00000000000..44eeb277127
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -0,0 +1,318 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/kernel.hpp"
+#include "core/event.hpp"
+
+using namespace clover;
+
+///
+/// Create a kernel object for the symbol called \a name, looked up
+/// in the first binary of \a prog.  A failed lookup is reported as
+/// CL_INVALID_KERNEL_NAME.
+///
+PUBLIC cl_kernel
+clCreateKernel(cl_program prog, const char *name,
+               cl_int *errcode_ret) try {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (!name)
+      throw error(CL_INVALID_VALUE);
+
+   if (prog->binaries().empty())
+      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+   auto sym = prog->binaries().begin()->second.sym(name);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
+
+} catch (module::noent_error &e) {
+   ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
+   return NULL;
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create one kernel object per symbol of the first binary of
+/// \a prog.  \a count_ret receives the number of symbols.
+///
+/// This is a C entry point, so validation failures are converted to
+/// an error code by the function-try-block instead of being allowed
+/// to propagate to the caller as C++ exceptions.
+///
+PUBLIC cl_int
+clCreateKernelsInProgram(cl_program prog, cl_uint count,
+                         cl_kernel *kerns, cl_uint *count_ret) try {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (prog->binaries().empty())
+      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+   auto &syms = prog->binaries().begin()->second.syms;
+
+   if (kerns && count < syms.size())
+      throw error(CL_INVALID_VALUE);
+
+   if (kerns)
+      std::transform(syms.begin(), syms.end(), kerns,
+                     [=](const module::symbol &sym) {
+                        return new kernel(*prog, compat::string(sym.name),
+                                          { sym.args.begin(), sym.args.end() });
+                     });
+
+   if (count_ret)
+      *count_ret = syms.size();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Take an additional reference on \a kern.
+///
+PUBLIC cl_int
+clRetainKernel(cl_kernel kern) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   kern->retain();
+   return CL_SUCCESS;
+}
+
+///
+/// Drop a reference on \a kern, deleting it when release() reports
+/// that it was the last one.
+///
+PUBLIC cl_int
+clReleaseKernel(cl_kernel kern) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   if (kern->release())
+      delete kern;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Set the value of kernel argument number \a idx.
+///
+PUBLIC cl_int
+clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
+               const void *value) try {
+   if (!kern)
+      throw error(CL_INVALID_KERNEL);
+
+   if (idx >= kern->args.size())
+      throw error(CL_INVALID_ARG_INDEX);
+
+   kern->args[idx]->set(size, value);
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Query a property of \a kern.  Unknown \a param values yield
+/// CL_INVALID_VALUE.
+///
+PUBLIC cl_int
+clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
+                size_t size, void *buf, size_t *size_ret) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   switch (param) {
+   case CL_KERNEL_FUNCTION_NAME:
+      return string_property(buf, size, size_ret, kern->name());
+
+   case CL_KERNEL_NUM_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      kern->args.size());
+
+   case CL_KERNEL_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      kern->ref_count());
+
+   case CL_KERNEL_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret,
+                                         &kern->prog.ctx);
+
+   case CL_KERNEL_PROGRAM:
+      return scalar_property<cl_program>(buf, size, size_ret,
+                                         &kern->prog);
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query a work-group related property of \a kern.  \a dev may only
+/// be null when the program has exactly one binary; otherwise it
+/// must be a device the program has a binary for.
+///
+PUBLIC cl_int
+clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
+                         cl_kernel_work_group_info param,
+                         size_t size, void *buf, size_t *size_ret) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   if ((!dev && kern->prog.binaries().size() != 1) ||
+       (dev && !kern->prog.binaries().count(dev)))
+      return CL_INVALID_DEVICE;
+
+   switch (param) {
+   case CL_KERNEL_WORK_GROUP_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     kern->max_block_size());
+
+   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
+      return vector_property<size_t>(buf, size, size_ret,
+                                     kern->block_size());
+
+   case CL_KERNEL_LOCAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       kern->mem_local());
+
+   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
+      return scalar_property<size_t>(buf, size, size_ret, 1);
+
+   case CL_KERNEL_PRIVATE_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       kern->mem_private());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+namespace {
+   ///
+   /// Common argument checking shared by kernel invocation commands.
+   ///
+   /// Throws a clover::error carrying the matching CL error code on
+   /// the first check that fails.
+   ///
+   void
+   kernel_validate(cl_command_queue q, cl_kernel kern,
+                   cl_uint dims, const size_t *grid_offset,
+                   const size_t *grid_size, const size_t *block_size,
+                   cl_uint num_deps, const cl_event *deps,
+                   cl_event *ev) {
+      if (!q)
+         throw error(CL_INVALID_COMMAND_QUEUE);
+
+      if (!kern)
+         throw error(CL_INVALID_KERNEL);
+
+      // The wait list has to be checked for null entries before the
+      // context check below dereferences each of its events.
+      if (bool(num_deps) != bool(deps) ||
+          any_of(is_zero<cl_event>(), deps, deps + num_deps))
+         throw error(CL_INVALID_EVENT_WAIT_LIST);
+
+      if (&kern->prog.ctx != &q->ctx ||
+          any_of([&](const cl_event ev) {
+                return &ev->ctx != &q->ctx;
+             }, deps, deps + num_deps))
+         throw error(CL_INVALID_CONTEXT);
+
+      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
+               return !arg->set();
+            }, kern->args.begin(), kern->args.end()))
+         throw error(CL_INVALID_KERNEL_ARGS);
+
+      if (!kern->prog.binaries().count(&q->dev))
+         throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+      if (dims < 1 || dims > q->dev.max_block_size().size())
+         throw error(CL_INVALID_WORK_DIMENSION);
+
+      if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
+         throw error(CL_INVALID_GLOBAL_WORK_SIZE);
+
+      if (block_size && any_of([](size_t b, size_t max) {
+               return b == 0 || b > max;
+            }, block_size, block_size + dims,
+            q->dev.max_block_size().begin()))
+         throw error(CL_INVALID_WORK_ITEM_SIZE);
+
+      if (block_size && any_of([](size_t b, size_t g) {
+               return g % b;
+            }, block_size, block_size + dims, grid_size))
+         throw error(CL_INVALID_WORK_GROUP_SIZE);
+   }
+
+   ///
+   /// Common event action shared by kernel invocation commands.
+   ///
+   /// \a grid_size is divided element-wise by \a block_size before
+   /// the launch, so both vectors need at least grid_size.size()
+   /// elements.
+   ///
+   std::function<void (event &)>
+   kernel_op(cl_command_queue q, cl_kernel kern,
+             const std::vector<size_t> &grid_offset,
+             const std::vector<size_t> &grid_size,
+             const std::vector<size_t> &block_size) {
+      const std::vector<size_t> reduced_grid_size = map(
+         std::divides<size_t>(), grid_size.begin(), grid_size.end(),
+         block_size.begin());
+
+      return [=](event &) {
+         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
+      };
+   }
+
+   ///
+   /// Copy the \a n elements at \a p into a vector, or build a
+   /// vector of \a n copies of \a x when \a p is null.
+   ///
+   /// The fill constructor is spelled out on purpose: a braced
+   /// "{ n }" selects the initializer-list constructor and yields a
+   /// single element with value n.
+   ///
+   template<typename T, typename S>
+   std::vector<T>
+   opt_vector(const T *p, S n, T x = T()) {
+      if (p)
+         return std::vector<T>(p, p + n);
+      else
+         return std::vector<T>(n, x);
+   }
+}
+
+///
+/// Enqueue the execution of \a kern over a \a dims dimensional grid.
+///
+/// A null \a pgrid_offset means an all-zero offset and a null
+/// \a pblock_size selects a block size of one in every dimension.
+///
+PUBLIC cl_int
+clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
+                       cl_uint dims, const size_t *pgrid_offset,
+                       const size_t *pgrid_size, const size_t *pblock_size,
+                       cl_uint num_deps, const cl_event *deps,
+                       cl_event *ev) try {
+   // Validate first: the caller's arrays are only read once dims is
+   // known to be within the device limits.
+   kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
+                   num_deps, deps, ev);
+
+   const std::vector<size_t> grid_offset =
+      opt_vector(pgrid_offset, dims, size_t(0));
+   const std::vector<size_t> grid_size =
+      opt_vector(pgrid_size, dims, size_t(1));
+   const std::vector<size_t> block_size =
+      opt_vector(pblock_size, dims, size_t(1));
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
+      kernel_op(q, kern, grid_offset, grid_size, block_size));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Enqueue the execution of \a kern as a single work-item.
+///
+PUBLIC cl_int
+clEnqueueTask(cl_command_queue q, cl_kernel kern,
+              cl_uint num_deps, const cl_event *deps,
+              cl_event *ev) try {
+   const std::vector<size_t> grid_offset = { 0 };
+   const std::vector<size_t> grid_size = { 1 };
+   const std::vector<size_t> block_size = { 1 };
+
+   kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
+                   block_size.data(), num_deps, deps, ev);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_TASK, { deps, deps + num_deps },
+      kernel_op(q, kern, grid_offset, grid_size, block_size));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Native kernels are not supported: always reports
+/// CL_INVALID_OPERATION.
+///
+PUBLIC cl_int
+clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
+                      void *args, size_t args_size,
+                      cl_uint obj_count, const cl_mem *obj_list,
+                      const void **obj_args, cl_uint num_deps,
+                      const cl_event *deps, cl_event *ev) {
+   return CL_INVALID_OPERATION;
+}
diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
new file mode 100644
index 00000000000..1b1ae73796f
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -0,0 +1,305 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/memory.hpp"
+#include "core/format.hpp"
+
+using namespace clover;
+
+///
+/// Create a buffer object of \a size bytes in \a ctx.
+///
+/// \a host_ptr must be given if and only if CL_MEM_USE_HOST_PTR or
+/// CL_MEM_COPY_HOST_PTR is set.  CL_MEM_READ_WRITE is part of the
+/// accepted flag mask so that explicitly requesting the default
+/// access mode is not rejected.
+///
+PUBLIC cl_mem
+clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size,
+               void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!size)
+      throw error(CL_INVALID_BUFFER_SIZE);
+
+   if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new root_buffer(*ctx, flags, size, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create a sub-buffer covering a region of the root buffer \a obj.
+/// Only CL_BUFFER_CREATE_TYPE_REGION is supported.
+///
+PUBLIC cl_mem
+clCreateSubBuffer(cl_mem obj, cl_mem_flags flags, cl_buffer_create_type op,
+                  const void *op_info, cl_int *errcode_ret) try {
+   root_buffer *parent = dynamic_cast<root_buffer *>(obj);
+
+   if (!parent)
+      throw error(CL_INVALID_MEM_OBJECT);
+
+   if ((flags & (CL_MEM_USE_HOST_PTR |
+                 CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR)) ||
+       (~flags & parent->flags() & (CL_MEM_READ_ONLY |
+                                    CL_MEM_WRITE_ONLY)))
+      throw error(CL_INVALID_VALUE);
+
+   if (op == CL_BUFFER_CREATE_TYPE_REGION) {
+      const cl_buffer_region *reg = (const cl_buffer_region *)op_info;
+
+      // Compare against the space left after origin rather than
+      // computing origin + size, which could wrap around.
+      if (!reg ||
+          reg->origin > parent->size() ||
+          reg->size > parent->size() - reg->origin)
+         throw error(CL_INVALID_VALUE);
+
+      if (!reg->size)
+         throw error(CL_INVALID_BUFFER_SIZE);
+
+      ret_error(errcode_ret, CL_SUCCESS);
+      return new sub_buffer(*parent, flags, reg->origin, reg->size);
+
+   } else {
+      throw error(CL_INVALID_VALUE);
+   }
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create a 2D image object in \a ctx.  The format must be one of
+/// those reported by supported_formats() for 2D images.
+///
+PUBLIC cl_mem
+clCreateImage2D(cl_context ctx, cl_mem_flags flags,
+                const cl_image_format *format,
+                size_t width, size_t height, size_t row_pitch,
+                void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!format)
+      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+   if (width < 1 || height < 1)
+      throw error(CL_INVALID_IMAGE_SIZE);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format))
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new image2d(*ctx, flags, format, width, height,
+                      row_pitch, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create a 3D image object in \a ctx.  The depth must be at least
+/// two and the format must be one of those reported by
+/// supported_formats() for 3D images.
+///
+PUBLIC cl_mem
+clCreateImage3D(cl_context ctx, cl_mem_flags flags,
+                const cl_image_format *format,
+                size_t width, size_t height, size_t depth,
+                size_t row_pitch, size_t slice_pitch,
+                void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!format)
+      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+   if (width < 1 || height < 1 || depth < 2)
+      throw error(CL_INVALID_IMAGE_SIZE);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format))
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new image3d(*ctx, flags, format, width, height, depth,
+                      row_pitch, slice_pitch, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// List the image formats supported for objects of the given
+/// \a type.  At most \a count formats are written to \a buf, while
+/// \a count_ret receives the total number of supported formats.
+///
+PUBLIC cl_int
+clGetSupportedImageFormats(cl_context ctx, cl_mem_flags flags,
+                           cl_mem_object_type type, cl_uint count,
+                           cl_image_format *buf, cl_uint *count_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!count && buf)
+      throw error(CL_INVALID_VALUE);
+
+   auto formats = supported_formats(ctx, type);
+
+   if (buf)
+      std::copy_n(formats.begin(), std::min((cl_uint)formats.size(), count),
+                  buf);
+   if (count_ret)
+      *count_ret = formats.size();
+
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+///
+/// Query a property of the memory object \a obj.  The sub-buffer
+/// specific properties report NULL / 0 for other object types.
+///
+PUBLIC cl_int
+clGetMemObjectInfo(cl_mem obj, cl_mem_info param,
+                   size_t size, void *buf, size_t *size_ret) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   switch (param) {
+   case CL_MEM_TYPE:
+      return scalar_property<cl_mem_object_type>(buf, size, size_ret,
+                                                 obj->type());
+
+   case CL_MEM_FLAGS:
+      return scalar_property<cl_mem_flags>(buf, size, size_ret, obj->flags());
+
+   case CL_MEM_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret, obj->size());
+
+   case CL_MEM_HOST_PTR:
+      return scalar_property<void *>(buf, size, size_ret, obj->host_ptr());
+
+   case CL_MEM_MAP_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_MEM_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, obj->ref_count());
+
+   case CL_MEM_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &obj->ctx);
+
+   case CL_MEM_ASSOCIATED_MEMOBJECT: {
+      sub_buffer *sub = dynamic_cast<sub_buffer *>(obj);
+      return scalar_property<cl_mem>(buf, size, size_ret,
+                                     (sub ? &sub->parent : NULL));
+   }
+   case CL_MEM_OFFSET: {
+      sub_buffer *sub = dynamic_cast<sub_buffer *>(obj);
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     (sub ? sub->offset() : 0));
+   }
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query a property of the image \a obj.  Objects that are not
+/// images are rejected with CL_INVALID_MEM_OBJECT.
+///
+PUBLIC cl_int
+clGetImageInfo(cl_mem obj, cl_image_info param,
+               size_t size, void *buf, size_t *size_ret) {
+   image *img = dynamic_cast<image *>(obj);
+   if (!img)
+      return CL_INVALID_MEM_OBJECT;
+
+   switch (param) {
+   case CL_IMAGE_FORMAT:
+      return scalar_property<cl_image_format>(buf, size, size_ret,
+                                              img->format());
+
+   case CL_IMAGE_ELEMENT_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret, 0);
+
+   case CL_IMAGE_ROW_PITCH:
+      return scalar_property<size_t>(buf, size, size_ret, img->row_pitch());
+
+   case CL_IMAGE_SLICE_PITCH:
+      return scalar_property<size_t>(buf, size, size_ret, img->slice_pitch());
+
+   case CL_IMAGE_WIDTH:
+      return scalar_property<size_t>(buf, size, size_ret, img->width());
+
+   case CL_IMAGE_HEIGHT:
+      return scalar_property<size_t>(buf, size, size_ret, img->height());
+
+   case CL_IMAGE_DEPTH:
+      return scalar_property<size_t>(buf, size, size_ret, img->depth());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Take an additional reference on \a obj.
+///
+PUBLIC cl_int
+clRetainMemObject(cl_mem obj) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   obj->retain();
+   return CL_SUCCESS;
+}
+
+///
+/// Drop a reference on \a obj, deleting it when release() reports
+/// that it was the last one.
+///
+PUBLIC cl_int
+clReleaseMemObject(cl_mem obj) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   if (obj->release())
+      delete obj;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Register \a pfn_notify as a destruction callback of \a obj; it
+/// will be invoked with \a obj and \a user_data as arguments.
+///
+PUBLIC cl_int
+clSetMemObjectDestructorCallback(cl_mem obj,
+                                 void (CL_CALLBACK *pfn_notify)(cl_mem, void *),
+                                 void *user_data) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   if (!pfn_notify)
+      return CL_INVALID_VALUE;
+
+   obj->destroy_notify([=]{ pfn_notify(obj, user_data); });
+
+   return CL_SUCCESS;
+}
diff --git a/src/gallium/state_trackers/clover/api/platform.cpp b/src/gallium/state_trackers/clover/api/platform.cpp
new file mode 100644
index 00000000000..e5e80b85256
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/platform.cpp
@@ -0,0 +1,68 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files
(the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" + +using namespace clover; + +PUBLIC cl_int +clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) { + if ((!num_entries && platforms) || + (!num_platforms && !platforms)) + return CL_INVALID_VALUE; + + if (num_platforms) + *num_platforms = 1; + if (platforms) + *platforms = NULL; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, + size_t size, void *buf, size_t *size_ret) { + if (platform != NULL) + return CL_INVALID_PLATFORM; + + switch (param_name) { + case CL_PLATFORM_PROFILE: + return string_property(buf, size, size_ret, "FULL_PROFILE"); + + case CL_PLATFORM_VERSION: + return string_property(buf, size, size_ret, + "OpenCL 1.1 MESA " MESA_VERSION); + + case CL_PLATFORM_NAME: + return string_property(buf, size, size_ret, "Default"); + + case CL_PLATFORM_VENDOR: + return string_property(buf, size, size_ret, "Mesa"); + + case CL_PLATFORM_EXTENSIONS: + return string_property(buf, size, size_ret, ""); + + default: + 
return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp new file mode 100644 index 00000000000..e874c51ad7d --- /dev/null +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -0,0 +1,241 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/program.hpp" + +using namespace clover; + +PUBLIC cl_program +clCreateProgramWithSource(cl_context ctx, cl_uint count, + const char **strings, const size_t *lengths, + cl_int *errcode_ret) try { + std::string source; + + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (!count || !strings || + any_of(is_zero(), strings, strings + count)) + throw error(CL_INVALID_VALUE); + + // Concatenate all the provided fragments together + for (unsigned i = 0; i < count; ++i) + source += (lengths && lengths[i] ? 
+ std::string(strings[i], strings[i] + lengths[i]) : + std::string(strings[i])); + + // ...and create a program object for them. + ret_error(errcode_ret, CL_SUCCESS); + return new program(*ctx, source); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_program +clCreateProgramWithBinary(cl_context ctx, cl_uint count, + const cl_device_id *devs, const size_t *lengths, + const unsigned char **binaries, cl_int *status_ret, + cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (!count || !devs || !lengths || !binaries) + throw error(CL_INVALID_VALUE); + + if (any_of([&](const cl_device_id dev) { + return !ctx->has_device(dev); + }, devs, devs + count)) + throw error(CL_INVALID_DEVICE); + + // Deserialize the provided binaries, + auto modules = map( + [](const unsigned char *p, size_t l) -> std::pair { + if (!p || !l) + return { CL_INVALID_VALUE, {} }; + + try { + compat::istream::buffer_t bin(p, l); + compat::istream s(bin); + + return { CL_SUCCESS, module::deserialize(s) }; + + } catch (compat::istream::error &e) { + return { CL_INVALID_BINARY, {} }; + } + }, + binaries, binaries + count, lengths); + + // update the status array, + if (status_ret) + std::transform(modules.begin(), modules.end(), status_ret, + keys); + + if (any_of(key_equals(CL_INVALID_VALUE), + modules.begin(), modules.end())) + throw error(CL_INVALID_VALUE); + + if (any_of(key_equals(CL_INVALID_BINARY), + modules.begin(), modules.end())) + throw error(CL_INVALID_BINARY); + + // initialize a program object with them. 
+ ret_error(errcode_ret, CL_SUCCESS); + return new program(*ctx, { devs, devs + count }, + map(values, + modules.begin(), modules.end())); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clRetainProgram(cl_program prog) { + if (!prog) + return CL_INVALID_PROGRAM; + + prog->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseProgram(cl_program prog) { + if (!prog) + return CL_INVALID_PROGRAM; + + if (prog->release()) + delete prog; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clBuildProgram(cl_program prog, cl_uint count, const cl_device_id *devs, + const char *opts, void (*pfn_notify)(cl_program, void *), + void *user_data) try { + if (!prog) + throw error(CL_INVALID_PROGRAM); + + if (bool(count) != bool(devs) || + (!pfn_notify && user_data)) + throw error(CL_INVALID_VALUE); + + if (any_of([&](const cl_device_id dev) { + return !prog->ctx.has_device(dev); + }, devs, devs + count)) + throw error(CL_INVALID_DEVICE); + + prog->build({ devs, devs + count }); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clUnloadCompiler() { + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetProgramInfo(cl_program prog, cl_program_info param, + size_t size, void *buf, size_t *size_ret) { + if (!prog) + return CL_INVALID_PROGRAM; + + switch (param) { + case CL_PROGRAM_REFERENCE_COUNT: + return scalar_property(buf, size, size_ret, + prog->ref_count()); + + case CL_PROGRAM_CONTEXT: + return scalar_property(buf, size, size_ret, + &prog->ctx); + + case CL_PROGRAM_NUM_DEVICES: + return scalar_property(buf, size, size_ret, + prog->binaries().size()); + + case CL_PROGRAM_DEVICES: + return vector_property( + buf, size, size_ret, + map(keys, + prog->binaries().begin(), prog->binaries().end())); + + case CL_PROGRAM_SOURCE: + return string_property(buf, size, size_ret, prog->source()); + + case CL_PROGRAM_BINARY_SIZES: + return vector_property( + buf, size, size_ret, + map([](const std::pair &ent) { + 
compat::ostream::buffer_t bin; + compat::ostream s(bin); + ent.second.serialize(s); + return bin.size(); + }, + prog->binaries().begin(), prog->binaries().end())); + + case CL_PROGRAM_BINARIES: + return matrix_property( + buf, size, size_ret, + map([](const std::pair &ent) { + compat::ostream::buffer_t bin; + compat::ostream s(bin); + ent.second.serialize(s); + return bin; + }, + prog->binaries().begin(), prog->binaries().end())); + + default: + return CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clGetProgramBuildInfo(cl_program prog, cl_device_id dev, + cl_program_build_info param, + size_t size, void *buf, size_t *size_ret) { + if (!prog) + return CL_INVALID_PROGRAM; + + if (!prog->ctx.has_device(dev)) + return CL_INVALID_DEVICE; + + switch (param) { + case CL_PROGRAM_BUILD_STATUS: + return scalar_property(buf, size, size_ret, + prog->build_status(dev)); + + case CL_PROGRAM_BUILD_OPTIONS: + return string_property(buf, size, size_ret, prog->build_opts(dev)); + + case CL_PROGRAM_BUILD_LOG: + return string_property(buf, size, size_ret, prog->build_log(dev)); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/queue.cpp b/src/gallium/state_trackers/clover/api/queue.cpp new file mode 100644 index 00000000000..a7905bc4396 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/queue.cpp @@ -0,0 +1,102 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the 
Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "api/util.hpp" +#include "core/queue.hpp" + +using namespace clover; + +PUBLIC cl_command_queue +clCreateCommandQueue(cl_context ctx, cl_device_id dev, + cl_command_queue_properties props, + cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + if (!ctx->has_device(dev)) + throw error(CL_INVALID_DEVICE); + + if (props & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_PROFILING_ENABLE)) + throw error(CL_INVALID_VALUE); + + ret_error(errcode_ret, CL_SUCCESS); + return new command_queue(*ctx, *dev, props); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clRetainCommandQueue(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + q->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseCommandQueue(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + if (q->release()) + delete q; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetCommandQueueInfo(cl_command_queue q, cl_command_queue_info param, + size_t size, void *buf, size_t *size_ret) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + switch (param) { + case CL_QUEUE_CONTEXT: + return scalar_property(buf, size, size_ret, &q->ctx); + + case CL_QUEUE_DEVICE: + return scalar_property(buf, size, size_ret, &q->dev); + + case CL_QUEUE_REFERENCE_COUNT: + return scalar_property(buf, size, size_ret, q->ref_count()); + + case CL_QUEUE_PROPERTIES: + return scalar_property(buf, size, size_ret, + q->props()); + + default: + return 
CL_INVALID_VALUE; + } +} + +PUBLIC cl_int +clFlush(cl_command_queue q) { + if (!q) + return CL_INVALID_COMMAND_QUEUE; + + q->flush(); + return CL_SUCCESS; +} diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp b/src/gallium/state_trackers/clover/api/sampler.cpp new file mode 100644 index 00000000000..32ce22ef90f --- /dev/null +++ b/src/gallium/state_trackers/clover/api/sampler.cpp @@ -0,0 +1,90 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "api/util.hpp" +#include "core/sampler.hpp" + +using namespace clover; + +PUBLIC cl_sampler +clCreateSampler(cl_context ctx, cl_bool norm_mode, + cl_addressing_mode addr_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) try { + if (!ctx) + throw error(CL_INVALID_CONTEXT); + + ret_error(errcode_ret, CL_SUCCESS); + return new sampler(*ctx, norm_mode, addr_mode, filter_mode); + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clRetainSampler(cl_sampler s) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + s->retain(); + return CL_SUCCESS; +} + +PUBLIC cl_int +clReleaseSampler(cl_sampler s) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + if (s->release()) + delete s; + + return CL_SUCCESS; +} + +PUBLIC cl_int +clGetSamplerInfo(cl_sampler s, cl_sampler_info param, + size_t size, void *buf, size_t *size_ret) { + if (!s) + throw error(CL_INVALID_SAMPLER); + + switch (param) { + case CL_SAMPLER_REFERENCE_COUNT: + return scalar_property(buf, size, size_ret, s->ref_count()); + + case CL_SAMPLER_CONTEXT: + return scalar_property(buf, size, size_ret, &s->ctx); + + case CL_SAMPLER_NORMALIZED_COORDS: + return scalar_property(buf, size, size_ret, s->norm_mode()); + + case CL_SAMPLER_ADDRESSING_MODE: + return scalar_property(buf, size, size_ret, + s->addr_mode()); + + case CL_SAMPLER_FILTER_MODE: + return scalar_property(buf, size, size_ret, + s->filter_mode()); + + default: + return CL_INVALID_VALUE; + } +} diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp new file mode 100644 index 00000000000..c67b75e8034 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -0,0 +1,506 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including 
without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include + +#include "api/util.hpp" +#include "core/event.hpp" +#include "core/resource.hpp" + +using namespace clover; + +namespace { + typedef resource::point point; + + /// + /// Common argument checking shared by memory transfer commands. + /// + void + validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { + if (!q) + throw error(CL_INVALID_COMMAND_QUEUE); + + if (bool(num_deps) != bool(deps) || + any_of(is_zero(), deps, deps + num_deps)) + throw error(CL_INVALID_EVENT_WAIT_LIST); + + if (any_of([&](const cl_event ev) { + return &ev->ctx != &q->ctx; + }, deps, deps + num_deps)) + throw error(CL_INVALID_CONTEXT); + } + + /// + /// Memory object-specific argument checking shared by most memory + /// transfer commands. + /// + void + validate_obj(cl_command_queue q, cl_mem obj) { + if (!obj) + throw error(CL_INVALID_MEM_OBJECT); + + if (&obj->ctx != &q->ctx) + throw error(CL_INVALID_CONTEXT); + } + + /// + /// Class that encapsulates the task of mapping an object of type + /// \a T. The return value of get() should be implicitly + /// convertible to \a void *. 
+ /// + template struct __map; + + template<> struct __map { + static void * + get(cl_command_queue q, void *obj, cl_map_flags flags, + size_t offset, size_t size) { + return (char *)obj + offset; + } + }; + + template<> struct __map { + static const void * + get(cl_command_queue q, const void *obj, cl_map_flags flags, + size_t offset, size_t size) { + return (const char *)obj + offset; + } + }; + + template<> struct __map { + static mapping + get(cl_command_queue q, memory_obj *obj, cl_map_flags flags, + size_t offset, size_t size) { + return { *q, obj->resource(q), flags, true, { offset }, { size }}; + } + }; + + /// + /// Software copy from \a src_obj to \a dst_obj. They can be + /// either pointers or memory objects. + /// + template + std::function + soft_copy_op(cl_command_queue q, + T dst_obj, const point &dst_orig, const point &dst_pitch, + S src_obj, const point &src_orig, const point &src_pitch, + const point ®ion) { + return [=](event &) { + auto dst = __map::get(q, dst_obj, CL_MAP_WRITE, + dst_pitch(dst_orig), dst_pitch(region)); + auto src = __map::get(q, src_obj, CL_MAP_READ, + src_pitch(src_orig), src_pitch(region)); + point p; + + for (p[2] = 0; p[2] < region[2]; ++p[2]) { + for (p[1] = 0; p[1] < region[1]; ++p[1]) { + std::memcpy(static_cast(dst) + dst_pitch(p), + static_cast(src) + src_pitch(p), + src_pitch[0] * region[0]); + } + } + }; + } + + /// + /// Hardware copy from \a src_obj to \a dst_obj. 
+ /// + template + std::function + hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig, + S src_obj, const point &src_orig, const point ®ion) { + return [=](event &) { + dst_obj->resource(q).copy(*q, dst_orig, region, + src_obj->resource(q), src_orig); + }; + } +} + +PUBLIC cl_int +clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + size_t offset, size_t size, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr || offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, { 0 }, { 1 }, + obj, { offset }, { 1 }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + size_t offset, size_t size, const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr || offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + obj, { offset }, { 1 }, + ptr, { 0 }, { 1 }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *obj_origin, const size_t *host_origin, + const size_t *region, + size_t obj_row_pitch, size_t obj_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + 
hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, host_origin, + { 1, host_row_pitch, host_slice_pitch }, + obj, obj_origin, + { 1, obj_row_pitch, obj_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *obj_origin, const size_t *host_origin, + const size_t *region, + size_t obj_row_pitch, size_t obj_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + obj, obj_origin, + { 1, obj_row_pitch, obj_slice_pitch }, + ptr, host_origin, + { 1, host_row_pitch, host_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + size_t src_offset, size_t dst_offset, size_t size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps }, + hard_copy_op(q, dst_obj, { dst_offset }, + src_obj, { src_offset }, + { size, 1, 1 })); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + const size_t *src_origin, const size_t *dst_origin, + const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, + size_t dst_row_pitch, size_t dst_slice_pitch, + cl_uint 
num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, dst_origin, + { 1, dst_row_pitch, dst_slice_pitch }, + src_obj, src_origin, + { 1, src_row_pitch, src_slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *origin, const size_t *region, + size_t row_pitch, size_t slice_pitch, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *img = dynamic_cast(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + ptr, {}, + { 1, row_pitch, slice_pitch }, + obj, origin, + { 1, img->row_pitch(), img->slice_pitch() }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + const size_t *origin, const size_t *region, + size_t row_pitch, size_t slice_pitch, const void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *img = dynamic_cast(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + if (!ptr) + throw error(CL_INVALID_VALUE); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + obj, origin, + { 1, img->row_pitch(), img->slice_pitch() }, + ptr, {}, + { 1, row_pitch, slice_pitch }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, 
cl_mem dst_obj, + const size_t *src_origin, const size_t *dst_origin, + const size_t *region, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *src_img = dynamic_cast(src_obj); + image *dst_img = dynamic_cast(dst_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_img); + validate_obj(q, dst_img); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps }, + hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + const size_t *src_origin, const size_t *region, + size_t dst_offset, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *src_img = dynamic_cast(src_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_img); + validate_obj(q, dst_obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, { dst_offset }, + { 0, 0, 0 }, + src_obj, src_origin, + { 1, src_img->row_pitch(), src_img->slice_pitch() }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} + +PUBLIC cl_int +clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, + size_t src_offset, + const size_t *dst_origin, const size_t *region, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + image *dst_img = dynamic_cast(src_obj); + + validate_base(q, num_deps, deps); + validate_obj(q, src_obj); + validate_obj(q, dst_img); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps }, + soft_copy_op(q, + dst_obj, dst_origin, + { 1, dst_img->row_pitch(), dst_img->slice_pitch() }, + src_obj, { src_offset }, + { 0, 0, 0 }, + region)); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + 
return e.get(); +} + +PUBLIC void * +clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, + cl_map_flags flags, size_t offset, size_t size, + cl_uint num_deps, const cl_event *deps, + cl_event *ev, cl_int *errcode_ret) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + if (offset > obj->size() || offset + size > obj->size()) + throw error(CL_INVALID_VALUE); + + void *map = obj->resource(q).add_map( + *q, flags, blocking, { offset }, { size }); + + ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER, + { deps, deps + num_deps })); + ret_error(errcode_ret, CL_SUCCESS); + return map; + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC void * +clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking, + cl_map_flags flags, + const size_t *origin, const size_t *region, + size_t *row_pitch, size_t *slice_pitch, + cl_uint num_deps, const cl_event *deps, + cl_event *ev, cl_int *errcode_ret) try { + image *img = dynamic_cast(obj); + + validate_base(q, num_deps, deps); + validate_obj(q, img); + + void *map = obj->resource(q).add_map( + *q, flags, blocking, origin, region); + + ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, + { deps, deps + num_deps })); + ret_error(errcode_ret, CL_SUCCESS); + return map; + +} catch (error &e) { + ret_error(errcode_ret, e); + return NULL; +} + +PUBLIC cl_int +clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr, + cl_uint num_deps, const cl_event *deps, + cl_event *ev) try { + validate_base(q, num_deps, deps); + validate_obj(q, obj); + + hard_event *hev = new hard_event( + *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps }, + [=](event &) { + obj->resource(q).del_map(ptr); + }); + + ret_object(ev, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); +} diff --git a/src/gallium/state_trackers/clover/api/util.hpp b/src/gallium/state_trackers/clover/api/util.hpp new file mode 100644 index 00000000000..2f9ec1f6a10 --- 
/dev/null +++ b/src/gallium/state_trackers/clover/api/util.hpp @@ -0,0 +1,166 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CL_UTIL_HPP__ +#define __CL_UTIL_HPP__ + +#include +#include +#include +#include + +#include "core/base.hpp" +#include "pipe/p_compiler.h" + +namespace clover { + /// + /// Return a matrix (a container of containers) in \a buf with + /// argument and bounds checking. Intended to be used by + /// implementations of \a clGetXXXInfo(). 
+ /// + template + cl_int + matrix_property(void *buf, size_t size, size_t *size_ret, const V& v) { + if (buf && size < sizeof(T *) * v.size()) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = sizeof(T *) * v.size(); + + if (buf) + for_each([](typename V::value_type src, T *dst) { + if (dst) + std::copy(src.begin(), src.end(), dst); + }, + v.begin(), v.end(), (T **)buf); + + return CL_SUCCESS; + } + + /// + /// Return a vector in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + template + cl_int + vector_property(void *buf, size_t size, size_t *size_ret, const V& v) { + if (buf && size < sizeof(T) * v.size()) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = sizeof(T) * v.size(); + if (buf) + std::copy(v.begin(), v.end(), (T *)buf); + + return CL_SUCCESS; + } + + /// + /// Return a scalar in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + template + cl_int + scalar_property(void *buf, size_t size, size_t *size_ret, T v) { + return vector_property(buf, size, size_ret, std::vector(1, v)); + } + + /// + /// Return a string in \a buf with argument and bounds checking. + /// Intended to be used by implementations of \a clGetXXXInfo(). + /// + inline cl_int + string_property(void *buf, size_t size, size_t *size_ret, + const std::string &v) { + if (buf && size < v.size() + 1) + return CL_INVALID_VALUE; + + if (size_ret) + *size_ret = v.size() + 1; + if (buf) + std::strcpy((char *)buf, v.c_str()); + + return CL_SUCCESS; + } + + /// + /// Convert a NULL-terminated property list into an std::map. 
+ /// + template + std::map + property_map(const T *props) { + std::map m; + + while (props && *props) { + T key = *props++; + T value = *props++; + + if (m.count(key)) + throw clover::error(CL_INVALID_PROPERTY); + + m.insert({ key, value }); + } + + return m; + } + + /// + /// Convert an std::map into a NULL-terminated property list. + /// + template + std::vector + property_vector(const std::map &m) { + std::vector v; + + for (auto &p : m) { + v.push_back(p.first); + v.push_back(p.second); + } + + v.push_back(0); + return v; + } + + /// + /// Return an error code in \a p if non-zero. + /// + inline void + ret_error(cl_int *p, const clover::error &e) { + if (p) + *p = e.get(); + } + + /// + /// Return a reference-counted object in \a p if non-zero. + /// Otherwise release object ownership. + /// + template + void + ret_object(T p, S v) { + if (p) + *p = v; + else + v->release(); + } +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/base.hpp b/src/gallium/state_trackers/clover/core/base.hpp new file mode 100644 index 00000000000..19053f39235 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/base.hpp @@ -0,0 +1,285 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_BASE_HPP__ +#define __CORE_BASE_HPP__ + +#include +#include +#include +#include +#include +#include + +#include "CL/cl.h" + +/// +/// Main namespace of the CL state tracker. +/// +namespace clover { + /// + /// Class that represents an error that can be converted to an + /// OpenCL status code. + /// + class error : public std::runtime_error { + public: + error(cl_int code, std::string what = "") : + std::runtime_error(what), code(code) { + } + + cl_int get() const { + return code; + } + + protected: + cl_int code; + }; + + /// + /// Base class for objects that support reference counting. + /// + class ref_counter { + public: + ref_counter() : __ref_count(1) {} + + unsigned ref_count() { + return __ref_count; + } + + void retain() { + __ref_count++; + } + + bool release() { + return (--__ref_count) == 0; + } + + private: + std::atomic __ref_count; + }; + + /// + /// Intrusive smart pointer for objects that implement the + /// clover::ref_counter interface. 
+ /// + template + class ref_ptr { + public: + ref_ptr(T *q = NULL) : p(NULL) { + reset(q); + } + + template + ref_ptr(const ref_ptr &ref) : p(NULL) { + reset(ref.p); + } + + ~ref_ptr() { + reset(NULL); + } + + void reset(T *q = NULL) { + if (q) + q->retain(); + if (p && p->release()) + delete p; + p = q; + } + + ref_ptr &operator=(const ref_ptr &ref) { + reset(ref.p); + return *this; + } + + T *operator*() const { + return p; + } + + T *operator->() const { + return p; + } + + operator bool() const { + return p; + } + + private: + T *p; + }; + + /// + /// Transfer the caller's ownership of a reference-counted object + /// to a clover::ref_ptr smart pointer. + /// + template + inline ref_ptr + transfer(T *p) { + ref_ptr ref { p }; + p->release(); + return ref; + } + + template + struct __iter_helper { + template + static T + step(F op, S state, Its its, Args... args) { + return __iter_helper::step( + op, state, its, *(std::get(its)++), args...); + } + }; + + template + struct __iter_helper { + template + static T + step(F op, S state, Its its, Args... args) { + return op(state, *(std::get<0>(its)++), args...); + } + }; + + struct __empty {}; + + template + struct __iter_helper { + template + static T + step(F op, __empty state, Its its, Args... args) { + return op(*(std::get<0>(its)++), args...); + } + }; + + template + struct __result_helper { + typedef typename std::remove_const< + typename std::result_of< + F (typename std::iterator_traits::value_type...) + >::type + >::type type; + }; + + /// + /// Iterate \a op on the result of zipping all the specified + /// iterators together. + /// + /// Similar to std::for_each, but it accepts functions of an + /// arbitrary number of arguments. + /// + template + F + for_each(F op, It0 it0, It0 end0, Its... 
its) { + while (it0 != end0) + __iter_helper::step( + op, {}, std::tie(it0, its...)); + + return op; + } + + /// + /// Iterate \a op on the result of zipping all the specified + /// iterators together, storing return values in a new container. + /// + /// Similar to std::transform, but it accepts functions of an + /// arbitrary number of arguments and it doesn't have to be + /// provided with an output iterator. + /// + template::type>> + C + map(F op, It0 it0, It0 end0, Its... its) { + C c; + + while (it0 != end0) + c.push_back( + __iter_helper + ::step(op, {}, std::tie(it0, its...))); + + return c; + } + + /// + /// Reduce the result of zipping all the specified iterators + /// together, using iterative application of \a op from left to + /// right. + /// + /// Similar to std::accumulate, but it accepts functions of an + /// arbitrary number of arguments. + /// + template + T + fold(F op, T a, It0 it0, It0 end0, Its... its) { + while (it0 != end0) + a = __iter_helper::step( + op, a, std::tie(it0, its...)); + + return a; + } + + /// + /// Iterate \a op on the result of zipping the specified iterators + /// together, checking if any of the evaluations returns \a true. + /// + /// Similar to std::any_of, but it accepts functions of an + /// arbitrary number of arguments. + /// + template + bool + any_of(F op, It0 it0, It0 end0, Its... 
its) { + while (it0 != end0) + if (__iter_helper::step( + op, {}, std::tie(it0, its...))) + return true; + + return false; + } + + template + T + keys(const std::pair &ent) { + return ent.first; + } + + template + std::function &)> + key_equals(const T &x) { + return [=](const std::pair &ent) { + return ent.first == x; + }; + } + + template + S + values(const std::pair &ent) { + return ent.second; + } + + template + std::function + is_zero() { + return [](const T &x) { + return x == 0; + }; + } +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/compat.hpp b/src/gallium/state_trackers/clover/core/compat.hpp new file mode 100644 index 00000000000..c0057af3258 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/compat.hpp @@ -0,0 +1,290 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_COMPAT_HPP__ +#define __CORE_COMPAT_HPP__ + +#include +#include +#include +#include +#include + + +namespace clover { + namespace compat { + // XXX - For cases where we can't rely on STL... I.e. the + // interface between code compiled as C++98 and C++11 + // source. Get rid of this as soon as everything can be + // compiled as C++11. + + template + class vector { + protected: + static T * + alloc(int n, const T *q, int m) { + T *p = reinterpret_cast(std::malloc(n * sizeof(T))); + + for (int i = 0; i < m; ++i) + new(&p[i]) T(q[i]); + + return p; + } + + static void + free(int n, T *p) { + for (int i = 0; i < n; ++i) + p[i].~T(); + + std::free(p); + } + + public: + vector() : p(NULL), n(0) { + } + + vector(const vector &v) : p(alloc(v.n, v.p, v.n)), n(v.n) { + } + + vector(T *p, size_t n) : p(alloc(n, p, n)), n(n) { + } + + template + vector(const C &v) : + p(alloc(v.size(), &*v.begin(), v.size())), n(v.size()) { + } + + ~vector() { + free(n, p); + } + + vector & + operator=(const vector &v) { + free(n, p); + + p = alloc(v.n, v.p, v.n); + n = v.n; + + return *this; + } + + void + reserve(size_t m) { + if (n < m) { + T *q = alloc(m, p, n); + free(n, p); + + p = q; + n = m; + } + } + + void + resize(size_t m, T x = T()) { + size_t n = size(); + + reserve(m); + + for (size_t i = n; i < m; ++i) + new(&p[i]) T(x); + } + + void + push_back(const T &x) { + size_t n = size(); + reserve(n + 1); + new(&p[n]) T(x); + } + + size_t + size() const { + return n; + } + + T * + begin() { + return p; + } + + const T * + begin() const { + return p; + } + + T * + end() { + return p + n; + } + + const T * + end() const { + return p + n; + } + + T & + operator[](int i) { + return p[i]; + } + + const T & + operator[](int i) const { + return p[i]; + } + + private: + T *p; + size_t n; + }; + + template + class vector_ref { + public: + vector_ref(T *p, size_t n) : p(p), n(n) { + } + + template + vector_ref(C &v) : p(&*v.begin()), n(v.size()) { + } + + size_t + size() 
const { + return n; + } + + T * + begin() { + return p; + } + + const T * + begin() const { + return p; + } + + T * + end() { + return p + n; + } + + const T * + end() const { + return p + n; + } + + T & + operator[](int i) { + return p[i]; + } + + const T & + operator[](int i) const { + return p[i]; + } + + private: + T *p; + size_t n; + }; + + class istream { + public: + typedef vector_ref buffer_t; + + class error { + public: + virtual ~error() {} + }; + + istream(const buffer_t &buf) : buf(buf), offset(0) {} + + void + read(char *p, size_t n) { + if (offset + n > buf.size()) + throw error(); + + std::memcpy(p, buf.begin() + offset, n); + offset += n; + } + + private: + const buffer_t &buf; + size_t offset; + }; + + class ostream { + public: + typedef vector buffer_t; + + ostream(buffer_t &buf) : buf(buf), offset(buf.size()) {} + + void + write(const char *p, size_t n) { + buf.resize(offset + n); + std::memcpy(buf.begin() + offset, p, n); + offset += n; + } + + private: + buffer_t &buf; + size_t offset; + }; + + class string : public vector_ref { + public: + string(const char *p) : vector_ref(p, std::strlen(p)) { + } + + template + string(const C &v) : vector_ref(v) { + } + + operator std::string() const { + return std::string(begin(), end()); + } + + const char * + find(const string &s) const { + for (size_t i = 0; i + s.size() < size(); ++i) { + if (!std::memcmp(begin() + i, s.begin(), s.size())) + return begin() + i; + } + + return end(); + } + }; + + template + bool + operator==(const vector_ref &a, const vector_ref &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) + if (a[i] != b[i]) + return false; + + return true; + } + } +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp new file mode 100644 index 00000000000..a3998d5e2fb --- /dev/null +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -0,0 +1,53 @@ +// +// Copyright 2012 
Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_COMPILER_HPP__ +#define __CORE_COMPILER_HPP__ + +#include "core/compat.hpp" +#include "core/module.hpp" + +namespace clover { + class build_error { + public: + build_error(const compat::string &log) : log(log) { + } + + virtual ~build_error() { + } + + compat::string what() { + return log; + } + + private: + compat::vector log; + }; + + module compile_program_llvm(const compat::string &source, + const compat::string &target); + + module compile_program_tgsi(const compat::string &source, + const compat::string &target); +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/context.cpp b/src/gallium/state_trackers/clover/core/context.cpp new file mode 100644 index 00000000000..6e09a1acae0 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/context.cpp @@ -0,0 +1,37 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include + +#include "core/context.hpp" + +using namespace clover; + +_cl_context::_cl_context(const std::vector &props, + const std::vector &devs) : + devs(devs), __props(props) { +} + +bool +_cl_context::has_device(clover::device *dev) const { + return std::count(devs.begin(), devs.end(), dev); +} diff --git a/src/gallium/state_trackers/clover/core/context.hpp b/src/gallium/state_trackers/clover/core/context.hpp new file mode 100644 index 00000000000..d783fb6b14b --- /dev/null +++ b/src/gallium/state_trackers/clover/core/context.hpp @@ -0,0 +1,51 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_CONTEXT_HPP__ +#define __CORE_CONTEXT_HPP__ + +#include "core/base.hpp" +#include "core/device.hpp" + +namespace clover { + typedef struct _cl_context context; +} + +struct _cl_context : public clover::ref_counter { +public: + _cl_context(const std::vector &props, + const std::vector &devs); + _cl_context(const _cl_context &ctx) = delete; + + bool has_device(clover::device *dev) const; + + const std::vector &props() const { + return __props; + } + + const std::vector devs; + +private: + std::vector __props; +}; + +#endif diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp new file mode 100644 index 00000000000..8390f3f4abb --- /dev/null +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -0,0 +1,179 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "core/device.hpp" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +using namespace clover; + +namespace { + template + std::vector + get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) { + int sz = pipe->get_compute_param(pipe, cap, NULL); + std::vector v(sz / sizeof(T)); + + pipe->get_compute_param(pipe, cap, &v.front()); + return v; + } +} + +_cl_device_id::_cl_device_id(pipe_loader_device *ldev) : ldev(ldev) { + pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR); + if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) + throw error(CL_INVALID_DEVICE); +} + +_cl_device_id::_cl_device_id(_cl_device_id &&dev) : pipe(dev.pipe), ldev(dev.ldev) { + dev.ldev = NULL; + dev.pipe = NULL; +} + +_cl_device_id::~_cl_device_id() { + if (pipe) + pipe->destroy(pipe); + if (ldev) + pipe_loader_release(&ldev, 1); +} + +cl_device_type +_cl_device_id::type() const { + switch (ldev->type) { + case PIPE_LOADER_DEVICE_SOFTWARE: + return CL_DEVICE_TYPE_CPU; + case PIPE_LOADER_DEVICE_PCI: + return CL_DEVICE_TYPE_GPU; + default: + assert(0); + return 0; + } +} + +cl_uint +_cl_device_id::vendor_id() const { + switch (ldev->type) { + case PIPE_LOADER_DEVICE_SOFTWARE: + return 0; + case PIPE_LOADER_DEVICE_PCI: + return ldev->pci.vendor_id; + default: + assert(0); + return 0; + } +} + +size_t +_cl_device_id::max_images_read() const { + return PIPE_MAX_SHADER_RESOURCES; +} + +size_t +_cl_device_id::max_images_write() const { + return PIPE_MAX_SHADER_RESOURCES; +} + +cl_uint +_cl_device_id::max_image_levels_2d() const { + return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); +} + +cl_uint +_cl_device_id::max_image_levels_3d() const { + return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS); +} + +cl_uint +_cl_device_id::max_samplers() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); +} + +cl_ulong +_cl_device_id::max_mem_global() const { + return get_compute_param(pipe, + 
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_mem_local() const { + return get_compute_param(pipe, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_mem_input() const { + return get_compute_param(pipe, + PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0]; +} + +cl_ulong +_cl_device_id::max_const_buffer_size() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_CONSTS) * 16; +} + +cl_uint +_cl_device_id::max_const_buffers() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_CONST_BUFFERS); +} + +std::vector +_cl_device_id::max_block_size() const { + return get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); +} + +std::string +_cl_device_id::device_name() const { + return pipe->get_name(pipe); +} + +std::string +_cl_device_id::vendor_name() const { + return pipe->get_vendor(pipe); +} + +std::string +_cl_device_id::ir_target() const { + switch (pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_PREFERRED_IR)) { + case PIPE_SHADER_IR_TGSI: + return "tgsi"; + default: + assert(0); + return ""; + } +} + +device_registry::device_registry() { + int n = pipe_loader_probe(NULL, 0); + std::vector ldevs(n); + + pipe_loader_probe(&ldevs.front(), n); + + for (pipe_loader_device *ldev : ldevs) { + try { + devs.emplace_back(ldev); + } catch (error &) {} + } +} diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp new file mode 100644 index 00000000000..8f284ba5e42 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -0,0 +1,107 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, 
sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_DEVICE_HPP__ +#define __CORE_DEVICE_HPP__ + +#include +#include + +#include "core/base.hpp" +#include "core/format.hpp" +#include "pipe-loader/pipe_loader.h" + +namespace clover { + typedef struct _cl_device_id device; + class root_resource; + class hard_event; +} + +struct _cl_device_id { +public: + _cl_device_id(pipe_loader_device *ldev); + _cl_device_id(_cl_device_id &&dev); + _cl_device_id(const _cl_device_id &dev) = delete; + ~_cl_device_id(); + + cl_device_type type() const; + cl_uint vendor_id() const; + size_t max_images_read() const; + size_t max_images_write() const; + cl_uint max_image_levels_2d() const; + cl_uint max_image_levels_3d() const; + cl_uint max_samplers() const; + cl_ulong max_mem_global() const; + cl_ulong max_mem_local() const; + cl_ulong max_mem_input() const; + cl_ulong max_const_buffer_size() const; + cl_uint max_const_buffers() const; + + std::vector max_block_size() const; + std::string device_name() const; + std::string vendor_name() const; + std::string ir_target() const; + + friend struct _cl_command_queue; + friend class clover::root_resource; + friend class clover::hard_event; + friend std::set + clover::supported_formats(cl_context, cl_mem_object_type); 
+ +private: + pipe_screen *pipe; + pipe_loader_device *ldev; +}; + +namespace clover { + /// + /// Container of all the compute devices that are available in the + /// system. + /// + class device_registry { + public: + typedef std::vector::iterator iterator; + + device_registry(); + + iterator begin() { + return devs.begin(); + } + + iterator end() { + return devs.end(); + } + + device &front() { + return devs.front(); + } + + device &back() { + return devs.back(); + } + + protected: + std::vector devs; + }; +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp new file mode 100644 index 00000000000..aa287e9a0c9 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/event.cpp @@ -0,0 +1,175 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include "core/event.hpp" +#include "pipe/p_screen.h" + +using namespace clover; + +_cl_event::_cl_event(clover::context &ctx, + std::vector deps, + action action_ok, action action_fail) : + ctx(ctx), __status(0), wait_count(1), + action_ok(action_ok), action_fail(action_fail) { + for (auto ev : deps) + ev->chain(this); +} + +_cl_event::~_cl_event() { +} + +void +_cl_event::trigger() { + if (!--wait_count) { + action_ok(*this); + + while (!__chain.empty()) { + __chain.back()->trigger(); + __chain.pop_back(); + } + } +} + +void +_cl_event::abort(cl_int status) { + __status = status; + action_fail(*this); + + while (!__chain.empty()) { + __chain.back()->abort(status); + __chain.pop_back(); + } +} + +bool +_cl_event::signalled() const { + return !wait_count; +} + +void +_cl_event::chain(clover::event *ev) { + if (wait_count) { + ev->wait_count++; + __chain.push_back(ev); + ev->deps.push_back(this); + } +} + +hard_event::hard_event(clover::command_queue &q, cl_command_type command, + std::vector deps, action action) : + _cl_event(q.ctx, deps, action, [](event &ev){}), + __queue(q), __command(command), __fence(NULL) { + q.sequence(this); + trigger(); +} + +hard_event::~hard_event() { + pipe_screen *screen = queue()->dev.pipe; + screen->fence_reference(screen, &__fence, NULL); +} + +cl_int +hard_event::status() const { + pipe_screen *screen = queue()->dev.pipe; + + if (__status < 0) + return __status; + + else if (!__fence) + return CL_QUEUED; + + else if (!screen->fence_signalled(screen, __fence)) + return CL_SUBMITTED; + + else + return CL_COMPLETE; +} + +cl_command_queue +hard_event::queue() const { + return &__queue; +} + +cl_command_type +hard_event::command() const { + return __command; +} + +void +hard_event::wait() const { + pipe_screen *screen = queue()->dev.pipe; + + if (status() == CL_QUEUED) + queue()->flush(); + + if (!__fence || + !screen->fence_finish(screen, __fence, PIPE_TIMEOUT_INFINITE)) + throw 
error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); +} + +void +hard_event::fence(pipe_fence_handle *fence) { + pipe_screen *screen = queue()->dev.pipe; + screen->fence_reference(screen, &__fence, fence); +} + +soft_event::soft_event(clover::context &ctx, + std::vector deps, + bool __trigger, action action) : + _cl_event(ctx, deps, action, action) { + if (__trigger) + trigger(); +} + +cl_int +soft_event::status() const { + if (__status < 0) + return __status; + + else if (!signalled() || + any_of([](const ref_ptr &ev) { + return ev->status() != CL_COMPLETE; + }, deps.begin(), deps.end())) + return CL_SUBMITTED; + + else + return CL_COMPLETE; +} + +cl_command_queue +soft_event::queue() const { + return NULL; +} + +cl_command_type +soft_event::command() const { + return CL_COMMAND_USER; +} + +void +soft_event::wait() const { + for (auto ev : deps) + ev->wait(); + + if (status() != CL_COMPLETE) + throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); +} diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp new file mode 100644 index 00000000000..ea4ac4ae43c --- /dev/null +++ b/src/gallium/state_trackers/clover/core/event.hpp @@ -0,0 +1,138 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_EVENT_HPP__ +#define __CORE_EVENT_HPP__ + +#include + +#include "core/base.hpp" +#include "core/queue.hpp" + +namespace clover { + typedef struct _cl_event event; +} + +/// +/// Class that represents a task that might be executed asynchronously +/// at some point in the future. +/// +/// An event consists of a list of dependencies, a boolean signalled() +/// flag, and an associated task. An event is considered signalled as +/// soon as all its dependencies (if any) are signalled as well, and +/// the trigger() method is called; at that point the associated task +/// will be started through the specified \a action_ok. If the +/// abort() method is called instead, the specified \a action_fail is +/// executed and the associated task will never be started. Dependent +/// events will be aborted recursively. +/// +/// The execution status of the associated task can be queried using +/// the status() method, and it can be waited for completion using the +/// wait() method. 
+/// +struct _cl_event : public clover::ref_counter { +public: + typedef std::function action; + + _cl_event(clover::context &ctx, std::vector deps, + action action_ok, action action_fail); + virtual ~_cl_event(); + + void trigger(); + void abort(cl_int status); + bool signalled() const; + + virtual cl_int status() const = 0; + virtual cl_command_queue queue() const = 0; + virtual cl_command_type command() const = 0; + virtual void wait() const = 0; + + clover::context &ctx; + +protected: + void chain(clover::event *ev); + + cl_int __status; + std::vector> deps; + +private: + unsigned wait_count; + action action_ok; + action action_fail; + std::vector> __chain; +}; + +namespace clover { + /// + /// Class that represents a task executed by a command queue. + /// + /// Similar to a normal clover::event. In addition it's associated + /// with a given command queue \a q and a given OpenCL \a command. + /// hard_event instances created for the same queue are implicitly + /// ordered with respect to each other, and they are implicitly + /// triggered on construction. + /// + /// A hard_event is considered complete when the associated + /// hardware task finishes execution. + /// + class hard_event : public event { + public: + hard_event(clover::command_queue &q, cl_command_type command, + std::vector deps, + action action = [](event &){}); + ~hard_event(); + + virtual cl_int status() const; + virtual cl_command_queue queue() const; + virtual cl_command_type command() const; + virtual void wait() const; + + friend class ::_cl_command_queue; + + private: + virtual void fence(pipe_fence_handle *fence); + + clover::command_queue &__queue; + cl_command_type __command; + pipe_fence_handle *__fence; + }; + + /// + /// Class that represents a software event. + /// + /// A soft_event is not associated with any specific hardware task + /// or command queue. It's considered complete as soon as all its + /// dependencies finish execution. 
+ /// + class soft_event : public event { + public: + soft_event(clover::context &ctx, std::vector deps, + bool trigger, action action = [](event &){}); + + virtual cl_int status() const; + virtual cl_command_queue queue() const; + virtual cl_command_type command() const; + virtual void wait() const; + }; +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/format.cpp b/src/gallium/state_trackers/clover/core/format.cpp new file mode 100644 index 00000000000..8f6e14d6567 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/format.cpp @@ -0,0 +1,167 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#include + +#include "core/format.hpp" +#include "core/memory.hpp" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" + +namespace clover { + static const std::map formats { + { { CL_BGRA, CL_UNORM_INT8 }, PIPE_FORMAT_B8G8R8A8_UNORM }, + { { CL_ARGB, CL_UNORM_INT8 }, PIPE_FORMAT_A8R8G8B8_UNORM }, + { { CL_RGB, CL_UNORM_SHORT_565 }, PIPE_FORMAT_B5G6R5_UNORM }, + { { CL_LUMINANCE, CL_UNORM_INT8 }, PIPE_FORMAT_L8_UNORM }, + { { CL_A, CL_UNORM_INT8 }, PIPE_FORMAT_A8_UNORM }, + { { CL_INTENSITY, CL_UNORM_INT8 }, PIPE_FORMAT_I8_UNORM }, + { { CL_LUMINANCE, CL_UNORM_INT16 }, PIPE_FORMAT_L16_UNORM }, + { { CL_R, CL_FLOAT }, PIPE_FORMAT_R32_FLOAT }, + { { CL_RG, CL_FLOAT }, PIPE_FORMAT_R32G32_FLOAT }, + { { CL_RGB, CL_FLOAT }, PIPE_FORMAT_R32G32B32_FLOAT }, + { { CL_RGBA, CL_FLOAT }, PIPE_FORMAT_R32G32B32A32_FLOAT }, + { { CL_R, CL_UNORM_INT16 }, PIPE_FORMAT_R16_UNORM }, + { { CL_RG, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16_UNORM }, + { { CL_RGB, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16_UNORM }, + { { CL_RGBA, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_UNORM }, + { { CL_R, CL_SNORM_INT16 }, PIPE_FORMAT_R16_SNORM }, + { { CL_RG, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16_SNORM }, + { { CL_RGB, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16_SNORM }, + { { CL_RGBA, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_SNORM }, + { { CL_R, CL_UNORM_INT8 }, PIPE_FORMAT_R8_UNORM }, + { { CL_RG, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8_UNORM }, + { { CL_RGB, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8_UNORM }, + { { CL_RGBA, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_UNORM }, + { { CL_R, CL_SNORM_INT8 }, PIPE_FORMAT_R8_SNORM }, + { { CL_RG, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8_SNORM }, + { { CL_RGB, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8_SNORM }, + { { CL_RGBA, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_SNORM }, + { { CL_R, CL_HALF_FLOAT }, PIPE_FORMAT_R16_FLOAT }, + { { CL_RG, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16_FLOAT }, + { { CL_RGB, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16B16_FLOAT }, + { { CL_RGBA, CL_HALF_FLOAT }, 
PIPE_FORMAT_R16G16B16A16_FLOAT }, + { { CL_RGBx, CL_UNORM_SHORT_555 }, PIPE_FORMAT_B5G5R5X1_UNORM }, + { { CL_RGBx, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8X8_UNORM }, + { { CL_A, CL_UNORM_INT16 }, PIPE_FORMAT_A16_UNORM }, + { { CL_INTENSITY, CL_UNORM_INT16 }, PIPE_FORMAT_I16_UNORM }, + { { CL_LUMINANCE, CL_SNORM_INT8 }, PIPE_FORMAT_L8_SNORM }, + { { CL_INTENSITY, CL_SNORM_INT8 }, PIPE_FORMAT_I8_SNORM }, + { { CL_A, CL_SNORM_INT16 }, PIPE_FORMAT_A16_SNORM }, + { { CL_LUMINANCE, CL_SNORM_INT16 }, PIPE_FORMAT_L16_SNORM }, + { { CL_INTENSITY, CL_SNORM_INT16 }, PIPE_FORMAT_I16_SNORM }, + { { CL_A, CL_HALF_FLOAT }, PIPE_FORMAT_A16_FLOAT }, + { { CL_LUMINANCE, CL_HALF_FLOAT }, PIPE_FORMAT_L16_FLOAT }, + { { CL_INTENSITY, CL_HALF_FLOAT }, PIPE_FORMAT_I16_FLOAT }, + { { CL_A, CL_FLOAT }, PIPE_FORMAT_A32_FLOAT }, + { { CL_LUMINANCE, CL_FLOAT }, PIPE_FORMAT_L32_FLOAT }, + { { CL_INTENSITY, CL_FLOAT }, PIPE_FORMAT_I32_FLOAT }, + { { CL_RA, CL_UNORM_INT8 }, PIPE_FORMAT_R8A8_UNORM }, + { { CL_R, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8_UINT }, + { { CL_RG, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8_UINT }, + { { CL_RGB, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_UINT }, + { { CL_R, CL_SIGNED_INT8 }, PIPE_FORMAT_R8_SINT }, + { { CL_RG, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8_SINT }, + { { CL_RGB, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8_SINT }, + { { CL_RGBA, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_SINT }, + { { CL_R, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16_UINT }, + { { CL_RG, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16_UINT }, + { { CL_RGB, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_UINT }, + { { CL_R, CL_SIGNED_INT16 }, PIPE_FORMAT_R16_SINT }, + { { CL_RG, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16_SINT }, + { { CL_RGB, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16_SINT }, + { { CL_RGBA, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_SINT }, + { { CL_R, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32_UINT 
}, + { { CL_RG, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32_UINT }, + { { CL_RGB, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32_UINT }, + { { CL_RGBA, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_UINT }, + { { CL_R, CL_SIGNED_INT32 }, PIPE_FORMAT_R32_SINT }, + { { CL_RG, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32_SINT }, + { { CL_RGB, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32_SINT }, + { { CL_RGBA, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_SINT }, + { { CL_A, CL_UNSIGNED_INT8 }, PIPE_FORMAT_A8_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT8 }, PIPE_FORMAT_I8_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT8 }, PIPE_FORMAT_L8_UINT }, + { { CL_A, CL_SIGNED_INT8 }, PIPE_FORMAT_A8_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT8 }, PIPE_FORMAT_I8_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT8 }, PIPE_FORMAT_L8_SINT }, + { { CL_A, CL_UNSIGNED_INT16 }, PIPE_FORMAT_A16_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT16 }, PIPE_FORMAT_I16_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT16 }, PIPE_FORMAT_L16_UINT }, + { { CL_A, CL_SIGNED_INT16 }, PIPE_FORMAT_A16_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT16 }, PIPE_FORMAT_I16_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT16 }, PIPE_FORMAT_L16_SINT }, + { { CL_A, CL_UNSIGNED_INT32 }, PIPE_FORMAT_A32_UINT }, + { { CL_INTENSITY, CL_UNSIGNED_INT32 }, PIPE_FORMAT_I32_UINT }, + { { CL_LUMINANCE, CL_UNSIGNED_INT32 }, PIPE_FORMAT_L32_UINT }, + { { CL_A, CL_SIGNED_INT32 }, PIPE_FORMAT_A32_SINT }, + { { CL_INTENSITY, CL_SIGNED_INT32 }, PIPE_FORMAT_I32_SINT }, + { { CL_LUMINANCE, CL_SIGNED_INT32 }, PIPE_FORMAT_L32_SINT } + }; + + pipe_texture_target + translate_target(cl_mem_object_type type) { + switch (type) { + case CL_MEM_OBJECT_BUFFER: + return PIPE_BUFFER; + case CL_MEM_OBJECT_IMAGE2D: + return PIPE_TEXTURE_2D; + case CL_MEM_OBJECT_IMAGE3D: + return PIPE_TEXTURE_3D; + default: + throw error(CL_INVALID_VALUE); + } + } + + pipe_format + translate_format(const cl_image_format &format) { + auto it = formats.find(format); + + if (it == formats.end()) + throw 
error(CL_IMAGE_FORMAT_NOT_SUPPORTED); + + return it->second; + } + + std::set + supported_formats(cl_context ctx, cl_mem_object_type type) { + std::set s; + pipe_texture_target target = translate_target(type); + unsigned bindings = (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE); + + for (auto f : formats) { + if (std::all_of(ctx->devs.begin(), ctx->devs.end(), + [=](const device *dev) { + return dev->pipe->is_format_supported( + dev->pipe, f.second, target, 1, bindings); + })) + s.insert(f.first); + } + + return s; + } +} diff --git a/src/gallium/state_trackers/clover/core/format.hpp b/src/gallium/state_trackers/clover/core/format.hpp new file mode 100644 index 00000000000..a24cbf37621 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/format.hpp @@ -0,0 +1,51 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +#ifndef __CORE_FORMAT_HPP__ +#define __CORE_FORMAT_HPP__ + +#include + +#include "core/base.hpp" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" + +namespace clover { + pipe_texture_target translate_target(cl_mem_object_type type); + pipe_format translate_format(const cl_image_format &format); + + /// + /// Return all the image formats supported by a given context for + /// the given memory object type. + /// + std::set supported_formats(cl_context ctx, + cl_mem_object_type type); +} + +static inline bool +operator<(const cl_image_format &a, const cl_image_format &b) { + return (a.image_channel_order != b.image_channel_order ? + a.image_channel_order < b.image_channel_order : + a.image_channel_data_type < b.image_channel_data_type); +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/geometry.hpp b/src/gallium/state_trackers/clover/core/geometry.hpp new file mode 100644 index 00000000000..027264e72f0 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/geometry.hpp @@ -0,0 +1,72 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_GEOMETRY_HPP__ +#define __CORE_GEOMETRY_HPP__ + +#include +#include + +namespace clover { + /// + /// N-dimensional coordinate array. + /// + template + class point { + public: + point() : a() { + } + + point(std::initializer_list v) { + auto it = std::copy(v.begin(), v.end(), a.begin()); + std::fill(it, a.end(), 0); + } + + point(const T *v) { + std::copy(v, v + N, a.begin()); + } + + T &operator[](int i) { + return a[i]; + } + + const T &operator[](int i) const { + return a[i]; + } + + point operator+(const point &p) const { + point q; + std::transform(a.begin(), a.end(), p.a.begin(), + q.a.begin(), std::plus()); + return q; + } + + T operator()(const point &p) const { + return std::inner_product(p.a.begin(), p.a.end(), a.begin(), 0); + } + + protected: + std::array a; + }; +} + +#endif diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp new file mode 100644 index 00000000000..6fa8bd63453 --- /dev/null +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -0,0 +1,393 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "core/kernel.hpp" +#include "core/resource.hpp" +#include "pipe/p_context.h" + +using namespace clover; + +_cl_kernel::_cl_kernel(clover::program &prog, + const std::string &name, + const std::vector &args) : + prog(prog), __name(name), exec(*this) { + for (auto arg : args) { + if (arg.type == module::argument::scalar) + this->args.emplace_back(new scalar_argument(arg.size)); + else if (arg.type == module::argument::global) + this->args.emplace_back(new global_argument(arg.size)); + else if (arg.type == module::argument::local) + this->args.emplace_back(new local_argument()); + else if (arg.type == module::argument::constant) + this->args.emplace_back(new constant_argument()); + else if (arg.type == module::argument::image2d_rd || + arg.type == module::argument::image3d_rd) + this->args.emplace_back(new image_rd_argument()); + else if (arg.type == module::argument::image2d_wr || + arg.type == module::argument::image3d_wr) + this->args.emplace_back(new image_wr_argument()); + else if (arg.type == module::argument::sampler) + this->args.emplace_back(new sampler_argument()); + else + throw error(CL_INVALID_KERNEL_DEFINITION); + } +} + +template +static inline std::vector +pad_vector(clover::command_queue &q, const V &v, T x) { + std::vector w { v.begin(), v.end() }; + w.resize(q.dev.max_block_size().size(), x); + return w; +} + +void +_cl_kernel::launch(clover::command_queue &q, + const std::vector &grid_offset, + const std::vector &grid_size, + const std::vector &block_size) { + void *st 
= exec.bind(&q); + auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; }, + exec.g_handles.begin(), exec.g_handles.end()); + + q.pipe->bind_compute_state(q.pipe, st); + q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), + exec.samplers.data()); + q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), + exec.sviews.data()); + q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), + exec.resources.data()); + q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), + exec.g_buffers.data(), g_handles.data()); + + q.pipe->launch_grid(q.pipe, + pad_vector(q, block_size, 1).data(), + pad_vector(q, grid_size, 1).data(), + module(q).sym(__name).offset, + exec.input.data()); + + q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); + q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); + q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); + q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL); + exec.unbind(); +} + +size_t +_cl_kernel::mem_local() const { + size_t sz = 0; + + for (auto &arg : args) { + if (dynamic_cast(arg.get())) + sz += arg->storage(); + } + + return sz; +} + +size_t +_cl_kernel::mem_private() const { + return 0; +} + +size_t +_cl_kernel::max_block_size() const { + return SIZE_MAX; +} + +const std::string & +_cl_kernel::name() const { + return __name; +} + +std::vector +_cl_kernel::block_size() const { + return { 0, 0, 0 }; +} + +const clover::module & +_cl_kernel::module(const clover::command_queue &q) const { + return prog.binaries().find(&q.dev)->second; +} + + +_cl_kernel::exec_context::exec_context(clover::kernel &kern) : + kern(kern), q(NULL), mem_local(0), st(NULL) { +} + +_cl_kernel::exec_context::~exec_context() { + if (st) + q->pipe->delete_compute_state(q->pipe, st); +} + +void * +_cl_kernel::exec_context::bind(clover::command_queue *__q) { + std::swap(q, __q); + + for (auto &arg : kern.args) + 
arg->bind(*this); + + // Create a new compute state if anything changed. + if (!st || q != __q || + cs.req_local_mem != mem_local || + cs.req_input_mem != input.size()) { + if (st) + __q->pipe->delete_compute_state(__q->pipe, st); + + cs.prog = kern.module(*q).sec(module::section::text).data.begin(); + cs.req_local_mem = mem_local; + cs.req_input_mem = input.size(); + st = q->pipe->create_compute_state(q->pipe, &cs); + } + + return st; +} + +void +_cl_kernel::exec_context::unbind() { + for (auto &arg : kern.args) + arg->unbind(*this); + + input.clear(); + samplers.clear(); + sviews.clear(); + resources.clear(); + g_buffers.clear(); + g_handles.clear(); + mem_local = 0; +} + +_cl_kernel::argument::argument(size_t size) : + __size(size), __set(false) { +} + +bool +_cl_kernel::argument::set() const { + return __set; +} + +size_t +_cl_kernel::argument::storage() const { + return 0; +} + +_cl_kernel::scalar_argument::scalar_argument(size_t size) : + argument(size) { +} + +void +_cl_kernel::scalar_argument::set(size_t size, const void *value) { + if (size != __size) + throw error(CL_INVALID_ARG_SIZE); + + v = { (uint8_t *)value, (uint8_t *)value + size }; + __set = true; +} + +void +_cl_kernel::scalar_argument::bind(exec_context &ctx) { + ctx.input.insert(ctx.input.end(), v.begin(), v.end()); +} + +void +_cl_kernel::scalar_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::global_argument::global_argument(size_t size) : + argument(size) { +} + +void +_cl_kernel::global_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::global_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.g_buffers.size(); + + ctx.input.resize(offset + __size); + + ctx.g_buffers.resize(idx + 1); + ctx.g_buffers[idx] = obj->resource(ctx.q).pipe; + + ctx.g_handles.resize(idx + 1); + ctx.g_handles[idx] = offset; +} + 
+void +_cl_kernel::global_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::local_argument::local_argument() : + argument(sizeof(uint32_t)) { +} + +size_t +_cl_kernel::local_argument::storage() const { + return __storage; +} + +void +_cl_kernel::local_argument::set(size_t size, const void *value) { + if (value) + throw error(CL_INVALID_ARG_VALUE); + + __storage = size; + __set = true; +} + +void +_cl_kernel::local_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t ptr = ctx.mem_local; + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = ptr; + + ctx.mem_local += __storage; +} + +void +_cl_kernel::local_argument::unbind(exec_context &ctx) { +} + +_cl_kernel::constant_argument::constant_argument() : + argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::constant_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::constant_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.resources.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = idx << 24; + + ctx.resources.resize(idx + 1); + ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false); +} + +void +_cl_kernel::constant_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_surface(*ctx.q, st); +} + +_cl_kernel::image_rd_argument::image_rd_argument() : + argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::image_rd_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::image_rd_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.sviews.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t 
*)&ctx.input[offset] = idx; + + ctx.sviews.resize(idx + 1); + ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q); +} + +void +_cl_kernel::image_rd_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st); +} + +_cl_kernel::image_wr_argument::image_wr_argument() : + argument(sizeof(uint32_t)) { +} + +void +_cl_kernel::image_wr_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_mem)) + throw error(CL_INVALID_ARG_SIZE); + + obj = dynamic_cast(*(cl_mem *)value); + __set = true; +} + +void +_cl_kernel::image_wr_argument::bind(exec_context &ctx) { + size_t offset = ctx.input.size(); + size_t idx = ctx.resources.size(); + + ctx.input.resize(offset + sizeof(uint32_t)); + *(uint32_t *)&ctx.input[offset] = idx; + + ctx.resources.resize(idx + 1); + ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true); +} + +void +_cl_kernel::image_wr_argument::unbind(exec_context &ctx) { + obj->resource(ctx.q).unbind_surface(*ctx.q, st); +} + +_cl_kernel::sampler_argument::sampler_argument() : + argument(0) { +} + +void +_cl_kernel::sampler_argument::set(size_t size, const void *value) { + if (size != sizeof(cl_sampler)) + throw error(CL_INVALID_ARG_SIZE); + + obj = *(cl_sampler *)value; + __set = true; +} + +void +_cl_kernel::sampler_argument::bind(exec_context &ctx) { + size_t idx = ctx.samplers.size(); + + ctx.samplers.resize(idx + 1); + ctx.samplers[idx] = st = obj->bind(*ctx.q); +} + +void +_cl_kernel::sampler_argument::unbind(exec_context &ctx) { + obj->unbind(*ctx.q, st); +} diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp new file mode 100644 index 00000000000..bc21de8094f --- /dev/null +++ b/src/gallium/state_trackers/clover/core/kernel.hpp @@ -0,0 +1,214 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated 
documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef __CORE_KERNEL_HPP__ +#define __CORE_KERNEL_HPP__ + +#include + +#include "core/base.hpp" +#include "core/program.hpp" +#include "core/memory.hpp" +#include "core/sampler.hpp" +#include "pipe/p_state.h" + +namespace clover { + typedef struct _cl_kernel kernel; + class argument; +} + +struct _cl_kernel : public clover::ref_counter { +private: + /// + /// Class containing all the state required to execute a compute + /// kernel. + /// + struct exec_context { + exec_context(clover::kernel &kern); + ~exec_context(); + + void *bind(clover::command_queue *q); + void unbind(); + + clover::kernel &kern; + clover::command_queue *q; + + std::vector input; + std::vector samplers; + std::vector sviews; + std::vector resources; + std::vector g_buffers; + std::vector g_handles; + size_t mem_local; + + private: + void *st; + pipe_compute_state cs; + }; + +public: + class argument { + public: + argument(size_t size); + + /// \a true if the argument has been set. + bool set() const; + + /// Argument size in the input buffer. 
+ size_t size() const; + + /// Storage space required for the referenced object. + virtual size_t storage() const; + + /// Set this argument to some object. + virtual void set(size_t size, const void *value) = 0; + + /// Allocate the necessary resources to bind the specified + /// object to this argument, and update \a ctx accordingly. + virtual void bind(exec_context &ctx) = 0; + + /// Free any resources that were allocated in bind(). + virtual void unbind(exec_context &ctx) = 0; + + protected: + size_t __size; + bool __set; + }; + + _cl_kernel(clover::program &prog, + const std::string &name, + const std::vector &args); + + void launch(clover::command_queue &q, + const std::vector &grid_offset, + const std::vector &grid_size, + const std::vector &block_size); + + size_t mem_local() const; + size_t mem_private() const; + size_t max_block_size() const; + + const std::string &name() const; + std::vector block_size() const; + + clover::program &prog; + std::vector> args; + +private: + const clover::module & + module(const clover::command_queue &q) const; + + class scalar_argument : public argument { + public: + scalar_argument(size_t size); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + std::vector v; + }; + + class global_argument : public argument { + public: + global_argument(size_t size); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::buffer *obj; + }; + + class local_argument : public argument { + public: + local_argument(); + + virtual size_t storage() const; + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + size_t __storage; + }; + + class constant_argument : public argument { + public: + constant_argument(); + + virtual void set(size_t size, const 
void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::buffer *obj; + pipe_surface *st; + }; + + class image_rd_argument : public argument { + public: + image_rd_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::image *obj; + pipe_sampler_view *st; + }; + + class image_wr_argument : public argument { + public: + image_wr_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::image *obj; + pipe_surface *st; + }; + + class sampler_argument : public argument { + public: + sampler_argument(); + + virtual void set(size_t size, const void *value); + virtual void bind(exec_context &ctx); + virtual void unbind(exec_context &ctx); + + private: + clover::sampler *obj; + void *st; + }; + + std::string __name; + exec_context exec; +}; + +#endif diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp new file mode 100644 index 00000000000..1bf12e3c36e --- /dev/null +++ b/src/gallium/state_trackers/clover/core/memory.cpp @@ -0,0 +1,198 @@ +// +// Copyright 2012 Francisco Jerez +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/memory.hpp"
+#include "core/resource.hpp"
+
+using namespace clover;
+
+///
+/// Construct a memory object of \a size bytes.  When \a host_ptr is
+/// non-null, \a size bytes are copied from it into \a data; otherwise
+/// \a data starts out empty.  The destroy notification defaults to a
+/// no-op so the destructor can always call it.
+///
+_cl_mem::_cl_mem(clover::context &ctx, cl_mem_flags flags,
+                 size_t size, void *host_ptr) :
+   ctx(ctx), __flags(flags),
+   __size(size), __host_ptr(host_ptr),
+   __destroy_notify([]{}),
+   data((char *)host_ptr, (host_ptr ? size : 0)) {
+}
+
+/// Run the registered destroy notification.
+_cl_mem::~_cl_mem() {
+   __destroy_notify();
+}
+
+/// Replace the callback that the destructor invokes.
+void
+_cl_mem::destroy_notify(std::function<void ()> f) {
+   __destroy_notify = f;
+}
+
+/// Flags the object was created with.
+cl_mem_flags
+_cl_mem::flags() const {
+   return __flags;
+}
+
+/// Size in bytes the object was created with.
+size_t
+_cl_mem::size() const {
+   return __size;
+}
+
+/// Host pointer the object was created with (may be null).
+void *
+_cl_mem::host_ptr() const {
+   return __host_ptr;
+}
+
+buffer::buffer(clover::context &ctx, cl_mem_flags flags,
+               size_t size, void *host_ptr) :
+   memory_obj(ctx, flags, size, host_ptr) {
+}
+
+cl_mem_object_type
+buffer::type() const {
+   return CL_MEM_OBJECT_BUFFER;
+}
+
+root_buffer::root_buffer(clover::context &ctx, cl_mem_flags flags,
+                         size_t size, void *host_ptr) :
+   buffer(ctx, flags, size, host_ptr) {
+}
+
+///
+/// Return the resource backing this buffer on the device of queue
+/// \a q, creating it on first use.  The first resource is created from
+/// the staged host data, any further one from an existing resource.
+///
+clover::resource &
+root_buffer::resource(cl_command_queue q) {
+   // Create a new resource if there's none for this device yet.
+   if (!resources.count(&q->dev)) {
+      auto r = (!resources.empty() ?
+                new root_resource(q->dev, *this, *resources.begin()->second) :
+                new root_resource(q->dev, *this, data));
+
+      resources.insert(std::make_pair(&q->dev,
+                                      std::unique_ptr<root_resource>(r)));
+      // The staged copy of the host data is dropped once a resource
+      // exists.
+      data.clear();
+   }
+
+   return *resources.find(&q->dev)->second;
+}
+
+///
+/// Construct a view of \a size bytes starting \a offset bytes into
+/// \a parent.
+///
+sub_buffer::sub_buffer(clover::root_buffer &parent, cl_mem_flags flags,
+                       size_t offset, size_t size) :
+   buffer(parent.ctx, flags, size,
+          // Keep a null host pointer null: offsetting it would yield a
+          // bogus non-null address that the base constructor would then
+          // copy \a size bytes from.
+          (parent.host_ptr() ? (char *)parent.host_ptr() + offset : NULL)),
+   parent(parent), __offset(offset) {
+}
+
+///
+/// Return the resource for the device of queue \a q, creating it on
+/// first use as a sub-resource of the parent's resource at offset().
+///
+clover::resource &
+sub_buffer::resource(cl_command_queue q) {
+   // Create a new resource if there's none for this device yet.
+   if (!resources.count(&q->dev)) {
+      auto r = new sub_resource(parent.resource(q), { offset() });
+
+      resources.insert(std::make_pair(&q->dev,
+                                      std::unique_ptr<sub_resource>(r)));
+   }
+
+   return *resources.find(&q->dev)->second;
+}
+
+/// Offset of this sub-buffer inside its parent.
+size_t
+sub_buffer::offset() const {
+   return __offset;
+}
+
+image::image(clover::context &ctx, cl_mem_flags flags,
+             const cl_image_format *format,
+             size_t width, size_t height, size_t depth,
+             size_t row_pitch, size_t slice_pitch, size_t size,
+             void *host_ptr) :
+   memory_obj(ctx, flags, size, host_ptr),
+   __format(*format), __width(width), __height(height), __depth(depth),
+   __row_pitch(row_pitch), __slice_pitch(slice_pitch) {
+}
+
+///
+/// Return the resource backing this image on the device of queue
+/// \a q, creating it on first use.  The first resource is created from
+/// the staged host data, any further one from an existing resource.
+///
+clover::resource &
+image::resource(cl_command_queue q) {
+   // Create a new resource if there's none for this device yet.
+   if (!resources.count(&q->dev)) {
+      auto r = (!resources.empty() ?
+                new root_resource(q->dev, *this, *resources.begin()->second) :
+                new root_resource(q->dev, *this, data));
+
+      resources.insert(std::make_pair(&q->dev,
+                                      std::unique_ptr<root_resource>(r)));
+      // The staged copy of the host data is dropped once a resource
+      // exists.
+      data.clear();
+   }
+
+   return *resources.find(&q->dev)->second;
+}
+
+cl_image_format
+image::format() const {
+   return __format;
+}
+
+size_t
+image::width() const {
+   return __width;
+}
+
+size_t
+image::height() const {
+   return __height;
+}
+
+size_t
+image::depth() const {
+   return __depth;
+}
+
+size_t
+image::row_pitch() const {
+   return __row_pitch;
+}
+
+size_t
+image::slice_pitch() const {
+   return __slice_pitch;
+}
+
+/// A 2D image: depth and slice pitch are 0, size is height * row_pitch.
+image2d::image2d(clover::context &ctx, cl_mem_flags flags,
+                 const cl_image_format *format, size_t width,
+                 size_t height, size_t row_pitch,
+                 void *host_ptr) :
+   image(ctx, flags, format, width, height, 0,
+         row_pitch, 0, height * row_pitch, host_ptr) {
+}
+
+cl_mem_object_type
+image2d::type() const {
+   return CL_MEM_OBJECT_IMAGE2D;
+}
+
+/// A 3D image: size is depth * slice_pitch.
+image3d::image3d(clover::context &ctx, cl_mem_flags flags,
+                 const cl_image_format *format,
+                 size_t width, size_t height, size_t depth,
+                 size_t row_pitch, size_t slice_pitch,
+                 void *host_ptr) :
+   image(ctx, flags, format, width, height, depth,
+         row_pitch, slice_pitch, depth * slice_pitch,
+         host_ptr) {
+}
+
+cl_mem_object_type
+image3d::type() const {
+   return CL_MEM_OBJECT_IMAGE3D;
+}
diff --git a/src/gallium/state_trackers/clover/core/memory.hpp b/src/gallium/state_trackers/clover/core/memory.hpp
new file mode 100644
index 00000000000..96f70e931bc
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/memory.hpp
@@ -0,0 +1,157 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and
to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_MEMORY_HPP__
+#define __CORE_MEMORY_HPP__
+
+// NOTE(review): the header names of the three #include directives below
+// and the template argument lists of std::function / std::map in this
+// file appear to be missing from this copy of the patch -- confirm
+// against the original commit.
+#include
+#include
+#include
+
+#include "core/base.hpp"
+#include "core/queue.hpp"
+
+namespace clover {
+   typedef struct _cl_mem memory_obj;
+
+   class resource;
+   class sub_resource;
+}
+
+///
+/// Abstract base of every memory object.  Derived classes supply the
+/// object type and the per-queue resource.
+///
+struct _cl_mem : public clover::ref_counter {
+protected:
+   _cl_mem(clover::context &ctx, cl_mem_flags flags,
+           size_t size, void *host_ptr);
+   _cl_mem(const _cl_mem &obj) = delete;
+
+public:
+   virtual ~_cl_mem();
+
+   /// Object type constant reported for this memory object.
+   virtual cl_mem_object_type type() const = 0;
+   /// Resource that backs this object for the given queue.
+   virtual clover::resource &resource(cl_command_queue q) = 0;
+
+   /// Set the callback that the destructor invokes.
+   void destroy_notify(std::function f);
+   cl_mem_flags flags() const;
+   size_t size() const;
+   void *host_ptr() const;
+
+   clover::context &ctx;
+
+private:
+   cl_mem_flags __flags;
+   size_t __size;
+   void *__host_ptr;
+   std::function __destroy_notify;
+
+protected:
+   /// Copy of the host data taken at construction time (empty when no
+   /// host pointer was given).
+   std::string data;
+};
+
+namespace clover {
+   /// Common base of buffer objects; type() is CL_MEM_OBJECT_BUFFER.
+   struct buffer : public memory_obj {
+   protected:
+      buffer(clover::context &ctx, cl_mem_flags flags,
+             size_t size, void *host_ptr);
+
+   public:
+      virtual cl_mem_object_type type() const;
+   };
+
+   /// Buffer that keeps its own resources, one per device.
+   struct root_buffer : public buffer {
+   public:
+      root_buffer(clover::context &ctx, cl_mem_flags flags,
+                  size_t size, void *host_ptr);
+
+      virtual clover::resource &resource(cl_command_queue q);
+
+   private:
+      // Resources created so far, looked up by device pointer.
+      std::map> resources;
+   };
+
+   /// Buffer describing a range of a parent root_buffer.
+   struct sub_buffer : public buffer {
+   public:
+      sub_buffer(clover::root_buffer &parent, cl_mem_flags flags,
+                 size_t offset, size_t size);
+
+      virtual clover::resource &resource(cl_command_queue q);
+      /// Offset of this sub-buffer inside \a parent.
+      size_t offset() const;
+
+      clover::root_buffer &parent;
+
+   private:
+      size_t __offset;
+      // Resources created so far, looked up by device pointer.
+      std::map> resources;
+   };
+
+   /// Common base of image objects; stores format, extents and pitches.
+   struct image : public memory_obj {
+   protected:
+      image(clover::context &ctx, cl_mem_flags flags,
+            const cl_image_format *format,
+            size_t width, size_t height, size_t depth,
+            size_t row_pitch, size_t slice_pitch, size_t size,
+            void *host_ptr);
+
+   public:
+      virtual clover::resource &resource(cl_command_queue q);
+      cl_image_format format() const;
+      size_t width() const;
+      size_t height() const;
+      size_t depth() const;
+      size_t row_pitch() const;
+      size_t slice_pitch() const;
+
+   private:
+      cl_image_format __format;
+      size_t __width;
+      size_t __height;
+      size_t __depth;
+      size_t __row_pitch;
+      size_t __slice_pitch;
+      // Resources created so far, looked up by device pointer.
+      std::map> resources;
+   };
+
+   /// Image whose type() is CL_MEM_OBJECT_IMAGE2D.
+   struct image2d : public image {
+   public:
+      image2d(clover::context &ctx, cl_mem_flags flags,
+              const cl_image_format *format, size_t width,
+              size_t height, size_t row_pitch,
+              void *host_ptr);
+
+      virtual cl_mem_object_type type() const;
+   };
+
+   /// Image whose type() is CL_MEM_OBJECT_IMAGE3D.
+   struct image3d : public image {
+   public:
+      image3d(clover::context &ctx, cl_mem_flags flags,
+              const cl_image_format *format,
+              size_t width, size_t height, size_t depth,
+              size_t row_pitch, size_t slice_pitch,
+              void *host_ptr);
+
+      virtual cl_mem_object_type type() const;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp
new file mode 100644
index 00000000000..1865771443b
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/module.cpp
@@ -0,0 +1,172 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software
and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+// NOTE(review): the header names of the two #include directives below
+// and every template parameter/argument list in this part of the file
+// appear to be missing from this copy of the patch -- confirm against
+// the original commit.
+#include
+#include
+
+#include "core/module.hpp"
+
+using namespace clover;
+
+namespace {
+   // Per-type (de)serialization policy; the specializations provide
+   // static proc() functions for output and input streams.
+   template
+   struct __serializer;
+
+   /// Serialize the specified object.
+   template
+   void
+   __proc(compat::ostream &os, const T &x) {
+      __serializer::proc(os, x);
+   }
+
+   /// Deserialize the specified object.
+   template
+   void
+   __proc(compat::istream &is, T &x) {
+      __serializer::proc(is, x);
+   }
+
+   /// Deserialize an object into a local and return it by value.
+   template
+   T
+   __proc(compat::istream &is) {
+      T x;
+      __serializer::proc(is, x);
+      return x;
+   }
+
+   /// (De)serialize a scalar value.
+   template
+   struct __serializer::value>::type> {
+      // The value is written as the sizeof(x) bytes of its in-memory
+      // representation.
+      static void
+      proc(compat::ostream &os, const T &x) {
+         os.write(reinterpret_cast(&x), sizeof(x));
+      }
+
+      // Reads sizeof(x) bytes straight into the object.
+      static void
+      proc(compat::istream &is, T &x) {
+         is.read(reinterpret_cast(&x), sizeof(x));
+      }
+   };
+
+   /// (De)serialize a vector.
+   template<typename T>
+   struct __serializer<compat::vector<T>> {
+      /// Write the element count followed by every element in order.
+      static void
+      proc(compat::ostream &os, const compat::vector<T> &v) {
+         __proc<uint32_t>(os, v.size());
+
+         for (size_t i = 0; i < v.size(); i++)
+            __proc<T>(os, v[i]);
+      }
+
+      /// Read the element count, then construct every element in place.
+      static void
+      proc(compat::istream &is, compat::vector<T> &v) {
+         // NOTE(review): the loop below only runs if compat::vector's
+         // reserve() also updates size(), unlike std::vector -- confirm
+         // against core/compat.hpp.
+         v.reserve(__proc<uint32_t>(is));
+
+         for (size_t i = 0; i < v.size(); i++)
+            new(&v[i]) T(__proc<T>(is));
+      }
+   };
+
+   /// (De)serialize a module::section.
+   template<>
+   struct __serializer<module::section> {
+      // The same function handles both directions: S is the stream
+      // type and QT the (possibly const) section type.
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         // Every field is processed, including the id, so that a
+         // deserialized section is fully initialized.
+         __proc(s, x.id);
+         __proc(s, x.type);
+         __proc(s, x.size);
+         __proc(s, x.data);
+      }
+   };
+
+   /// (De)serialize a module::argument.
+   template<>
+   struct __serializer<module::argument> {
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         __proc(s, x.type);
+         __proc(s, x.size);
+      }
+   };
+
+   /// (De)serialize a module::symbol.
+   template<>
+   struct __serializer<module::symbol> {
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         // The name has to round-trip as well: module::sym() looks
+         // symbols up by name.
+         __proc(s, x.name);
+         __proc(s, x.section);
+         __proc(s, x.offset);
+         __proc(s, x.args);
+      }
+   };
+
+   /// (De)serialize a module.
+   template<>
+   struct __serializer<module> {
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         __proc(s, x.syms);
+         __proc(s, x.secs);
+      }
+   };
+};
+
+namespace clover {
+   /// Write this module to \a os.
+   void
+   module::serialize(compat::ostream &os) const {
+      __proc(os, *this);
+   }
+
+   /// Read a module from \a is and return it.
+   module
+   module::deserialize(compat::istream &is) {
+      return __proc<module>(is);
+   }
+
+   /// Return the first symbol whose name equals \a name; throws
+   /// noent_error if there is none.
+   const module::symbol &
+   module::sym(compat::string name) const {
+      auto it = std::find_if(syms.begin(), syms.end(), [&](const symbol &x) {
+            return compat::string(x.name) == name;
+         });
+
+      if (it == syms.end())
+         throw noent_error();
+
+      return *it;
+   }
+
+   /// Return the first section of type \a type; throws noent_error if
+   /// there is none.
+   const module::section &
+   module::sec(typename section::type type) const {
+      auto it = std::find_if(secs.begin(), secs.end(), [&](const section &x) {
+            return x.type == type;
+         });
+
+      if (it == secs.end())
+         throw noent_error();
+
+      return *it;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp
new file mode 100644
index 00000000000..bc4b203af8e
--- /dev/null
+++
b/src/gallium/state_trackers/clover/core/module.hpp
@@ -0,0 +1,93 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_MODULE_HPP__
+#define __CORE_MODULE_HPP__
+
+#include "core/compat.hpp"
+
+// NOTE(review): the element types of the clover::compat::vector members
+// below appear to be missing from this copy of the patch -- confirm
+// against the original commit.
+namespace clover {
+   /// A set of symbols and sections that can be written to and read
+   /// back from a stream.
+   struct module {
+      /// Thrown by sym() and sec() when the lookup finds nothing.
+      class noent_error {
+      public:
+         virtual ~noent_error() {}
+      };
+
+      typedef uint32_t resource_id;
+      // Note that this shadows ::size_t inside module with a 32-bit type.
+      typedef uint32_t size_t;
+
+      struct section {
+         enum type {
+            text,
+            data_constant,
+            data_global,
+            data_local,
+            data_private
+         };
+
+         resource_id id;
+         type type;
+         size_t size;
+         clover::compat::vector data;
+      };
+
+      struct argument {
+         enum type {
+            scalar,
+            constant,
+            global,
+            local,
+            image2d_rd,
+            image2d_wr,
+            image3d_rd,
+            image3d_wr,
+            sampler
+         };
+
+         type type;
+         size_t size;
+      };
+
+      struct symbol {
+         clover::compat::vector name;
+         resource_id section;
+         size_t offset;
+         clover::compat::vector args;
+      };
+
+      /// Write this module to \a os.
+      void serialize(compat::ostream &os) const;
+      /// Read a module from \a is.
+      static module deserialize(compat::istream &is);
+
+      /// Look up a symbol by name. Throws module::noent_error if not
+      /// found.
+      const symbol &sym(compat::string name) const;
+
+      /// Look up a section by type. Throws module::noent_error if not
+      /// found.
+      const section &sec(typename section::type type) const;
+
+      clover::compat::vector syms;
+      clover::compat::vector secs;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
new file mode 100644
index 00000000000..5ac9f93480e
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/program.hpp"
+#include "core/compiler.hpp"
+
+using namespace clover;
+
+/// Create a program from source text; nothing is built yet.
+_cl_program::_cl_program(clover::context &ctx,
+                         const std::string &source) :
+   ctx(ctx), __source(source) {
+}
+
+///
+/// Create a program from ready-made binaries.  \a binaries is walked
+/// in step with \a devs, pairing each device with its module.
+///
+_cl_program::_cl_program(clover::context &ctx,
+                         const std::vector<clover::device *> &devs,
+                         const std::vector<clover::module> &binaries) :
+   ctx(ctx) {
+   for_each([&](clover::device *dev, const clover::module &bin) {
+         __binaries.insert({ dev, bin });
+      },
+      devs.begin(), devs.end(), binaries.begin());
+}
+
+///
+/// Compile the source for every device in \a devs, discarding the
+/// binaries and logs of any previous build.  On the first compiler
+/// failure the log is recorded for that device and
+/// error(CL_BUILD_PROGRAM_FAILURE) is thrown; later devices are not
+/// attempted.
+///
+void
+_cl_program::build(const std::vector<clover::device *> &devs) {
+   __binaries.clear();
+   __logs.clear();
+
+   for (auto dev : devs) {
+      try {
+         auto module = (dev->ir_target() == "tgsi" ?
+                        compile_program_tgsi(__source, dev->ir_target()) :
+                        compile_program_llvm(__source, dev->ir_target()));
+         __binaries.insert({ dev, module });
+
+      } catch (build_error &e) {
+         __logs.insert({ dev, e.what() });
+         throw error(CL_BUILD_PROGRAM_FAILURE);
+      }
+   }
+}
+
+const std::string &
+_cl_program::source() const {
+   return __source;
+}
+
+const std::map<clover::device *, clover::module> &
+_cl_program::binaries() const {
+   return __binaries;
+}
+
+///
+/// Build status for \a dev: success if a binary exists, error if the
+/// last build left a log for the device, none otherwise.
+///
+cl_build_status
+_cl_program::build_status(clover::device *dev) const {
+   if (__binaries.count(dev))
+      return CL_BUILD_SUCCESS;
+
+   // build() only records a log when compiling for the device failed.
+   return __logs.count(dev) ? CL_BUILD_ERROR : CL_BUILD_NONE;
+}
+
+/// Build options for \a dev; always empty in this implementation.
+std::string
+_cl_program::build_opts(clover::device *dev) const {
+   return {};
+}
+
+/// Build log recorded for \a dev, or an empty string if there is none.
+std::string
+_cl_program::build_log(clover::device *dev) const {
+   auto it = __logs.find(dev);
+
+   return it != __logs.end() ? it->second : "";
+}
diff --git a/src/gallium/state_trackers/clover/core/program.hpp b/src/gallium/state_trackers/clover/core/program.hpp
new file mode 100644
index 00000000000..f3858f6ce98
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/program.hpp
@@ -0,0 +1,61 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_PROGRAM_HPP__
+#define __CORE_PROGRAM_HPP__
+
+// NOTE(review): the header name of the #include directive below and the
+// template argument lists of std::vector / std::map in this file appear
+// to be missing from this copy of the patch -- confirm against the
+// original commit.
+#include
+
+#include "core/base.hpp"
+#include "core/context.hpp"
+#include "core/module.hpp"
+
+namespace clover {
+   typedef struct _cl_program program;
+}
+
+///
+/// A program object, created either from source text or from
+/// per-device binaries.
+///
+struct _cl_program : public clover::ref_counter {
+public:
+   /// Create a program from source text.
+   _cl_program(clover::context &ctx,
+               const std::string &source);
+   /// Create a program from binaries, one per entry of \a devs.
+   _cl_program(clover::context &ctx,
+               const std::vector &devs,
+               const std::vector &binaries);
+
+   /// Compile the source for the given devices.
+   void build(const std::vector &devs);
+
+   const std::string &source() const;
+   const std::map &binaries() const;
+
+   cl_build_status build_status(clover::device *dev) const;
+   std::string build_opts(clover::device *dev) const;
+   std::string build_log(clover::device *dev) const;
+
+   clover::context &ctx;
+
+private:
+   // Per-device results of the last build.
+   std::map __binaries;
+   std::map __logs;
+   std::string __source;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/queue.cpp b/src/gallium/state_trackers/clover/core/queue.cpp
new file mode 100644
index 00000000000..7e476c715e0
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <algorithm>
+
+#include "core/queue.hpp"
+#include "core/event.hpp"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+
+using namespace clover;
+
+///
+/// Create a command queue on \a dev.  Throws error(CL_INVALID_DEVICE)
+/// if the device cannot provide a pipe context.
+///
+_cl_command_queue::_cl_command_queue(context &ctx, device &dev,
+                                     cl_command_queue_properties props) :
+   ctx(ctx), dev(dev), __props(props) {
+   pipe = dev.pipe->context_create(dev.pipe, NULL);
+   if (!pipe)
+      throw error(CL_INVALID_DEVICE);
+}
+
+/// Destroy the pipe context created by the constructor.
+_cl_command_queue::~_cl_command_queue() {
+   pipe->destroy(pipe);
+}
+
+///
+/// Flush the pipe context and hand the resulting fence to the leading
+/// run of queued events that are already signalled; those events are
+/// then removed from the pending list.  Does nothing when no events
+/// are queued.
+///
+void
+_cl_command_queue::flush() {
+   if (queued_events.empty())
+      return;
+
+   // Find out which events have already been signalled.
+   auto first = queued_events.begin();
+   auto last = std::find_if(first, queued_events.end(),
+                            [](event_ptr &ev) { return !ev->signalled(); });
+
+   // Flush and fence them.
+   pipe_screen *screen = dev.pipe;
+   pipe_fence_handle *fence = NULL;
+
+   pipe->flush(pipe, &fence);
+   std::for_each(first, last, [&](event_ptr &ev) { ev->fence(fence); });
+   // Drop our own reference to the fence.
+   screen->fence_reference(screen, &fence, NULL);
+   queued_events.erase(first, last);
+}
+
+///
+/// Chain \a ev after the most recently queued event, if any, and
+/// append it to the pending list.
+///
+void
+_cl_command_queue::sequence(clover::hard_event *ev) {
+   if (!queued_events.empty())
+      queued_events.back()->chain(ev);
+
+   queued_events.push_back(ev);
+}
diff --git a/src/gallium/state_trackers/clover/core/queue.hpp b/src/gallium/state_trackers/clover/core/queue.hpp
new file mode 100644
index 00000000000..54c949b203f
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/queue.hpp
@@ -0,0 +1,71 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_QUEUE_HPP__
+#define __CORE_QUEUE_HPP__
+
+#include "core/base.hpp"
+#include "core/context.hpp"
+#include "pipe/p_context.h"
+
+namespace clover {
+   typedef struct _cl_command_queue command_queue;
+   class resource;
+   class mapping;
+   class hard_event;
+}
+
+///
+/// A command queue: owns a pipe context and the list of hardware
+/// events that have been sequenced on it.
+///
+struct _cl_command_queue : public clover::ref_counter {
+public:
+   _cl_command_queue(clover::context &ctx, clover::device &dev,
+                     cl_command_queue_properties props);
+   _cl_command_queue(const _cl_command_queue &q) = delete;
+   ~_cl_command_queue();
+
+   /// Flush the pipe context and fence the queued events.
+   void flush();
+
+   /// Properties the queue was created with.
+   cl_command_queue_properties props() const {
+      return __props;
+   }
+
+   clover::context &ctx;
+   clover::device &dev;
+
+   // These classes access the private pipe context and sequence().
+   friend class clover::resource;
+   friend class clover::mapping;
+   friend class clover::hard_event;
+   friend struct _cl_sampler;
+   friend struct _cl_kernel;
+
+private:
+   /// Serialize a hardware event with respect to the previous ones,
+   /// and push it to the pending list.
+   void sequence(clover::hard_event *ev);
+
+   cl_command_queue_properties __props;
+   pipe_context *pipe;
+
+   // NOTE(review): the template arguments of clover::ref_ptr and
+   // std::vector appear to be missing from this copy of the patch --
+   // confirm against the original commit.
+   typedef clover::ref_ptr event_ptr;
+   std::vector queued_events;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp
new file mode 100644
index 00000000000..1d241e595aa
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/resource.cpp
@@ -0,0 +1,192 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or
substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/resource.hpp"
+#include "pipe/p_screen.h"
+#include "util/u_sampler.h"
+
+using namespace clover;
+
+namespace {
+   /// Helper that builds a pipe_box from an origin and a size and
+   /// converts implicitly to a pointer to it.
+   class box {
+   public:
+      box(const resource::point &origin, const resource::point &size) :
+         pipe({ (unsigned)origin[0], (unsigned)origin[1],
+                (unsigned)origin[2], (unsigned)size[0],
+                (unsigned)size[1], (unsigned)size[2] }) {
+      }
+
+      operator const pipe_box *() {
+         return &pipe;
+      }
+
+   protected:
+      pipe_box pipe;
+   };
+}
+
+resource::resource(clover::device &dev, clover::memory_obj &obj) :
+   dev(dev), obj(obj), pipe(NULL), offset{0} {
+}
+
+resource::~resource() {
+}
+
+///
+/// Copy \a region from \a src_res at \a src_origin into this resource
+/// at \a origin.  Both origins are taken relative to the respective
+/// resource's offset.
+///
+void
+resource::copy(command_queue &q, const point &origin, const point &region,
+               resource &src_res, const point &src_origin) {
+   point p = offset + origin;
+
+   q.pipe->resource_copy_region(q.pipe, pipe, 0, p[0], p[1], p[2],
+                                src_res.pipe, 0,
+                                box(src_res.offset + src_origin, region));
+}
+
+///
+/// Map \a region at \a origin and remember the mapping; the returned
+/// pointer is what del_map() expects later.
+///
+void *
+resource::add_map(command_queue &q, cl_map_flags flags, bool blocking,
+                  const point &origin, const point &region) {
+   maps.emplace_back(q, *this, flags, blocking, origin, region);
+   return maps.back();
+}
+
+/// Drop the mapping that compares equal to \a p, if there is one.
+void
+resource::del_map(void *p) {
+   auto it = std::find(maps.begin(), maps.end(), p);
+   if (it != maps.end())
+      maps.erase(it);
+}
+
+/// Number of mappings currently held.
+unsigned
+resource::map_count() const {
+   return maps.size();
+}
+
+/// Create a sampler view of the whole resource in its own format.
+pipe_sampler_view *
+resource::bind_sampler_view(clover::command_queue &q) {
+   pipe_sampler_view info;
+
+   u_sampler_view_default_template(&info, pipe, pipe->format);
+   return q.pipe->create_sampler_view(q.pipe, pipe, &info);
+}
+
+void
+resource::unbind_sampler_view(clover::command_queue &q,
+                              pipe_sampler_view *st) {
+   q.pipe->sampler_view_destroy(q.pipe, st);
+}
+
+///
+/// Create a surface for the resource; \a rw selects whether it is
+/// writable.
+///
+pipe_surface *
+resource::bind_surface(clover::command_queue &q, bool rw) {
+   pipe_surface info {};
+
+   info.format = pipe->format;
+   info.usage = pipe->bind;
+   info.writable = rw;
+
+   if (pipe->target == PIPE_BUFFER)
+      info.u.buf.last_element = pipe->width0 - 1;
+
+   return q.pipe->create_surface(q.pipe, pipe, &info);
+}
+
+void
+resource::unbind_surface(clover::command_queue &q, pipe_surface *st) {
+   q.pipe->surface_destroy(q.pipe, st);
+}
+
+///
+/// Allocate a new pipe resource on \a dev sized after \a obj.  Throws
+/// error(CL_OUT_OF_RESOURCES) if the allocation fails.  Uploading
+/// \a data is not implemented yet, so it has to be empty.
+///
+root_resource::root_resource(clover::device &dev, clover::memory_obj &obj,
+                             std::string data) :
+   resource(dev, obj) {
+   pipe_resource info {};
+
+   if (image *img = dynamic_cast<image *>(&obj)) {
+      info.format = translate_format(img->format());
+      info.width0 = img->width();
+      info.height0 = img->height();
+      info.depth0 = img->depth();
+   } else {
+      info.width0 = obj.size();
+   }
+
+   info.target = translate_target(obj.type());
+   info.bind = (PIPE_BIND_SAMPLER_VIEW |
+                PIPE_BIND_COMPUTE_RESOURCE |
+                PIPE_BIND_GLOBAL |
+                PIPE_BIND_TRANSFER_READ |
+                PIPE_BIND_TRANSFER_WRITE);
+
+   pipe = dev.pipe->resource_create(dev.pipe, &info);
+   if (!pipe)
+      throw error(CL_OUT_OF_RESOURCES);
+
+   assert(data.empty()); // XXX -- initialize it with the supplied data
+}
+
+/// Not implemented: resource shared between two devices.
+root_resource::root_resource(clover::device &dev, clover::memory_obj &obj,
+                             clover::root_resource &r) :
+   resource(dev, obj) {
+   assert(0); // XXX -- resource shared among dev and r.dev
+}
+
+root_resource::~root_resource() {
+   dev.pipe->resource_destroy(dev.pipe, pipe);
+}
+
+///
+/// Create a view of \a r displaced by \a offset; the pipe resource is
+/// shared with \a r.
+///
+sub_resource::sub_resource(clover::resource &r, point offset) :
+   resource(r.dev, r.obj) {
+   pipe = r.pipe;
+   // The parameter shadows the member of the same name, so the member
+   // has to be named explicitly for the assignment to reach it.
+   this->offset = r.offset + offset;
+}
+
+///
+/// Map \a region of \a r at \a origin (relative to the resource's
+/// offset).  Throws error(CL_OUT_OF_RESOURCES) if the transfer cannot
+/// be created or mapped.
+///
+mapping::mapping(command_queue &q, resource &r,
+                 cl_map_flags flags, bool blocking,
+                 const resource::point &origin,
+                 const resource::point &region) :
+   pctx(q.pipe) {
+   // Only a non-blocking map may ask the driver to skip
+   // synchronization; a blocking map has to wait for pending work.
+   unsigned usage = ((flags & CL_MAP_WRITE ? PIPE_TRANSFER_WRITE : 0 ) |
+                     (flags & CL_MAP_READ ? PIPE_TRANSFER_READ : 0 ) |
+                     (!blocking ? PIPE_TRANSFER_UNSYNCHRONIZED : 0));
+
+   pxfer = pctx->get_transfer(pctx, r.pipe, 0, usage,
+                              box(origin + r.offset, region));
+   if (!pxfer)
+      throw error(CL_OUT_OF_RESOURCES);
+
+   p = pctx->transfer_map(pctx, pxfer);
+   if (!p) {
+      pctx->transfer_destroy(pctx, pxfer);
+      throw error(CL_OUT_OF_RESOURCES);
+   }
+}
+
+/// Take over the transfer of \a m, leaving \a m empty.
+mapping::mapping(mapping &&m) :
+   pctx(m.pctx), pxfer(m.pxfer), p(m.p) {
+   m.p = NULL;
+   m.pxfer = NULL;
+}
+
+/// Unmap and destroy the transfer unless it was moved away.
+mapping::~mapping() {
+   if (pxfer) {
+      pctx->transfer_unmap(pctx, pxfer);
+      pctx->transfer_destroy(pctx, pxfer);
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/resource.hpp b/src/gallium/state_trackers/clover/core/resource.hpp
new file mode 100644
index 00000000000..d4992972903
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/resource.hpp
@@ -0,0 +1,129 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_RESOURCE_HPP__
+#define __CORE_RESOURCE_HPP__
+
+#include <list>
+
+#include "core/base.hpp"
+#include "core/memory.hpp"
+#include "core/geometry.hpp"
+#include "pipe/p_state.h"
+
+namespace clover {
+   class mapping;
+
+   ///
+   /// Class that represents a device-specific instance of some memory
+   /// object.
+   ///
+   class resource {
+   public:
+      // NOTE(review): the template arguments of clover::point (if any)
+      // appear to have been lost from this copy of the patch --
+      // confirm against core/geometry.hpp.
+      typedef clover::point point;
+
+      resource(const resource &r) = delete;
+      virtual ~resource();
+
+      /// Copy \a region of \a src_resource, starting at \a src_origin,
+      /// into this resource starting at \a origin.
+      void copy(command_queue &q, const point &origin, const point &region,
+                resource &src_resource, const point &src_origin);
+
+      /// Map a region into CPU memory and return its address; the
+      /// mapping stays alive until del_map() is called with the same
+      /// pointer.
+      void *add_map(command_queue &q, cl_map_flags flags, bool blocking,
+                    const point &origin, const point &region);
+      /// Destroy the mapping located at address \a p, if there is one.
+      void del_map(void *p);
+      /// Number of mappings that are currently alive.
+      unsigned map_count() const;
+
+      clover::device &dev;
+      clover::memory_obj &obj;
+
+      friend class sub_resource;
+      friend class mapping;
+      friend struct ::_cl_kernel;
+
+   protected:
+      resource(clover::device &dev, clover::memory_obj &obj);
+
+      pipe_sampler_view *bind_sampler_view(clover::command_queue &q);
+      void unbind_sampler_view(clover::command_queue &q,
+                               pipe_sampler_view *st);
+
+      pipe_surface *bind_surface(clover::command_queue &q, bool rw);
+      void unbind_surface(clover::command_queue &q, pipe_surface *st);
+
+      /// Underlying pipe resource; filled in by the derived classes.
+      pipe_resource *pipe;
+      /// Displacement added to every origin passed to copy() and to
+      /// the mapping constructor.
+      point offset;
+
+   private:
+      /// Mappings created through add_map() and not yet deleted.
+      std::list<mapping> maps;
+   };
+
+   ///
+   /// Resource associated with its own top-level data storage
+   /// allocated in some device.
+   ///
+   class root_resource : public resource {
+   public:
+      root_resource(clover::device &dev, clover::memory_obj &obj,
+                    std::string data);
+      root_resource(clover::device &dev, clover::memory_obj &obj,
+                    root_resource &r);
+      virtual ~root_resource();
+   };
+
+   ///
+   /// Resource that reuses a portion of some other resource as data
+   /// storage.
+   ///
+   class sub_resource : public resource {
+   public:
+      sub_resource(clover::resource &r, point offset);
+   };
+
+   ///
+   /// Class that represents a mapping of some resource into the CPU
+   /// memory space.
+   ///
+   class mapping {
+   public:
+      mapping(command_queue &q, resource &r, cl_map_flags flags,
+              bool blocking, const resource::point &origin,
+              const resource::point &region);
+      mapping(const mapping &m) = delete;
+      mapping(mapping &&m);
+      ~mapping();
+
+      /// Address of the mapped region.
+      operator void *() {
+         return p;
+      }
+
+      /// Address of the mapped region, as a byte pointer.
+      operator char *() {
+         return (char *)p;
+      }
+
+   private:
+      pipe_context *pctx;
+      pipe_transfer *pxfer;
+      void *p;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/sampler.cpp b/src/gallium/state_trackers/clover/core/sampler.cpp
new file mode 100644
index 00000000000..6d683f2b41a
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/sampler.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/sampler.hpp"
+#include "pipe/p_state.h"
+
+using namespace clover;
+
+namespace {
+   ///
+   /// Gallium texture wrap mode corresponding to an OpenCL addressing
+   /// mode.  Every mode without an explicit case (including
+   /// CL_ADDRESS_CLAMP_TO_EDGE itself) maps to clamp-to-edge.
+   ///
+   unsigned
+   translate_wrap(cl_addressing_mode mode) {
+      switch (mode) {
+      case CL_ADDRESS_CLAMP:
+         return PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+      case CL_ADDRESS_REPEAT:
+         return PIPE_TEX_WRAP_REPEAT;
+      case CL_ADDRESS_MIRRORED_REPEAT:
+         return PIPE_TEX_WRAP_MIRROR_REPEAT;
+      default:
+         return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      }
+   }
+}
+
+///
+/// Store the sampler parameters; no pipe state is created until
+/// bind() is called.
+///
+_cl_sampler::_cl_sampler(clover::context &ctx, bool norm_mode,
+                         cl_addressing_mode addr_mode,
+                         cl_filter_mode filter_mode) :
+   ctx(ctx),
+   __norm_mode(norm_mode),
+   __addr_mode(addr_mode),
+   __filter_mode(filter_mode) {
+}
+
+/// Whether the sampler uses normalized coordinates.
+bool
+_cl_sampler::norm_mode() {
+   return __norm_mode;
+}
+
+/// Addressing mode the sampler was created with.
+cl_addressing_mode
+_cl_sampler::addr_mode() {
+   return __addr_mode;
+}
+
+/// Filter mode the sampler was created with.
+cl_filter_mode
+_cl_sampler::filter_mode() {
+   return __filter_mode;
+}
+
+///
+/// Translate the sampler parameters into a pipe_sampler_state and
+/// create the matching state object on the pipe context of \a q.
+/// All three wrap directions share one mode, and minification and
+/// magnification share one filter.
+///
+void *
+_cl_sampler::bind(clover::command_queue &q) {
+   struct pipe_sampler_state state {};
+   const unsigned wrap = translate_wrap(addr_mode());
+   unsigned filter;
+
+   if (filter_mode() == CL_FILTER_LINEAR)
+      filter = PIPE_TEX_FILTER_LINEAR;
+   else
+      filter = PIPE_TEX_FILTER_NEAREST;
+
+   state.normalized_coords = norm_mode();
+
+   state.wrap_r = wrap;
+   state.wrap_t = wrap;
+   state.wrap_s = wrap;
+
+   state.mag_img_filter = filter;
+   state.min_img_filter = filter;
+
+   return q.pipe->create_sampler_state(q.pipe, &state);
+}
+
+///
+/// Delete a state object previously returned by bind().
+///
+void
+_cl_sampler::unbind(clover::command_queue &q, void *st) {
+   q.pipe->delete_sampler_state(q.pipe, st);
+}
diff --git a/src/gallium/state_trackers/clover/core/sampler.hpp b/src/gallium/state_trackers/clover/core/sampler.hpp
new file mode 100644
index 00000000000..5bb5bccb1a1
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/sampler.hpp
@@ -0,0 +1,55 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_SAMPLER_HPP__
+#define __CORE_SAMPLER_HPP__
+
+#include "core/base.hpp"
+#include "core/queue.hpp"
+
+namespace clover {
+   // Alias so that code inside the clover namespace can refer to the
+   // global _cl_sampler type as clover::sampler.
+   typedef struct _cl_sampler sampler;
+}
+
+///
+/// Reference-counted sampler object.  It stores the parameters given
+/// at construction; bind() and unbind() are private and reachable
+/// only through the _cl_kernel friend declaration.
+///
+struct _cl_sampler : public clover::ref_counter {
+public:
+   _cl_sampler(clover::context &ctx, bool norm_mode,
+               cl_addressing_mode addr_mode, cl_filter_mode filter_mode);
+
+   // Accessors for the parameters passed to the constructor.
+   bool norm_mode();
+   cl_addressing_mode addr_mode();
+   cl_filter_mode filter_mode();
+
+   clover::context &ctx;
+
+   friend class _cl_kernel;
+
+private:
+   // Create / delete the sampler state object on the pipe context of
+   // queue q.
+   void *bind(clover::command_queue &q);
+   void unbind(clover::command_queue &q, void *st);
+
+   // NOTE(review): identifiers containing a double underscore are
+   // reserved to the implementation in C++ -- consider renaming.
+   bool __norm_mode;
+   cl_addressing_mode __addr_mode;
+   cl_filter_mode __filter_mode;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
new file mode 100644
index 00000000000..89e21bf9289
--- /dev/null
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/compiler.hpp"
+
+// The clang/LLVM based path is compiled out for now.
+// NOTE(review): the header names of the includes below, and the
+// template arguments inside load_binary(), appear to have been lost
+// from this copy of the patch -- restore them from upstream before
+// enabling this code.
+#if 0
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#endif
+
+using namespace clover;
+
+#if 0
+namespace {
+   // Compile the OpenCL C program text `source' for the target triple
+   // `target' with clang's EmitObjAction.  The source is supplied as an
+   // in-memory buffer remapped onto the file name `name'.  Throws
+   // build_error carrying the diagnostic log if the action fails.
+   void
+   build_binary(const std::string &source, const std::string &target,
+                const std::string &name) {
+      clang::CompilerInstance c;
+      clang::EmitObjAction act(&llvm::getGlobalContext());
+      std::string log;
+      llvm::raw_string_ostream s_log(log);
+
+      LLVMInitializeTGSITarget();
+      LLVMInitializeTGSITargetInfo();
+      LLVMInitializeTGSITargetMC();
+      LLVMInitializeTGSIAsmPrinter();
+
+      c.getFrontendOpts().Inputs.push_back(
+         std::make_pair(clang::IK_OpenCL, name));
+      c.getHeaderSearchOpts().UseBuiltinIncludes = false;
+      c.getHeaderSearchOpts().UseStandardIncludes = false;
+      c.getLangOpts().NoBuiltin = true;
+      c.getTargetOpts().Triple = target;
+      c.getInvocation().setLangDefaults(clang::IK_OpenCL);
+      c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
+                             s_log, c.getDiagnosticOpts()));
+
+      c.getPreprocessorOpts().addRemappedFile(
+         name, llvm::MemoryBuffer::getMemBuffer(source));
+
+      if (!c.ExecuteAction(act))
+         throw build_error(log);
+   }
+
+   // Read the whole file `name' into memory and deserialize it into a
+   // clover module.
+   module
+   load_binary(const char *name) {
+      std::ifstream fs((name));
+      std::vector str((std::istreambuf_iterator(fs)),
+                      (std::istreambuf_iterator()));
+      compat::istream cs(str);
+      return module::deserialize(cs);
+   }
+}
+#endif
+
+// Compile an OpenCL C program through LLVM.  With the implementation
+// compiled out, both arguments are ignored and a default-constructed
+// module is returned.
+module
+clover::compile_program_llvm(const compat::string &source,
+                             const compat::string &target) {
+#if 0
+   build_binary(source, target, "cl_input");
+   module m = load_binary("cl_input.o");
+   std::remove("cl_input.o");
+   return m;
+#endif
+   return module();
+}
diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp
b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
new file mode 100644
index 00000000000..eb27db1aa76
--- /dev/null
+++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <sstream>
+
+#include "core/compiler.hpp"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+#include "util/u_memory.h"
+
+using namespace clover;
+
+namespace {
+   ///
+   /// Parse the kernel table that precedes the TGSI program text.
+   ///
+   /// Each non-blank line has the form
+   ///
+   ///    <kernel name> <start offset> [<argument type> ...]
+   ///
+   /// and appends one symbol to \a m.  Throws build_error if the
+   /// offset cannot be read or an argument type is not recognized.
+   ///
+   void
+   read_header(const std::string &header, module &m) {
+      std::istringstream ls(header);
+      std::string line;
+
+      while (getline(ls, line)) {
+         std::istringstream ts(line);
+         std::string name, tok;
+         module::size_t offset;
+         // NOTE(review): element type inferred from the
+         // module::argument initializers below -- confirm against
+         // core/module.hpp.
+         compat::vector<module::argument> args;
+
+         // Lines without any token are skipped.
+         if (!(ts >> name))
+            continue;
+
+         if (!(ts >> offset))
+            throw build_error("invalid kernel start address");
+
+         while (ts >> tok) {
+            if (tok == "scalar")
+               args.push_back({ module::argument::scalar, 4 });
+            else if (tok == "global")
+               args.push_back({ module::argument::global, 4 });
+            else if (tok == "local")
+               args.push_back({ module::argument::local, 4 });
+            else if (tok == "constant")
+               args.push_back({ module::argument::constant, 4 });
+            else if (tok == "image2d_rd")
+               args.push_back({ module::argument::image2d_rd, 4 });
+            else if (tok == "image2d_wr")
+               args.push_back({ module::argument::image2d_wr, 4 });
+            else if (tok == "image3d_rd")
+               args.push_back({ module::argument::image3d_rd, 4 });
+            else if (tok == "image3d_wr")
+               args.push_back({ module::argument::image3d_wr, 4 });
+            else if (tok == "sampler")
+               args.push_back({ module::argument::sampler, 0 });
+            else
+               throw build_error("invalid kernel argument");
+         }
+
+         m.syms.push_back({ name, 0, offset, args });
+      }
+   }
+
+   ///
+   /// Translate the TGSI program text \a source into binary tokens
+   /// and append them to \a m as a text section.  The translation
+   /// buffer holds at most 1024 tokens.  Throws build_error if the
+   /// translation fails.
+   ///
+   void
+   read_body(const char *source, module &m) {
+      tgsi_token prog[1024];
+
+      if (!tgsi_text_translate(source, prog, Elements(prog)))
+         throw build_error("translate failed");
+
+      unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token);
+      m.secs.push_back({ 0, module::section::text, sz, { (char *)prog, sz } });
+   }
+}
+
+///
+/// Build a module from a program made of a kernel table followed by
+/// TGSI text.  The text starts at the first occurrence of "COMP\n";
+/// everything before it is parsed as the kernel table.  \a target is
+/// not used.  Throws build_error on malformed input.
+///
+module
+clover::compile_program_tgsi(const compat::string &source,
+                             const compat::string &target) {
+   const char *body = source.find("COMP\n");
+   module m;
+
+   // How compat::string::find() reports "not found" is not visible
+   // from here, so reject both a null and a past-the-end pointer
+   // instead of handing either of them to the parsers below.
+   if (!body || body == source.end())
+      throw build_error("missing COMP section");
+
+   read_header({ source.begin(), body }, m);
+   read_body(body, m);
+
+   return m;
+}
diff
--git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
new file mode 100644
index 00000000000..0d233c11b8d
--- /dev/null
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -0,0 +1,36 @@
+AUTOMAKE_OPTIONS = subdir-objects
+
+# The only product of this directory is the libOpenCL libtool library.
+lib_LTLIBRARIES = libOpenCL.la
+
+libOpenCL_la_LDFLAGS = \
+	-version-number 1:0
+
+# The library has no sources of its own (see libOpenCL_la_SOURCES
+# below); it is assembled from the clover state tracker, the Gallium
+# auxiliary library and the pipe-loader dependencies.
+libOpenCL_la_LIBADD = \
+	$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
+	$(top_builddir)/src/gallium/auxiliary/libgallium.a \
+	$(GALLIUM_PIPE_LOADER_LIBS) $(LIBUDEV_LIBS) \
+	-ldl
+
+libOpenCL_la_SOURCES =
+
+# Force usage of a C++ linker
+nodist_EXTRA_libOpenCL_la_SOURCES = dummy.cpp
+
+# Directory whose makefile is invoked by the rules below.
+PIPE_SRC_DIR = $(top_srcdir)/src/gallium/targets/pipe-loader
+
+# Provide compatibility with scripts for the old Mesa build system for
+# a while by putting a link to the driver into /lib of the build tree.
+all-local: libOpenCL.la
+	@$(MAKE) -C $(PIPE_SRC_DIR)
+	$(MKDIR_P) $(top_builddir)/$(LIB_DIR)
+	ln -f .libs/libOpenCL.so* $(top_builddir)/$(LIB_DIR)/
+
+# Run the pipe-loader install target with OPENCL_LIB_INSTALL_DIR as its
+# installation directory.
+install-exec-local:
+	@$(MAKE) -C $(PIPE_SRC_DIR) PIPE_INSTALL_DIR=$(OPENCL_LIB_INSTALL_DIR) install
+
+clean-local:
+	@$(MAKE) -C $(PIPE_SRC_DIR) clean
+
+# FIXME: Remove when the rest of Gallium is converted to automake.
+TOP=$(top_builddir)
+default: all