``PIPE_TRANSFER_FLUSH_EXPLICIT``
Written ranges will be notified later with :ref:`transfer_flush_region`.
Cannot be used with ``PIPE_TRANSFER_READ``.
+
+
+Compute kernel execution
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A compute program can be defined, bound or destroyed using
+``create_compute_state``, ``bind_compute_state`` or
+``delete_compute_state`` respectively.
+
+Any of the subroutines contained within the compute program can be
+executed on the device using the ``launch_grid`` method. This method
+will execute as many instances of the program as there are elements
+in the specified N-dimensional grid, possibly in parallel.
+
+The compute program has access to four special resources:
+
+* ``GLOBAL`` represents a memory space shared among all the threads
+ running on the device. An arbitrary buffer created with the
+ ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the
+ ``set_global_binding`` method.
+
+* ``LOCAL`` represents a memory space shared among all the threads
+ running in the same working group. The initial contents of this
+ resource are undefined.
+
+* ``PRIVATE`` represents a memory space local to a single thread.
+ The initial contents of this resource are undefined.
+
+* ``INPUT`` represents a read-only memory space that can be
+ initialized at ``launch_grid`` time.
+
+These resources use a byte-based addressing scheme, and they can be
+accessed from the compute program by means of the LOAD/STORE TGSI
+opcodes.
+
+In addition, normal texture sampling is allowed from the compute
+program: ``bind_compute_sampler_states`` may be used to set up texture
+samplers for the compute stage and ``set_compute_sampler_views`` may
+be used to bind a number of sampler views to it.
* ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes
a hw limitation. If true, pipe_vertex_element::src_offset must always be
aligned to 4. If false, there are no restrictions on src_offset.
-
+* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the
+ compute entry points defined in pipe_context and pipe_screen.
.. _pipe_capf:
samplers.
+.. _pipe_compute_cap:
+
+PIPE_COMPUTE_CAP_*
+^^^^^^^^^^^^^^^^^^
+
+Compute-specific capabilities. They can be queried using
+pipe_screen::get_compute_param.
+
+* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
+ for grid and block coordinates. Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
+ units. Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
+ units. Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
+ resource. Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
+ resource. Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
+ resource. Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
+ resource. Value type: ``uint64_t``.
+
.. _pipe_bind:
PIPE_BIND_*
* ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer.
* ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another
process.
+* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
+ address space of a compute program.
.. _pipe_usage:
struct pipe_video_buffer;
struct pipe_video_decoder;
struct pipe_viewport_state;
+struct pipe_compute_state;
union pipe_color_union;
union pipe_query_result;
void (*bind_geometry_sampler_states)(struct pipe_context *,
unsigned num_samplers,
void **samplers);
+ void (*bind_compute_sampler_states)(struct pipe_context *,
+ unsigned start_slot,
+ unsigned num_samplers,
+ void **samplers);
void (*delete_sampler_state)(struct pipe_context *, void *);
void * (*create_rasterizer_state)(struct pipe_context *,
unsigned num_views,
struct pipe_sampler_view **);
+ void (*set_compute_sampler_views)(struct pipe_context *,
+ unsigned start_slot, unsigned num_views,
+ struct pipe_sampler_view **);
+
void (*set_vertex_buffers)( struct pipe_context *,
unsigned num_buffers,
const struct pipe_vertex_buffer * );
*/
struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context,
const struct pipe_video_buffer *templat );
+
+ /**
+ * Compute kernel execution
+ */
+ /*@{*/
+ /**
+ * Define the compute program and parameters to be used by
+ * pipe_context::launch_grid.
+ */
+ void *(*create_compute_state)(struct pipe_context *context,
+ const struct pipe_compute_state *);
+ void (*bind_compute_state)(struct pipe_context *, void *);
+ void (*delete_compute_state)(struct pipe_context *, void *);
+
+ /**
+ * Bind an array of buffers to be mapped into the address space of
+ * the GLOBAL resource. Any buffers that were previously bound
+ * between [first, first + count - 1] are unbound after this call.
+ *
+ * \param first first buffer to map.
+ * \param count number of consecutive buffers to map.
+ * \param resources array of pointers to the buffers to map, it
+ * should contain at least \a count elements
+ * unless it's NULL, in which case no new
+ * resources will be bound.
+ * \param handles array of pointers to the memory locations that
+ * will be filled with the respective base
+ * addresses each buffer will be mapped to. It
+ * should contain at least \a count elements,
+ * unless \a resources is NULL in which case \a
+ * handles should be NULL as well.
+ *
+ * Note that the driver isn't required to make any guarantees about
+ * the contents of the \a handles array being valid anytime except
+ * during the subsequent calls to pipe_context::launch_grid. This
+ * means that the only sensible location handles[i] may point to is
+ * somewhere within the INPUT buffer itself. This is needed to
+ * accommodate implementations that lack virtual memory but
+ * nevertheless migrate buffers on the fly, leading to resource
+ * base addresses that change on each kernel invocation or are
+ * unknown to the pipe driver.
+ */
+ void (*set_global_binding)(struct pipe_context *context,
+ unsigned first, unsigned count,
+ struct pipe_resource **resources,
+ uint32_t **handles);
+
+ /**
+ * Launch the compute kernel starting from instruction \a pc of the
+ * currently bound compute program.
+ *
+ * \a grid_layout and \a block_layout are arrays whose size is the
+ * value of PIPE_COMPUTE_CAP_GRID_DIMENSION; they determine the
+ * layout of the grid (in block units) and working block (in thread
+ * units) to be used, respectively.
+ *
+ * \a input will be used to initialize the INPUT resource, and it
+ * should point to a buffer of at least
+ * pipe_compute_state::req_input_mem bytes.
+ */
+ void (*launch_grid)(struct pipe_context *context,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t pc, const void *input);
+ /*@}*/
};
#define PIPE_BIND_STREAM_OUTPUT (1 << 11) /* set_stream_output_buffers */
#define PIPE_BIND_CURSOR (1 << 16) /* mouse cursor */
#define PIPE_BIND_CUSTOM (1 << 17) /* state-tracker/winsys usages */
+#define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */
/* The first two flags above were previously part of the amorphous
* TEXTURE_USAGE, most of which are now descriptions of the ways a
#define PIPE_SHADER_VERTEX 0
#define PIPE_SHADER_FRAGMENT 1
#define PIPE_SHADER_GEOMETRY 2
-#define PIPE_SHADER_TYPES 3
+#define PIPE_SHADER_COMPUTE 3
+#define PIPE_SHADER_TYPES 4
/**
PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65,
PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66,
PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67,
+ PIPE_CAP_COMPUTE = 68
};
/**
PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18
};
+/**
+ * Compute-specific implementation capabilities. They can be queried
+ * using pipe_screen::get_compute_param.
+ *
+ * The value type written for each capability is noted next to it;
+ * "uint64_t []" denotes an array of uint64_t values.
+ */
+enum pipe_compute_cap
+{
+ PIPE_COMPUTE_CAP_GRID_DIMENSION, /**< Number of supported dimensions for grid and block coordinates (uint64_t). */
+ PIPE_COMPUTE_CAP_MAX_GRID_SIZE, /**< Maximum grid size in block units (uint64_t []). */
+ PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, /**< Maximum block size in thread units (uint64_t []). */
+ PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, /**< Maximum size of the GLOBAL resource (uint64_t). */
+ PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, /**< Maximum size of the LOCAL resource (uint64_t). */
+ PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, /**< Maximum size of the PRIVATE resource (uint64_t). */
+ PIPE_COMPUTE_CAP_MAX_INPUT_SIZE /**< Maximum size of the INPUT resource (uint64_t). */
+};
/**
* Composite query types
enum pipe_video_profile profile,
enum pipe_video_cap param );
+ /**
+ * Query a compute-specific capability/parameter/limit.
+ * \param param one of PIPE_COMPUTE_CAP_x
+ * \param ret pointer to a preallocated buffer that will be
+ * initialized to the parameter value, or NULL.
+ * \return size in bytes of the parameter value that would be
+ * returned.
+ */
+ int (*get_compute_param)(struct pipe_screen *,
+ enum pipe_compute_cap param,
+ void *ret);
+
struct pipe_context * (*context_create)( struct pipe_screen *,
void *priv );
unsigned ReturnTypeW : 6; /**< one of enum pipe_type */
};
+/*
+ * Special resources that don't need to be declared. They map to the
+ * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces, which use a
+ * byte-based addressing scheme and are accessed from the compute
+ * program by means of the LOAD/STORE opcodes.
+ */
+#define TGSI_RESOURCE_GLOBAL 0x7fff
+#define TGSI_RESOURCE_LOCAL 0x7ffe
+#define TGSI_RESOURCE_PRIVATE 0x7ffd
+#define TGSI_RESOURCE_INPUT 0x7ffc
+
+
#define TGSI_IMM_FLOAT32 0
#define TGSI_IMM_UINT32 1
#define TGSI_IMM_INT32 2
unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
};
+/**
+ * Description of a compute program and its memory requirements, as
+ * passed to pipe_context::create_compute_state.
+ *
+ * NOTE(review): req_local_mem and req_private_mem are presumably byte
+ * counts as well, since the resources are byte-addressed -- confirm.
+ */
+struct pipe_compute_state
+{
+ const struct tgsi_token *tokens; /**< Compute program to be executed. */
+ unsigned req_local_mem; /**< Required size of the LOCAL resource. */
+ unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
+ unsigned req_input_mem; /**< Required size of the INPUT resource, in bytes; the input buffer given to launch_grid must be at least this large. */
+};
#ifdef __cplusplus
}