gallium: Basic compute interface.

author Francisco Jerez <currojerez@riseup.net>

Wed, 25 Apr 2012 20:15:16 +0000 (22:15 +0200)

committer Francisco Jerez <currojerez@riseup.net>

Fri, 11 May 2012 10:39:39 +0000 (12:39 +0200)
author Francisco Jerez <currojerez@riseup.net>
Wed, 25 Apr 2012 20:15:16 +0000 (22:15 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 11 May 2012 10:39:39 +0000 (12:39 +0200)
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst

index b2872cd282f39f40219050b0d8fe9bcb8bacd84e..cb9b8de7d5a6d4de671a7623bad28e41780836ae 100644 (file)
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -542,3 +542,42 @@ These flags control the behavior of a transfer object.
  ``PIPE_TRANSFER_FLUSH_EXPLICIT``
    Written ranges will be notified later with :ref:`transfer_flush_region`.
    Cannot be used with ``PIPE_TRANSFER_READ``.
+
+
+Compute kernel execution
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A compute program can be defined, bound or destroyed using
+``create_compute_state``, ``bind_compute_state`` or
+``delete_compute_state`` respectively.
+
+Any of the subroutines contained within the compute program can be
+executed on the device using the ``launch_grid`` method.  This method
+will execute as many instances of the program as elements in the
+specified N-dimensional grid, hopefully in parallel.
+
+The compute program has access to four special resources:
+
+* ``GLOBAL`` represents a memory space shared among all the threads
+  running on the device.  An arbitrary buffer created with the
+  ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the
+  ``set_global_binding`` method.
+
+* ``LOCAL`` represents a memory space shared among all the threads
+  running in the same working group.  The initial contents of this
+  resource are undefined.
+
+* ``PRIVATE`` represents a memory space local to a single thread.
+  The initial contents of this resource are undefined.
+
+* ``INPUT`` represents a read-only memory space that can be
+  initialized at ``launch_grid`` time.
+
+These resources use a byte-based addressing scheme, and they can be
+accessed from the compute program by means of the LOAD/STORE TGSI
+opcodes.
+
+In addition, normal texture sampling is allowed from the compute
+program: ``bind_compute_sampler_states`` may be used to set up texture
+samplers for the compute stage and ``set_compute_sampler_views`` may
+be used to bind a number of sampler views to it.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst

index 05f7e8fc5397a4eb9bbf14bc7882b98f8aee201b..5d8280a244bdba4aaa8b3d353ac7a9db9c5cbbf1 100644 (file)
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -110,7 +110,8 @@ The integer capabilities:
  * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes
    a hw limitation.  If true, pipe_vertex_element::src_offset must always be
    aligned to 4.  If false, there are no restrictions on src_offset.
-
+* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the
+  compute entry points defined in pipe_context and pipe_screen.
  
  
  .. _pipe_capf:
@@ -186,6 +187,29 @@ to be 0.
    samplers.
  
  
+.. _pipe_compute_cap:
+
+PIPE_COMPUTE_CAP_*
+^^^^^^^^^^^^^^^^^^
+
+Compute-specific capabilities. They can be queried using
+pipe_screen::get_compute_param.
+
+* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
+  for grid and block coordinates.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
+  resource.  Value type: ``uint64_t``.
+
  .. _pipe_bind:
  
  PIPE_BIND_*
@@ -223,6 +247,8 @@ resources might be created and handled quite differently.
  * ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer.
  * ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another
    process.
+* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
+  address space of a compute program.
  
  .. _pipe_usage:
  
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h

index 8b4a1588b86c11e1492a342a676c6e957388520a..3c0b89e9c2d8d8ae84ac72191333f817061b9f7f 100644 (file)
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -63,6 +63,7 @@ struct pipe_vertex_element;
  struct pipe_video_buffer;
  struct pipe_video_decoder;
  struct pipe_viewport_state;
+struct pipe_compute_state;
  union pipe_color_union;
  union pipe_query_result;
  
@@ -141,6 +142,10 @@ struct pipe_context {
     void   (*bind_geometry_sampler_states)(struct pipe_context *,
                                            unsigned num_samplers,
                                            void **samplers);
+   void   (*bind_compute_sampler_states)(struct pipe_context *,
+                                         unsigned start_slot,
+                                         unsigned num_samplers,
+                                         void **samplers);
     void   (*delete_sampler_state)(struct pipe_context *, void *);
  
     void * (*create_rasterizer_state)(struct pipe_context *,
@@ -220,6 +225,10 @@ struct pipe_context {
                                        unsigned num_views,
                                        struct pipe_sampler_view **);
  
+   void (*set_compute_sampler_views)(struct pipe_context *,
+                                     unsigned start_slot, unsigned num_views,
+                                     struct pipe_sampler_view **);
+
     void (*set_vertex_buffers)( struct pipe_context *,
                                 unsigned num_buffers,
                                 const struct pipe_vertex_buffer * );
@@ -418,6 +427,70 @@ struct pipe_context {
      */
     struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context,
                                                       const struct pipe_video_buffer *templat );
+
+   /**
+    * Compute kernel execution
+    */
+   /*@{*/
+   /**
+    * Define the compute program and parameters to be used by
+    * pipe_context::launch_grid.
+    */
+   void *(*create_compute_state)(struct pipe_context *context,
+                                const struct pipe_compute_state *);
+   void (*bind_compute_state)(struct pipe_context *, void *);
+   void (*delete_compute_state)(struct pipe_context *, void *);
+
+   /**
+    * Bind an array of buffers to be mapped into the address space of
+    * the GLOBAL resource.  Any buffers that were previously bound
+    * between [first, first + count - 1] are unbound after this call.
+    *
+    * \param first      first buffer to map.
+    * \param count      number of consecutive buffers to map.
+    * \param resources  array of pointers to the buffers to map, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no new
+    *                   resources will be bound.
+    * \param handles    array of pointers to the memory locations that
+    *                   will be filled with the respective base
+    *                   addresses each buffer will be mapped to.  It
+    *                   should contain at least \a count elements,
+    *                   unless \a resources is NULL in which case \a
+    *                   handles should be NULL as well.
+    *
+    * Note that the driver isn't required to make any guarantees about
+    * the contents of the \a handles array being valid anytime except
+    * during the subsequent calls to pipe_context::launch_grid.  This
+    * means that the only sensible location handles[i] may point to is
+    * somewhere within the INPUT buffer itself.  This is so to
+    * accommodate implementations that lack virtual memory but
+    * nevertheless migrate buffers on the fly, leading to resource
+    * base addresses that change on each kernel invocation or are
+    * unknown to the pipe driver.
+    */
+   void (*set_global_binding)(struct pipe_context *context,
+                              unsigned first, unsigned count,
+                              struct pipe_resource **resources,
+                              uint32_t **handles);
+
+   /**
+    * Launch the compute kernel starting from instruction \a pc of the
+    * currently bound compute program.
+    *
+    * \a grid_layout and \a block_layout are arrays of size \a
+    * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
+    * grid (in block units) and working block (in thread units) to be
+    * used, respectively.
+    *
+    * \a input will be used to initialize the INPUT resource, and it
+    * should point to a buffer of at least
+    * pipe_compute_state::req_input_mem bytes.
+    */
+   void (*launch_grid)(struct pipe_context *context,
+                       const uint *block_layout, const uint *grid_layout,
+                       uint32_t pc, const void *input);
+   /*@}*/
  };
  
  
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h

index 8b6d00d4318ed12b7a5cb3b9c6deb44e3dc2cbd0..c4c217b911f4353d1d0f439f8aa34559f363dd56 100644 (file)
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -304,6 +304,7 @@ enum pipe_transfer_usage {
  #define PIPE_BIND_STREAM_OUTPUT        (1 << 11) /* set_stream_output_buffers */
  #define PIPE_BIND_CURSOR               (1 << 16) /* mouse cursor */
  #define PIPE_BIND_CUSTOM               (1 << 17) /* state-tracker/winsys usages */
+#define PIPE_BIND_GLOBAL               (1 << 18) /* set_global_binding */
  
  /* The first two flags above were previously part of the amorphous
   * TEXTURE_USAGE, most of which are now descriptions of the ways a
@@ -346,7 +347,8 @@ enum pipe_transfer_usage {
  #define PIPE_SHADER_VERTEX   0
  #define PIPE_SHADER_FRAGMENT 1
  #define PIPE_SHADER_GEOMETRY 2
-#define PIPE_SHADER_TYPES    3
+#define PIPE_SHADER_COMPUTE  3
+#define PIPE_SHADER_TYPES    4
  
  
  /**
@@ -477,6 +479,7 @@ enum pipe_cap {
     PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65,
     PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66,
     PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67,
+   PIPE_CAP_COMPUTE = 68
  };
  
  /**
@@ -522,6 +525,20 @@ enum pipe_shader_cap
     PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18
  };
  
+/**
+ * Compute-specific implementation capabilities.  They can be queried
+ * using pipe_screen::get_compute_param.
+ */
+enum pipe_compute_cap
+{
+   PIPE_COMPUTE_CAP_GRID_DIMENSION,   /**< grid/block dimensions, uint64_t */
+   PIPE_COMPUTE_CAP_MAX_GRID_SIZE,    /**< in block units, uint64_t[] */
+   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,   /**< in thread units, uint64_t[] */
+   PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE,  /**< max GLOBAL size, uint64_t */
+   PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,   /**< max LOCAL size, uint64_t */
+   PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, /**< max PRIVATE size, uint64_t */
+   PIPE_COMPUTE_CAP_MAX_INPUT_SIZE    /**< max INPUT size, uint64_t */
+};
  
  /**
   * Composite query types
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h

index 45c441b2fcf775bcd79d633be088c12e28953c2f..7ae7c9a04e15ad553aa95b8fde3fe2425b9e1cfd 100644 (file)
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -98,6 +98,18 @@ struct pipe_screen {
                            enum pipe_video_profile profile,
                            enum pipe_video_cap param );
  
+   /**
+    * Query a compute-specific capability/parameter/limit.
+    * \param param  one of PIPE_COMPUTE_CAP_x
+    * \param ret    pointer to a preallocated buffer that will be
+    *               initialized to the parameter value, or NULL.
+    * \return       size in bytes of the parameter value that would be
+    *               returned.
+    */
+   int (*get_compute_param)(struct pipe_screen *,
+                           enum pipe_compute_cap param,
+                           void *ret);
+
     struct pipe_context * (*context_create)( struct pipe_screen *,
                                             void *priv );
  
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h

index df2dd5e618ef0d7ff2689fd67ab8ff1da4849b85..9d08fde9f032c34e34b0ced6c5c651ac2499b48b 100644 (file)
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -166,6 +166,15 @@ struct tgsi_declaration_resource {
     unsigned ReturnTypeW : 6; /**< one of enum pipe_type */
  };
  
+/*
+ * Special resources that don't need to be declared.  They map to the
+ * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces.
+ */
+#define TGSI_RESOURCE_GLOBAL   0x7fff
+#define TGSI_RESOURCE_LOCAL    0x7ffe
+#define TGSI_RESOURCE_PRIVATE  0x7ffd
+#define TGSI_RESOURCE_INPUT    0x7ffc
+
  #define TGSI_IMM_FLOAT32   0
  #define TGSI_IMM_UINT32    1
  #define TGSI_IMM_INT32     2
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h

index a459a56a760e40b1973111ca50a26b0dcc03fc45..74f4ebd8f4cf4c564fae805aa7255741c3c8e0c3 100644 (file)
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -580,6 +580,13 @@ struct pipe_resolve_info
     unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
  };
  
+struct pipe_compute_state
+{
+   const struct tgsi_token *tokens; /**< Compute program to be executed. */
+   unsigned req_local_mem; /**< Required size of the LOCAL resource. */
+   unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
+   unsigned req_input_mem; /**< Required size in bytes of the INPUT resource. */
+};
  
  #ifdef __cplusplus
  }
author	Francisco Jerez <currojerez@riseup.net>
	Wed, 25 Apr 2012 20:15:16 +0000 (22:15 +0200)
committer	Francisco Jerez <currojerez@riseup.net>
	Fri, 11 May 2012 10:39:39 +0000 (12:39 +0200)
src/gallium/docs/source/context.rst		patch \| blob \| history
src/gallium/docs/source/screen.rst		patch \| blob \| history
src/gallium/include/pipe/p_context.h		patch \| blob \| history
src/gallium/include/pipe/p_defines.h		patch \| blob \| history
src/gallium/include/pipe/p_screen.h		patch \| blob \| history
src/gallium/include/pipe/p_shader_tokens.h		patch \| blob \| history
src/gallium/include/pipe/p_state.h		patch \| blob \| history