softpipe: add support for compute shaders. (v2)

author Dave Airlie <airlied@redhat.com>

Tue, 26 Apr 2016 04:32:52 +0000 (14:32 +1000)

committer Dave Airlie <airlied@redhat.com>

Tue, 26 Apr 2016 23:01:03 +0000 (09:01 +1000)
author Dave Airlie <airlied@redhat.com>
Tue, 26 Apr 2016 04:32:52 +0000 (14:32 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 26 Apr 2016 23:01:03 +0000 (09:01 +1000)
diff --git a/src/gallium/drivers/softpipe/Makefile.sources b/src/gallium/drivers/softpipe/Makefile.sources

index 1d42351f97552fbe9737fe39144c63a75e0174f9..d72266f270f0634a1e71d777a69b8c69c99eedc0 100644 (file)
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -4,6 +4,7 @@ C_SOURCES := \
         sp_clear.h \
         sp_context.c \
         sp_context.h \
+       sp_compute.c \
         sp_draw_arrays.c \
         sp_fence.c \
         sp_fence.h \
diff --git a/src/gallium/drivers/softpipe/sp_compute.c b/src/gallium/drivers/softpipe/sp_compute.c

new file mode 100644 (file)

index 0000000..d5b5913
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_compute.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2016 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pstipple.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "sp_context.h"
+#include "sp_screen.h"
+#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+#include "sp_tex_tile_cache.h"
+#include "tgsi/tgsi_parse.h"
+
+/**
+ * Broadcast the integer triple (x, y, z) into components 0..2 of the
+ * system value that \p semantic maps to, for each of the TGSI_QUAD_SIZE
+ * channels.  Nothing is written when SysSemanticToIndex[] holds -1 for
+ * the semantic.
+ */
+static void
+cs_set_sysval_xyz(struct tgsi_exec_machine *machine, unsigned semantic,
+                  int x, int y, int z)
+{
+   unsigned slot;
+   int chan;
+
+   if (machine->SysSemanticToIndex[semantic] == -1)
+      return;
+
+   slot = machine->SysSemanticToIndex[semantic];
+   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+      machine->SystemValue[slot].xyzw[0].i[chan] = x;
+      machine->SystemValue[slot].xyzw[1].i[chan] = y;
+      machine->SystemValue[slot].xyzw[2].i[chan] = z;
+   }
+}
+
+/**
+ * Bind the compute shader to one interpreter machine and load the system
+ * values that stay fixed for that machine across the whole launch.
+ *
+ * \param cs               shader whose tokens are bound to the machine
+ * \param machine          interpreter instance to set up
+ * \param w, h, d          stored as TGSI_SEMANTIC_THREAD_ID
+ * \param g_w, g_h, g_d    stored as TGSI_SEMANTIC_GRID_SIZE
+ * \param b_w, b_h, b_d    stored as TGSI_SEMANTIC_BLOCK_SIZE
+ * \param sampler, image, buffer  resource interfaces handed to
+ *                         tgsi_exec_machine_bind_shader()
+ */
+static void
+cs_prepare(const struct sp_compute_shader *cs,
+           struct tgsi_exec_machine *machine,
+           int w, int h, int d,
+           int g_w, int g_h, int g_d,
+           int b_w, int b_h, int b_d,
+           struct tgsi_sampler *sampler,
+           struct tgsi_image *image,
+           struct tgsi_buffer *buffer )
+{
+   /* The shader must be bound before SysSemanticToIndex[] is consulted. */
+   tgsi_exec_machine_bind_shader(machine, cs->tokens, sampler, image, buffer);
+
+   cs_set_sysval_xyz(machine, TGSI_SEMANTIC_THREAD_ID, w, h, d);
+   cs_set_sysval_xyz(machine, TGSI_SEMANTIC_GRID_SIZE, g_w, g_h, g_d);
+   cs_set_sysval_xyz(machine, TGSI_SEMANTIC_BLOCK_SIZE, b_w, b_h, b_d);
+}
+
+/**
+ * Start, or resume, a single machine of a work group.
+ *
+ * On a fresh start (\p restart is false) the TGSI_SEMANTIC_BLOCK_ID system
+ * value, when the shader uses it, is loaded with (g_w, g_h, g_d) and the
+ * machine is run from instruction 0.  On a restart the machine continues
+ * from its saved machine->pc and no state is reloaded.
+ *
+ * \param cs  unused here; kept for symmetry with the other cs_* helpers
+ * \return true if machine->pc is not -1 once the interpreter returns.
+ *         run_workgroup() treats that as "needs to be run again".
+ */
+static bool
+cs_run(const struct sp_compute_shader *cs,
+       int g_w, int g_h, int g_d,
+       struct tgsi_exec_machine *machine, bool restart)
+{
+   if (!restart) {
+      const int block_id_idx =
+         machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
+
+      if (block_id_idx != -1) {
+         const unsigned slot = block_id_idx;
+         int chan;
+
+         for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+            machine->SystemValue[slot].xyzw[0].i[chan] = g_w;
+            machine->SystemValue[slot].xyzw[1].i[chan] = g_h;
+            machine->SystemValue[slot].xyzw[2].i[chan] = g_d;
+         }
+      }
+
+      /* (1 << 1) - 1 == 0x1: only bit 0 of the mask is set.
+       * NOTE(review): presumably each machine carries one compute
+       * invocation in channel 0 - confirm against tgsi_exec.
+       */
+      machine->NonHelperMask = (1 << 1) - 1;
+   }
+
+   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
+
+   return machine->pc != -1;
+}
+
+/**
+ * Run every machine of one work group until none of them asks to be
+ * resumed.
+ *
+ * Each pass calls cs_run() once on every machine, in index order.  The
+ * first pass starts the machines from scratch.  If any machine reported
+ * that it stopped early, another pass is made in which all machines are
+ * resumed.
+ *
+ * \param g_w, g_h, g_d  work-group coordinates forwarded to cs_run()
+ * \param num_threads    number of entries in \p machines
+ */
+static void
+run_workgroup(const struct sp_compute_shader *cs,
+              int g_w, int g_h, int g_d, int num_threads,
+              struct tgsi_exec_machine **machines)
+{
+   bool resume = false;
+
+   do {
+      bool any_pending = false;
+      int t;
+
+      for (t = 0; t < num_threads; t++) {
+         /* Every machine must be run, even once one has reported pending. */
+         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
+            any_pending = true;
+      }
+
+      resume = any_pending;
+   } while (resume);
+}
+
+/**
+ * Detach the compute shader from \p machine.
+ *
+ * The machine is only touched when it is still bound to this shader's
+ * tokens; in that case it is rebound with all-NULL arguments.
+ */
+static void
+cs_delete(const struct sp_compute_shader *cs,
+          struct tgsi_exec_machine *machine)
+{
+   if (machine->Tokens != cs->tokens)
+      return;
+
+   tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Work out the grid dimensions for a launch.
+ *
+ * For a direct launch (info->indirect is NULL) the three values are copied
+ * from info->grid[].  For an indirect launch, three consecutive uint32_t
+ * values are read from info->indirect at byte offset info->indirect_offset.
+ *
+ * If the indirect buffer cannot be mapped, \p grid_size is set to
+ * {0, 0, 0}, so the caller never sees uninitialised values.
+ *
+ * \param context    context used to map/unmap the indirect buffer
+ * \param info       launch description
+ * \param grid_size  receives the x, y and z grid dimensions
+ */
+static void
+fill_grid_size(struct pipe_context *context,
+               const struct pipe_grid_info *info,
+               uint32_t grid_size[3])
+{
+   /* Initialised so the failure check below never reads garbage. */
+   struct pipe_transfer *transfer = NULL;
+   const uint32_t *params;
+
+   if (!info->indirect) {
+      grid_size[0] = info->grid[0];
+      grid_size[1] = info->grid[1];
+      grid_size[2] = info->grid[2];
+      return;
+   }
+
+   params = pipe_buffer_map_range(context, info->indirect,
+                                  info->indirect_offset,
+                                  3 * sizeof(uint32_t),
+                                  PIPE_TRANSFER_READ,
+                                  &transfer);
+
+   if (!params || !transfer) {
+      /* Mapping failed: report an empty grid. */
+      grid_size[0] = 0;
+      grid_size[1] = 0;
+      grid_size[2] = 0;
+      return;
+   }
+
+   grid_size[0] = params[0];
+   grid_size[1] = params[1];
+   grid_size[2] = params[2];
+   pipe_buffer_unmap(context, transfer);
+}
+
+/**
+ * Execute the currently bound compute shader (softpipe->cs) over a grid.
+ *
+ * One tgsi_exec_machine is created per thread of a work group; the block
+ * dimensions come from the shader's TGSI_PROPERTY_CS_FIXED_BLOCK_*
+ * properties.  All machines share one zero-initialised local memory
+ * allocation of cs->shader.req_local_mem bytes.  The same set of machines
+ * is then reused for every work group of the grid, iterating x fastest,
+ * then y, then z.
+ *
+ * If any allocation fails, nothing is executed and everything allocated so
+ * far is released.
+ *
+ * \param context  softpipe context; a compute shader must be bound
+ *                 (softpipe->cs is dereferenced unconditionally)
+ * \param info     launch description, direct or indirect
+ */
+void
+softpipe_launch_grid(struct pipe_context *context,
+                     const struct pipe_grid_info *info)
+{
+   struct softpipe_context *softpipe = softpipe_context(context);
+   struct sp_compute_shader *cs = softpipe->cs;
+   int num_threads_in_group;
+   struct tgsi_exec_machine **machines = NULL;
+   int bwidth, bheight, bdepth;
+   int w, h, d, i;
+   int g_w, g_h, g_d;
+   /* Zeroed so that a failed indirect read results in an empty dispatch. */
+   uint32_t grid_size[3] = { 0, 0, 0 };
+   void *local_mem = NULL;
+
+   softpipe_update_compute_samplers(softpipe);
+   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
+   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
+   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+   num_threads_in_group = bwidth * bheight * bdepth;
+
+   fill_grid_size(context, info, grid_size);
+
+   if (cs->shader.req_local_mem) {
+      local_mem = CALLOC(1, cs->shader.req_local_mem);
+      if (!local_mem)
+         return;
+   }
+
+   /* Zero-filled so the cleanup path can tell created machines from
+    * never-created ones.
+    */
+   machines = CALLOC(num_threads_in_group, sizeof(struct tgsi_exec_machine *));
+   if (!machines)
+      goto out;
+
+   /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
+   for (d = 0; d < bdepth; d++) {
+      for (h = 0; h < bheight; h++) {
+         for (w = 0; w < bwidth; w++) {
+            int idx = w + (h * bwidth) + (d * bheight * bwidth);
+            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
+            if (!machines[idx])
+               goto out;
+
+            machines[idx]->LocalMem = local_mem;
+            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
+            cs_prepare(cs, machines[idx],
+                       w, h, d,
+                       grid_size[0], grid_size[1], grid_size[2],
+                       bwidth, bheight, bdepth,
+                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
+                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
+                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
+            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
+                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
+                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
+         }
+      }
+   }
+
+   for (g_d = 0; (uint32_t)g_d < grid_size[2]; g_d++) {
+      for (g_h = 0; (uint32_t)g_h < grid_size[1]; g_h++) {
+         for (g_w = 0; (uint32_t)g_w < grid_size[0]; g_w++) {
+            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
+         }
+      }
+   }
+
+out:
+   if (machines) {
+      for (i = 0; i < num_threads_in_group; i++) {
+         /* Entries past a failed creation are still NULL. */
+         if (!machines[i])
+            continue;
+         cs_delete(cs, machines[i]);
+         tgsi_exec_machine_destroy(machines[i]);
+      }
+   }
+
+   FREE(local_mem);
+   FREE(machines);
+}
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c

index e3ec52462a897882055bac15cd5b3f57aefb6fa5..1690e38f1caa9c33f0f4a8718d2c02c98cba2dd0 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -212,6 +212,7 @@ softpipe_create_context(struct pipe_screen *screen,
  
     softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
     softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
+   softpipe->dump_cs = debug_get_bool_option( "SOFTPIPE_DUMP_CS", FALSE );
  
     softpipe->pipe.screen = screen;
     softpipe->pipe.destroy = softpipe_destroy;
@@ -233,6 +234,8 @@ softpipe_create_context(struct pipe_screen *screen,
  
     softpipe->pipe.draw_vbo = softpipe_draw_vbo;
  
+   softpipe->pipe.launch_grid = softpipe_launch_grid;
+
     softpipe->pipe.clear = softpipe_clear;
     softpipe->pipe.flush = softpipe_flush_wrapped;
     softpipe->pipe.texture_barrier = softpipe_texture_barrier;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h

index 70d00c88b6e46b2ca58001b63f898f3468381d9b..a57f58755372cdd86480cdfaf92dcd92e6b6b8b0 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -71,6 +71,7 @@ struct softpipe_context {
     struct sp_geometry_shader *gs;
     struct sp_velems_state *velems;
     struct sp_so_state *so;
+   struct sp_compute_shader *cs;
  
     /** Other rendering state */
     struct pipe_blend_color blend_color;
@@ -205,10 +206,11 @@ struct softpipe_context {
      * XXX wouldn't it make more sense for the tile cache to just be part
      * of sp_sampler_view?
      */
-   struct softpipe_tex_tile_cache *tex_cache[PIPE_SHADER_GEOMETRY+1][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   struct softpipe_tex_tile_cache *tex_cache[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
  
     unsigned dump_fs : 1;
     unsigned dump_gs : 1;
+   unsigned dump_cs : 1;
     unsigned no_rast : 1;
  };
  
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c

index d89d95c884c3d2eb2855274beb4ba4e58d1c0f14..031602bafc03ba7634e8ce11d4a283bf4568cd81 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -157,7 +157,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
        return 0;
     case PIPE_CAP_COMPUTE:
-      return 0;
+      return 1;
     case PIPE_CAP_USER_VERTEX_BUFFERS:
     case PIPE_CAP_USER_INDEX_BUFFERS:
     case PIPE_CAP_USER_CONSTANT_BUFFERS:
@@ -289,6 +289,8 @@ softpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe
     {
     case PIPE_SHADER_FRAGMENT:
        return tgsi_exec_get_shader_param(param);
+   case PIPE_SHADER_COMPUTE:
+      return tgsi_exec_get_shader_param(param);
     case PIPE_SHADER_VERTEX:
     case PIPE_SHADER_GEOMETRY:
        if (sp_screen->use_llvm)
@@ -447,6 +449,57 @@ softpipe_get_timestamp(struct pipe_screen *_screen)
     return os_time_get_nano();
  }
  
+/**
+ * pipe_screen::get_compute_param hook.
+ *
+ * For the caps answered here, the value is written to \p ret as one or
+ * three uint64_t entries when \p ret is non-NULL, and the size of the
+ * answer in bytes is returned either way.  All other caps return 0 and
+ * leave \p ret untouched.
+ *
+ * \param _screen  unused
+ * \param ir_type  unused
+ * \param param    capability being queried
+ * \param ret      optional output buffer
+ * \return size in bytes of the answer, or 0
+ */
+static int
+softpipe_get_compute_param(struct pipe_screen *_screen,
+                           enum pipe_shader_ir ir_type,
+                           enum pipe_compute_cap param,
+                           void *ret)
+{
+   uint64_t *out = ret;
+   int axis;
+
+   switch (param) {
+   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+      if (out) {
+         for (axis = 0; axis < 3; axis++)
+            out[axis] = 65535;
+      }
+      return 3 * sizeof(uint64_t) ;
+   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+      if (out) {
+         for (axis = 0; axis < 3; axis++)
+            out[axis] = 1024;
+      }
+      return 3 * sizeof(uint64_t);
+   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+      if (out)
+         out[0] = 1024;
+      return sizeof(uint64_t);
+   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+      if (out)
+         out[0] = 32768;
+      return sizeof(uint64_t);
+   /* Everything below reports 0 bytes. */
+   case PIPE_COMPUTE_CAP_IR_TARGET:
+   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      break;
+   }
+   return 0;
+}
+
  /**
   * Create a new pipe_screen object
   * Note: we're not presently subclassing pipe_screen (no softpipe_screen).
@@ -473,7 +526,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
     screen->base.is_format_supported = softpipe_is_format_supported;
     screen->base.context_create = softpipe_create_context;
     screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
-
+   screen->base.get_compute_param = softpipe_get_compute_param;
     screen->use_llvm = debug_get_option_use_llvm();
  
     util_format_s3tc_init();
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h

index 2fc48ab13d8148b63c08ff18ff01c921d8e50148..0ced70c305d8a47abc5dce0caf1c494bb8257fd6 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -131,6 +131,13 @@ struct sp_so_state {
     struct pipe_stream_output_info base;
  };
  
+/**
+ * Subclass of pipe_compute_state.
+ *
+ * Created by softpipe_create_compute_state() and released by
+ * softpipe_delete_compute_state().
+ */
+struct sp_compute_shader {
+   struct pipe_compute_state shader;  /* copy of the creation template */
+   struct tgsi_token *tokens;         /* private tgsi_dup_tokens() copy */
+   struct tgsi_shader_info info;      /* filled by tgsi_scan_shader() */
+   int max_sampler;             /* -1 if no samplers; taken from info.file_max[TGSI_FILE_SAMPLER] */
+};
  
  void
  softpipe_init_blend_funcs(struct pipe_context *pipe);
@@ -213,4 +220,10 @@ void
  softpipe_cleanup_geometry_sampling(struct softpipe_context *ctx);
  
  
+/**
+ * Run the bound compute shader over the grid described by \p info.
+ * Installed as pipe_context::launch_grid.
+ */
+void
+softpipe_launch_grid(struct pipe_context *context,
+                     const struct pipe_grid_info *info);
+
+/**
+ * Call set_shader_sampler() for PIPE_SHADER_COMPUTE using the bound
+ * compute shader's max_sampler.  softpipe->cs is dereferenced, so a
+ * compute shader must be bound.
+ */
+void
+softpipe_update_compute_samplers(struct softpipe_context *softpipe);
  #endif
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c

index 4ce9d95bc6e8f6d2ba75ce424484c64a8352c6cd..92b73783ca85eaecfc1756a41315c85f410a44ec 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -292,6 +292,12 @@ set_shader_sampler(struct softpipe_context *softpipe,
     }
  }
  
+/**
+ * Set up the samplers of the compute stage for the bound compute shader.
+ * softpipe->cs must not be NULL.
+ */
+void
+softpipe_update_compute_samplers(struct softpipe_context *softpipe)
+{
+   const struct sp_compute_shader *bound_cs = softpipe->cs;
+
+   set_shader_sampler(softpipe, PIPE_SHADER_COMPUTE, bound_cs->max_sampler);
+}
+
  static void
  update_tgsi_samplers( struct softpipe_context *softpipe )
  {
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c

index f0d66a53ec6bc76ed7c82888c1a8f797ecc14933..38673d85cdf9edd1e0275139bc135633faf0400e 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -378,6 +378,55 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
     }
  }
  
+/**
+ * pipe_context::create_compute_state hook.
+ *
+ * Only PIPE_SHADER_IR_TGSI programs are accepted.  The template is copied
+ * into the new object, the TGSI tokens are duplicated and scanned, and
+ * max_sampler is taken from the scan result.  When SOFTPIPE_DUMP_CS is
+ * enabled the incoming tokens are dumped first.
+ *
+ * \param pipe   softpipe context
+ * \param templ  compute state template; templ->prog holds the TGSI tokens
+ * \return a new sp_compute_shader, or NULL for a non-TGSI program or when
+ *         an allocation fails
+ */
+static void *
+softpipe_create_compute_state(struct pipe_context *pipe,
+                              const struct pipe_compute_state *templ)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   const struct tgsi_token *tokens;
+   struct sp_compute_shader *state;
+   if (templ->ir_type != PIPE_SHADER_IR_TGSI)
+      return NULL;
+
+   tokens = templ->prog;
+   /* debug */
+   if (softpipe->dump_cs)
+      tgsi_dump(tokens, 0);
+
+   state = CALLOC_STRUCT(sp_compute_shader);
+   if (!state)
+      return NULL;
+
+   state->shader = *templ;
+   state->tokens = tgsi_dup_tokens(tokens);
+   if (!state->tokens) {
+      /* Do not hand out a shader that has no tokens to execute. */
+      FREE(state);
+      return NULL;
+   }
+   tgsi_scan_shader(state->tokens, &state->info);
+
+   state->max_sampler = state->info.file_max[TGSI_FILE_SAMPLER];
+
+   return state;
+}
+
+/**
+ * pipe_context::bind_compute_state hook: make \p cs the context's current
+ * compute shader.  Rebinding the shader that is already current is a
+ * no-op.
+ */
+static void
+softpipe_bind_compute_state(struct pipe_context *pipe,
+                            void *cs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_compute_shader *incoming = cs;
+
+   if (softpipe->cs != incoming)
+      softpipe->cs = incoming;
+}
+
+/**
+ * pipe_context::delete_compute_state hook: free the duplicated tokens and
+ * then the shader object itself.  Asserts that the shader is not the one
+ * currently bound to the context.
+ */
+static void
+softpipe_delete_compute_state(struct pipe_context *pipe,
+                              void *cs)
+{
+   struct sp_compute_shader *doomed = cs;
+
+   assert(softpipe_context(pipe)->cs != doomed);
+
+   tgsi_free_tokens(doomed->tokens);
+   FREE(doomed);
+}
  
  void
  softpipe_init_shader_funcs(struct pipe_context *pipe)
@@ -395,4 +444,8 @@ softpipe_init_shader_funcs(struct pipe_context *pipe)
     pipe->delete_gs_state = softpipe_delete_gs_state;
  
     pipe->set_constant_buffer = softpipe_set_constant_buffer;
+
+   pipe->create_compute_state = softpipe_create_compute_state;
+   pipe->bind_compute_state = softpipe_bind_compute_state;
+   pipe->delete_compute_state = softpipe_delete_compute_state;
  }
author	Dave Airlie <airlied@redhat.com>
	Tue, 26 Apr 2016 04:32:52 +0000 (14:32 +1000)
committer	Dave Airlie <airlied@redhat.com>
	Tue, 26 Apr 2016 23:01:03 +0000 (09:01 +1000)
src/gallium/drivers/softpipe/Makefile.sources		patch \| blob \| history
src/gallium/drivers/softpipe/sp_compute.c	[new file with mode: 0644]	patch \| blob
src/gallium/drivers/softpipe/sp_context.c		patch \| blob \| history
src/gallium/drivers/softpipe/sp_context.h		patch \| blob \| history
src/gallium/drivers/softpipe/sp_screen.c		patch \| blob \| history
src/gallium/drivers/softpipe/sp_state.h		patch \| blob \| history
src/gallium/drivers/softpipe/sp_state_derived.c		patch \| blob \| history
src/gallium/drivers/softpipe/sp_state_shader.c		patch \| blob \| history