gallium: add pipe_grid_info::last_block

author Marek Olšák <marek.olsak@amd.com>

Wed, 27 Feb 2019 22:19:53 +0000 (17:19 -0500)

committer Leo Liu <leo.liu@amd.com>

Fri, 15 Mar 2019 15:53:08 +0000 (11:53 -0400)
author Marek Olšák <marek.olsak@amd.com>
Wed, 27 Feb 2019 22:19:53 +0000 (17:19 -0500)
committer Leo Liu <leo.liu@amd.com>
Fri, 15 Mar 2019 15:53:08 +0000 (11:53 -0400)
diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c

index 50964f3b3ef8486e2878edd49a60133949a068dc..b902c083ad441ce081ec440aba22ef85a9dec154 100644 (file)
--- a/src/gallium/auxiliary/util/u_screen.c
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -341,6 +341,9 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
     case PIPE_CAP_MAX_VARYINGS:
        return 8;
  
+   case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
+      return 0;
+
     default:
        unreachable("bad PIPE_CAP_*");
     }
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst

index 85ca5e1f5cea71f92d79c7cdf9d3fee7c046fc76..60ba9bcbde01f67457d3e6a28cc9b097ff0bf065 100644 (file)
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -492,6 +492,8 @@ The integer capabilities:
    varyings. This will generally correspond to
    ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some
    cases may be a smaller number.
+* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block
+  is implemented by the driver. See struct pipe_grid_info for more details.
  
  .. _pipe_capf:
  
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c

index 87addd53976e08a461bd44b3507a307523d381e5..6c2269d903af0fab19ab98a01e86829b8bec6eb1 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -804,7 +804,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
                  * allow launching waves out-of-order. (same as Vulkan) */
                 S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
  
-       uint *last_block = sctx->compute_last_block;
+       uint *last_block = info->last_block;
         bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
  
         radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c

index f5e9c02dd1086247310aade84087f71839f8288f..a7453099ac6d260e3e3c0e240ea5148df243f139 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -381,7 +381,7 @@ void si_compute_copy_image(struct si_context *sctx,
                                 si_create_copy_image_compute_shader_1d_array(ctx);
                 ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
                 info.block[0] = 64;
-               sctx->compute_last_block[0] = width % 64;
+               info.last_block[0] = width % 64;
                 info.block[1] = 1;
                 info.block[2] = 1;
                 info.grid[0] = DIV_ROUND_UP(width, 64);
@@ -392,9 +392,9 @@ void si_compute_copy_image(struct si_context *sctx,
                         sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
                 ctx->bind_compute_state(ctx, sctx->cs_copy_image);
                 info.block[0] = 8;
-               sctx->compute_last_block[0] = width % 8;
+               info.last_block[0] = width % 8;
                 info.block[1] = 8;
-               sctx->compute_last_block[1] = height % 8;
+               info.last_block[1] = height % 8;
                 info.block[2] = 1;
                 info.grid[0] = DIV_ROUND_UP(width, 8);
                 info.grid[1] = DIV_ROUND_UP(height, 8);
@@ -403,9 +403,6 @@ void si_compute_copy_image(struct si_context *sctx,
  
         ctx->launch_grid(ctx, &info);
  
-       sctx->compute_last_block[0] = 0;
-       sctx->compute_last_block[1] = 0;
-
         sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
                        (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
                        si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
@@ -483,9 +480,9 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
                         sctx->cs_clear_render_target = si_clear_render_target_shader(ctx);
                 ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);
                 info.block[0] = 8;
-               sctx->compute_last_block[0] = width % 8;
+               info.last_block[0] = width % 8;
                 info.block[1] = 8;
-               sctx->compute_last_block[1] = height % 8;
+               info.last_block[1] = height % 8;
                 info.block[2] = 1;
                 info.grid[0] = DIV_ROUND_UP(width, 8);
                 info.grid[1] = DIV_ROUND_UP(height, 8);
@@ -496,7 +493,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
                                 si_clear_render_target_shader_1d_array(ctx);
                 ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array);
                 info.block[0] = 64;
-               sctx->compute_last_block[0] = width % 64;
+               info.last_block[0] = width % 64;
                 info.block[1] = 1;
                 info.block[2] = 1;
                 info.grid[0] = DIV_ROUND_UP(width, 64);
@@ -506,9 +503,6 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
  
         ctx->launch_grid(ctx, &info);
  
-       sctx->compute_last_block[0] = 0;
-       sctx->compute_last_block[1] = 0;
-
         sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
                        (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
                        si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c

index a5cb209b59ee00194563ceee206a59c9b64056d3..6fa67087c7db26b3d6311be6b359d936c924094d 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -160,6 +160,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
         case PIPE_CAP_TGSI_BALLOT:
         case PIPE_CAP_TGSI_VOTE:
         case PIPE_CAP_TGSI_FS_FBFETCH:
+       case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
                 return 1;
  
         case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index b3198d45ea6cb2a2ba9e19e3223fe2b0f38ac923..b6858b46ec0c11c1722c0e91007816773b182489 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -921,28 +921,6 @@ struct si_context {
         uint32_t                        vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
         uint32_t                        cs_user_data[4];
  
-        /**
-         * last_block allows disabling threads at the farthermost grid boundary.
-         * Full blocks as specified by "block" are launched, but the threads
-         * outside of "last_block" dimensions are disabled.
-         *
-         * If a block touches the grid boundary in the i-th axis, threads with
-         * THREAD_ID[i] >= last_block[i] are disabled.
-         *
-         * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
-         * meaning no effect.
-         *
-         * It's equivalent to doing this at the beginning of the compute shader:
-         *
-         *   for (i = 0; i < 3; i++) {
-         *      if (block_id[i] == grid[i] - 1 &&
-         *          last_block[i] && last_block[i] >= thread_id[i])
-         *         return;
-         *   }
-         * (this could be moved into pipe_grid_info)
-         */
-        uint compute_last_block[3];
-
         /* Vertex and index buffers. */
         bool                            vertex_buffers_dirty;
         bool                            vertex_buffer_pointer_dirty;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h

index e2b0104ce43d3810416f4d1135812b31375c3651..d4732dc257fe592fd50c84cfc630ee349da22679 100644 (file)
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -858,6 +858,7 @@ enum pipe_cap
     PIPE_CAP_DEST_SURFACE_SRGB_CONTROL,
     PIPE_CAP_NIR_COMPACT_ARRAYS,
     PIPE_CAP_MAX_VARYINGS,
+   PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK,
  };
  
  /**
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h

index 38052e5fd3d5a01480dc9e8461a4d325a2cc8f0e..3a91ddd71b5b7c6a0d6415166512ee791e452ab0 100644 (file)
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -838,6 +838,27 @@ struct pipe_grid_info
      */
     uint block[3];
  
+   /**
+    * last_block allows disabling threads at the farthermost grid boundary.
+    * Full blocks as specified by "block" are launched, but the threads
+    * outside of "last_block" dimensions are disabled.
+    *
+    * If a block touches the grid boundary in the i-th axis, threads with
+    * THREAD_ID[i] >= last_block[i] are disabled.
+    *
+    * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
+    * meaning no effect.
+    *
+    * It's equivalent to doing this at the beginning of the compute shader:
+    *
+    *   for (i = 0; i < 3; i++) {
+    *      if (block_id[i] == grid[i] - 1 &&
+    *          last_block[i] && thread_id[i] >= last_block[i])
+    *         return;
+    *   }
+    */
+   uint last_block[3];
+
     /**
      * Determine the layout of the grid (in block units) to be used.
      */
author	Marek Olšák <marek.olsak@amd.com>
	Wed, 27 Feb 2019 22:19:53 +0000 (17:19 -0500)
committer	Leo Liu <leo.liu@amd.com>
	Fri, 15 Mar 2019 15:53:08 +0000 (11:53 -0400)
src/gallium/auxiliary/util/u_screen.c		patch | blob | history
src/gallium/docs/source/screen.rst		patch | blob | history
src/gallium/drivers/radeonsi/si_compute.c		patch | blob | history
src/gallium/drivers/radeonsi/si_compute_blit.c		patch | blob | history
src/gallium/drivers/radeonsi/si_get.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.h		patch | blob | history
src/gallium/include/pipe/p_defines.h		patch | blob | history
src/gallium/include/pipe/p_state.h		patch | blob | history