i965: Remove old CS local ID handling

author Jordan Justen <jordan.l.justen@intel.com>

Mon, 23 May 2016 05:31:06 +0000 (22:31 -0700)

committer Jordan Justen <jordan.l.justen@intel.com>

Thu, 2 Jun 2016 02:29:02 +0000 (19:29 -0700)
author Jordan Justen <jordan.l.justen@intel.com>
Mon, 23 May 2016 05:31:06 +0000 (22:31 -0700)
committer Jordan Justen <jordan.l.justen@intel.com>
Thu, 2 Jun 2016 02:29:02 +0000 (19:29 -0700)
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c

index edaaa3d2efad23097eb90d55a8a36bff9f97640b..3d37de2fbaf3fff99fcab60e97c2dfd0c457ec4d 100644 (file)
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1094,13 +1094,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
     }
  
     if (cs_prog_data->push.per_thread.size > 0) {
-      brw_cs_fill_local_id_payload(cs_prog_data, u32_map, cs_prog_data->threads,
-                                   cs_prog_data->push.per_thread.size);
        for (unsigned t = 0; t < cs_prog_data->threads; t++) {
           unsigned dst =
              8 * (cs_prog_data->push.per_thread.regs * t +
-                 cs_prog_data->push.cross_thread.regs +
-                 cs_prog_data->local_invocation_id_regs);
+                 cs_prog_data->push.cross_thread.regs);
           unsigned src = cs_prog_data->push.cross_thread.dwords;
           for ( ; src < prog_data->nr_params; src++, dst++) {
              if (src != cs_prog_data->thread_local_id_index) {
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h

index dda62974297e144eb7f39594895063b23c691ab4..6e6d20c7d3819714362a7528da7e6e5f3eb3c17f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -439,7 +439,6 @@ struct brw_cs_prog_data {
     unsigned threads;
     bool uses_barrier;
     bool uses_num_work_groups;
-   unsigned local_invocation_id_regs;
     int thread_local_id_index;
  
     struct {
@@ -831,13 +830,6 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                 unsigned *final_assembly_size,
                 char **error_str);
  
-/**
- * Fill out local id payload for compute shader according to cs_prog_data.
- */
-void
-brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
-                             void *buffer, uint32_t threads, uint32_t stride);
-
  #ifdef __cplusplus
  } /* extern "C" */
  #endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 0b766a4a848a532a252abd7b2927286e0e986af2..9abe73acef2bcd9cbe18fc48652b798a7d2506f0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5581,31 +5581,6 @@ fs_visitor::setup_vs_payload()
     payload.num_regs = 2;
  }
  
-/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- *    no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- *    easily use 16-bit words rather than 32-bit dwords in the push constant
- *    data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- *    conveying the data, and thereby reduce push constant usage.
- *
- */
  void
  fs_visitor::setup_gs_payload()
  {
@@ -5649,16 +5624,7 @@ void
  fs_visitor::setup_cs_payload()
  {
     assert(devinfo->gen >= 7);
-   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
-
     payload.num_regs = 1;
-
-   if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID &&
-       prog_data->thread_local_id_index < 0) {
-      prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
-      payload.local_invocation_id_reg = payload.num_regs;
-      payload.num_regs += prog_data->local_invocation_id_regs;
-   }
  }
  
  void
@@ -6532,25 +6498,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
     return g.get_assembly(final_assembly_size);
  }
  
-fs_reg *
-fs_visitor::emit_cs_local_invocation_id_setup()
-{
-   assert(stage == MESA_SHADER_COMPUTE);
-
-   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
-
-   struct brw_reg src =
-      brw_vec8_grf(payload.local_invocation_id_reg, 0);
-   src = retype(src, BRW_REGISTER_TYPE_UD);
-   bld.MOV(*reg, src);
-   src.nr += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 1), src);
-   src.nr += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 2), src);
-
-   return reg;
-}
-
  fs_reg *
  fs_visitor::emit_cs_work_group_id_setup()
  {
@@ -6597,9 +6544,7 @@ cs_fill_push_const_info(const struct brw_device_info *devinfo,
     unsigned cross_thread_dwords, per_thread_dwords;
     if (!cross_thread_supported) {
        cross_thread_dwords = 0u;
-      per_thread_dwords =
-         8 * cs_prog_data->local_invocation_id_regs +
-         prog_data->nr_params;
+      per_thread_dwords = prog_data->nr_params;
     } else if (fill_thread_id) {
        /* Fill all but the last register with cross-thread payload */
        cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
@@ -6623,7 +6568,6 @@ cs_fill_push_const_info(const struct brw_device_info *devinfo,
            cs_prog_data->push.per_thread.size == 0);
     assert(cs_prog_data->push.cross_thread.dwords +
            cs_prog_data->push.per_thread.dwords ==
-             8 * cs_prog_data->local_invocation_id_regs +
               prog_data->nr_params);
  }
  
@@ -6768,39 +6712,3 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
  
     return g.get_assembly(final_assembly_size);
  }
-
-void
-brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
-                             void *buffer, uint32_t threads, uint32_t stride)
-{
-   if (prog_data->local_invocation_id_regs == 0)
-      return;
-
-   /* 'stride' should be an integer number of registers, that is, a multiple
-    * of 32 bytes.
-    */
-   assert(stride % 32 == 0);
-
-   unsigned x = 0, y = 0, z = 0;
-   for (unsigned t = 0; t < threads; t++) {
-      uint32_t *param = (uint32_t *) buffer + stride * t / 4;
-
-      for (unsigned i = 0; i < prog_data->simd_size; i++) {
-         param[0 * prog_data->simd_size + i] = x;
-         param[1 * prog_data->simd_size + i] = y;
-         param[2 * prog_data->simd_size + i] = z;
-
-         x++;
-         if (x == prog_data->local_size[0]) {
-            x = 0;
-            y++;
-            if (y == prog_data->local_size[1]) {
-               y = 0;
-               z++;
-               if (z == prog_data->local_size[2])
-                  z = 0;
-            }
-         }
-      }
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 4c1ac9cedd25a248fac5538caa2dc026bf23201e..4237197d8d2f6e8b1b8cc6304616a017b92f7e89 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -267,7 +267,6 @@ public:
                             unsigned base_offset, const nir_src &offset_src,
                             unsigned num_components);
     void emit_cs_terminate();
-   fs_reg *emit_cs_local_invocation_id_setup();
     fs_reg *emit_cs_work_group_id_setup();
  
     void emit_barrier();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 81c72047e2585cf7680b58c9437616b614390ece..7fc43b5061dbdc1a7b8f64221512c7e19ce2d069 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -272,13 +272,6 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
              *reg = *v->emit_samplemaskin_setup();
           break;
  
-      case nir_intrinsic_load_local_invocation_id:
-         assert(v->stage == MESA_SHADER_COMPUTE);
-         reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_cs_local_invocation_id_setup();
-         break;
-
        case nir_intrinsic_load_work_group_id:
           assert(v->stage == MESA_SHADER_COMPUTE);
           reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
diff --git a/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c b/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c

index 972b1171730fa6be4a3812a4518b2474fe27f52c..00155fbaffb62be9839c89f11138aa7d7ca46528 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
@@ -161,13 +161,6 @@ brw_nir_lower_intrinsics(nir_shader *nir, struct brw_stage_prog_data *prog_data)
     state.nir = nir;
     state.prog_data = prog_data;
  
-   /* Currently this pass only lowers intrinsics using the uniform specified
-    * by thread_local_id_index.
-    */
-   if (nir->stage == MESA_SHADER_COMPUTE &&
-       state.cs_prog_data->thread_local_id_index < 0)
-      return false;
-
     do {
        state.progress = false;
        nir_foreach_function(function, nir) {
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c

index f97c26a7d4f7800403c0c59012c3eaab0a2912ed..750aa2ccdf171924aac6484ec0b7618aed2a8811 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -241,13 +241,10 @@ brw_upload_cs_push_constants(struct brw_context *brw,
  
     gl_constant_value thread_id;
     if (cs_prog_data->push.per_thread.size > 0) {
-      brw_cs_fill_local_id_payload(cs_prog_data, param, cs_prog_data->threads,
-                                   cs_prog_data->push.per_thread.size);
        for (unsigned t = 0; t < cs_prog_data->threads; t++) {
           unsigned dst =
              8 * (cs_prog_data->push.per_thread.regs * t +
-                 cs_prog_data->push.cross_thread.regs +
-                 cs_prog_data->local_invocation_id_regs);
+                 cs_prog_data->push.cross_thread.regs);
           unsigned src = cs_prog_data->push.cross_thread.dwords;
           for ( ; src < prog_data->nr_params; src++, dst++) {
              if (src != cs_prog_data->thread_local_id_index)
author	Jordan Justen <jordan.l.justen@intel.com>
	Mon, 23 May 2016 05:31:06 +0000 (22:31 -0700)
committer	Jordan Justen <jordan.l.justen@intel.com>
	Thu, 2 Jun 2016 02:29:02 +0000 (19:29 -0700)
src/intel/vulkan/anv_cmd_buffer.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_compiler.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir_intrinsics.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_cs_state.c		patch \| blob \| history