intel/cs: Make thread_local_id a regular builtin param
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 29 Sep 2017 19:22:48 +0000 (12:22 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 13 Oct 2017 05:39:31 +0000 (22:39 -0700)
This is a lot more natural than special-casing it all over the place.
We still have to do a bit of special-casing in assign_constant_locations
but it's not special-cased quite as badly as it was before.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_nir_lower_cs_intrinsics.c
src/intel/vulkan/anv_cmd_buffer.c
src/mesa/drivers/dri/i965/gen6_constant_state.c

index e20465812789129dc862a2f27575aece256062a7..014202d36ca12419254341e58367b4dec475f13b 100644 (file)
@@ -542,6 +542,8 @@ enum brw_param_builtin {
    BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W,
    BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
    BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
+
+   BRW_PARAM_BUILTIN_THREAD_LOCAL_ID,
 };
 
 #define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \
@@ -738,7 +740,6 @@ struct brw_cs_prog_data {
    unsigned threads;
    bool uses_barrier;
    bool uses_num_work_groups;
-   int thread_local_id_index;
 
    struct {
       struct brw_push_const_block cross_thread;
index f9e7385afdf2b26a3f3a4d58339d89725191b287..66cb33131b6f724fdafc40eea68a0fc3934d6ece 100644 (file)
@@ -1935,6 +1935,20 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
    }
 }
 
+static int
+get_thread_local_id_param_index(const brw_stage_prog_data *prog_data)
+{
+   if (prog_data->nr_params == 0)
+      return -1; /* no params at all, so there is no last param to inspect */
+
+   /* The local thread id, if present, is always the last parameter */
+   uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
+   if (last_param == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID)
+      return prog_data->nr_params - 1;
+
+   return -1; /* last param is not BRW_PARAM_BUILTIN_THREAD_LOCAL_ID */
+}
+
 /**
  * Assign UNIFORM file registers to either push constants or pull constants.
  *
@@ -1963,10 +1977,6 @@ fs_visitor::assign_constant_locations()
    bool contiguous[uniforms];
    memset(contiguous, 0, sizeof(contiguous));
 
-   int thread_local_id_index =
-      (stage == MESA_SHADER_COMPUTE) ?
-      brw_cs_prog_data(stage_prog_data)->thread_local_id_index : -1;
-
    /* First, we walk through the instructions and do two things:
     *
     *  1) Figure out which uniforms are live.
@@ -2009,8 +2019,7 @@ fs_visitor::assign_constant_locations()
       }
    }
 
-   if (thread_local_id_index >= 0 && !is_live[thread_local_id_index])
-      thread_local_id_index = -1;
+   int thread_local_id_index = get_thread_local_id_param_index(stage_prog_data);
 
    /* Only allow 16 registers (128 uniform components) as push constants.
     *
@@ -2118,22 +2127,15 @@ fs_visitor::assign_constant_locations()
     * push_constant_loc[i] <= i and we can do it in one smooth loop without
     * having to make a copy.
     */
-   int new_thread_local_id_index = -1;
    for (unsigned int i = 0; i < uniforms; i++) {
       uint32_t value = param[i];
       if (pull_constant_loc[i] != -1) {
          stage_prog_data->pull_param[pull_constant_loc[i]] = value;
       } else if (push_constant_loc[i] != -1) {
          stage_prog_data->param[push_constant_loc[i]] = value;
-         if (thread_local_id_index == (int)i)
-            new_thread_local_id_index = push_constant_loc[i];
       }
    }
    ralloc_free(param);
-
-   if (stage == MESA_SHADER_COMPUTE)
-      brw_cs_prog_data(stage_prog_data)->thread_local_id_index =
-         new_thread_local_id_index;
 }
 
 bool
@@ -6698,24 +6700,20 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo,
                         struct brw_cs_prog_data *cs_prog_data)
 {
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
-   bool fill_thread_id =
-      cs_prog_data->thread_local_id_index >= 0 &&
-      cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
+   int thread_local_id_index = get_thread_local_id_param_index(prog_data);
    bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
 
    /* The thread ID should be stored in the last param dword */
-   assert(prog_data->nr_params > 0 || !fill_thread_id);
-   assert(!fill_thread_id ||
-          cs_prog_data->thread_local_id_index ==
-             (int)prog_data->nr_params - 1);
+   assert(thread_local_id_index == -1 ||
+          thread_local_id_index == (int)prog_data->nr_params - 1);
 
    unsigned cross_thread_dwords, per_thread_dwords;
    if (!cross_thread_supported) {
       cross_thread_dwords = 0u;
       per_thread_dwords = prog_data->nr_params;
-   } else if (fill_thread_id) {
+   } else if (thread_local_id_index >= 0) {
       /* Fill all but the last register with cross-thread payload */
-      cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
+      cross_thread_dwords = 8 * (thread_local_id_index / 8);
       per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
       assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
    } else {
index 01718eb5dd1b0f02366a41671220da844a30cea8..9b4a0fdf2eb474d720e754ab84d502483205dd4d 100644 (file)
@@ -30,6 +30,7 @@ struct lower_intrinsics_state {
    nir_function_impl *impl;
    bool progress;
    nir_builder builder;
+   int thread_local_id_index;
 };
 
 static nir_ssa_def *
@@ -47,12 +48,13 @@ read_thread_local_id(struct lower_intrinsics_state *state)
    if (group_size <= 8)
       return nir_imm_int(b, 0);
 
-   if (prog_data->thread_local_id_index == -1) {
-      prog_data->thread_local_id_index = prog_data->base.nr_params;
-      brw_stage_prog_data_add_params(&prog_data->base, 1);
+   if (state->thread_local_id_index == -1) {
+      state->thread_local_id_index = prog_data->base.nr_params;
+      uint32_t *param = brw_stage_prog_data_add_params(&prog_data->base, 1);
+      *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
       nir->num_uniforms += 4;
    }
-   unsigned id_index = prog_data->thread_local_id_index;
+   unsigned id_index = state->thread_local_id_index;
 
    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
@@ -165,7 +167,7 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
    state.nir = nir;
    state.prog_data = prog_data;
 
-   state.prog_data->thread_local_id_index = -1;
+   state.thread_local_id_index = -1;
 
    do {
       state.progress = false;
index 64d1417f5b1ab6571eb34c6f38f6f480b8f8ad06..b45f8f83757c64f6382e78143572493c6d0d92bd 100644 (file)
@@ -707,12 +707,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    uint32_t *u32_map = state.map;
 
    if (cs_prog_data->push.cross_thread.size > 0) {
-      assert(cs_prog_data->thread_local_id_index < 0 ||
-             cs_prog_data->thread_local_id_index >=
-                cs_prog_data->push.cross_thread.dwords);
       for (unsigned i = 0;
            i < cs_prog_data->push.cross_thread.dwords;
            i++) {
+         assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
          u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
       }
    }
@@ -724,11 +722,11 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
                  cs_prog_data->push.cross_thread.regs);
          unsigned src = cs_prog_data->push.cross_thread.dwords;
          for ( ; src < prog_data->nr_params; src++, dst++) {
-            if (src != cs_prog_data->thread_local_id_index) {
+            if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
+               u32_map[dst] = t * cs_prog_data->simd_size;
+            } else {
                u32_map[dst] =
                   anv_push_constant_value(data, prog_data->param[src]);
-            } else {
-               u32_map[dst] = t * cs_prog_data->simd_size;
             }
          }
       }
index eb9e29187c411aa8fb247924d0b79a3a41082bce..62ad6b04976fbda57ff094726ba437fa45f7a386 100644 (file)
@@ -305,12 +305,10 @@ brw_upload_cs_push_constants(struct brw_context *brw,
 
    if (cs_prog_data->push.cross_thread.size > 0) {
       uint32_t *param_copy = param;
-      assert(cs_prog_data->thread_local_id_index < 0 ||
-             cs_prog_data->thread_local_id_index >=
-                cs_prog_data->push.cross_thread.dwords);
       for (unsigned i = 0;
            i < cs_prog_data->push.cross_thread.dwords;
            i++) {
+         assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
          param_copy[i] = brw_param_value(brw, prog, stage_state,
                                          prog_data->param[i]);
       }
@@ -323,11 +321,11 @@ brw_upload_cs_push_constants(struct brw_context *brw,
                  cs_prog_data->push.cross_thread.regs);
          unsigned src = cs_prog_data->push.cross_thread.dwords;
          for ( ; src < prog_data->nr_params; src++, dst++) {
-            if (src != cs_prog_data->thread_local_id_index) {
+            if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
+               param[dst] = t * cs_prog_data->simd_size;
+            } else {
                param[dst] = brw_param_value(brw, prog, stage_state,
                                             prog_data->param[src]);
-            } else {
-               param[dst] = t * cs_prog_data->simd_size;
             }
          }
       }