i965: Rework the TCS passthrough shader to use NIR.
author: Kenneth Graunke <kenneth@whitecape.org>
Sat, 9 Apr 2016 06:19:34 +0000 (23:19 -0700)
committer: Kenneth Graunke <kenneth@whitecape.org>
Tue, 3 May 2016 23:27:52 +0000 (16:27 -0700)
I'm about to implement a scalar TCS backend, and I'd rather not
duplicate all of this code there.

One change is that we now write the tessellation levels from all
TCS threads, rather than just the first.  This is pretty harmless,
and was easier to implement.  The IF/ENDIF that limited those writes
to invocation 0 is gone; otherwise the generated code is basically
identical.

I chose to emit load/store intrinsics directly because it was easier.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_tcs.c
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.h

index 7e4142602843640098ac3322a3cf5549d3351ba8..0117ffe358977dd7c2e377efec0757548b697543 100644 (file)
 #include "brw_shader.h"
 #include "brw_state.h"
 #include "program/prog_parameter.h"
+#include "nir_builder.h"
+/**
+ * Build the NIR for an automatically generated "passthrough" tessellation
+ * control shader, used by brw_codegen_tcs_prog() in place of a real TCS.
+ *
+ * The generated shader does two things, in this order:
+ *  - loads two vec4 uniforms and stores them to the tessellation level
+ *    output slots (the patch URB header);
+ *  - for every bit set in key->outputs_written, loads that per-vertex
+ *    input using the invocation ID and stores it to the same per-vertex
+ *    output.
+ *
+ * \param compiler  passed through to brw_preprocess_nir()
+ * \param options   NIR compiler options used to create the shader
+ * \param key       supplies outputs_written (used as both the inputs_read
+ *                  and outputs_written masks) and input_vertices (used as
+ *                  the TCS output vertex count)
+ *
+ * \return the shader as returned by brw_preprocess_nir()
+ */
+static nir_shader *
+create_passthrough_tcs(const struct brw_compiler *compiler,
+                       const nir_shader_compiler_options *options,
+                       const struct brw_tcs_prog_key *key)
+{
+   nir_builder b;
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_TESS_CTRL, options);
+   nir_shader *nir = b.shader;
+   nir_variable *var;
+   nir_intrinsic_instr *load;
+   nir_intrinsic_instr *store;
+   nir_ssa_def *zero = nir_imm_int(&b, 0); /* shared constant-0 source */
+   nir_ssa_def *invoc_id =
+      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);
+
+   nir->info.inputs_read = key->outputs_written; /* every output is a copied input */
+   nir->info.outputs_written = key->outputs_written;
+   nir->info.tcs.vertices_out = key->input_vertices;
+   nir->info.name = ralloc_strdup(nir, "passthrough");
+   nir->num_uniforms = 8 * sizeof(uint32_t); /* two vec4s of 32-bit values, in bytes */
+
+   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
+   var->data.location = 0;
+   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
+   var->data.location = 1;
+
+   /* Write the patch URB header.
+    *
+    * Iteration i loads a vec4 from uniform byte offset i * 16 and stores all
+    * four components to output slot VARYING_SLOT_TESS_LEVEL_INNER - i.
+    * These stores are emitted unconditionally, so every invocation executes
+    * them; nothing here predicates them on the invocation ID.
+    *
+    * NOTE(review): i == 1 presumably targets VARYING_SLOT_TESS_LEVEL_OUTER,
+    * which relies on that enum value being INNER - 1 -- confirm.
+    */
+   for (int i = 0; i <= 1; i++) {
+      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
+      load->num_components = 4;
+      load->src[0] = nir_src_for_ssa(zero);
+      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
+      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
+      nir_builder_instr_insert(&b, &load->instr);
+
+      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
+      store->num_components = 4;
+      store->src[0] = nir_src_for_ssa(&load->dest.ssa); /* value just loaded */
+      store->src[1] = nir_src_for_ssa(zero);
+      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
+      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
+      nir_builder_instr_insert(&b, &store->instr);
+   }
+
+   /* Copy inputs to outputs.
+    *
+    * Walk the set bits of key->outputs_written from lowest to highest.  Each
+    * bit index is used as the base of both the per-vertex load and the
+    * per-vertex store, and both are given the invocation ID as a source.
+    *
+    * NOTE(review): the invocation ID source is presumably the vertex index
+    * and the zero source the offset -- confirm against the NIR intrinsic
+    * definitions.
+    */
+   uint64_t varyings = key->outputs_written;
+
+   while (varyings != 0) {
+      const int varying = ffsll(varyings) - 1; /* index of the lowest set bit */
+
+      load = nir_intrinsic_instr_create(nir,
+                                        nir_intrinsic_load_per_vertex_input);
+      load->num_components = 4;
+      load->src[0] = nir_src_for_ssa(invoc_id);
+      load->src[1] = nir_src_for_ssa(zero);
+      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
+      nir_intrinsic_set_base(load, varying);
+      nir_builder_instr_insert(&b, &load->instr);
+
+      store = nir_intrinsic_instr_create(nir,
+                                         nir_intrinsic_store_per_vertex_output);
+      store->num_components = 4;
+      store->src[0] = nir_src_for_ssa(&load->dest.ssa); /* value just loaded */
+      store->src[1] = nir_src_for_ssa(invoc_id);
+      store->src[2] = nir_src_for_ssa(zero);
+      nir_intrinsic_set_base(store, varying);
+      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
+      nir_builder_instr_insert(&b, &store->instr);
+
+      varyings &= ~BITFIELD64_BIT(varying); /* done with this varying */
+   }
+
+   nir_validate_shader(nir);
+
+   nir = brw_preprocess_nir(compiler, nir);
+
+   return nir;
+}
 
 static void
 brw_tcs_debug_recompile(struct brw_context *brw,
@@ -103,12 +183,7 @@ brw_codegen_tcs_prog(struct brw_context *brw,
        */
       const nir_shader_compiler_options *options =
          ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
-      nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, options);
-      nir->num_uniforms = 2; /* both halves of the patch header */
-      nir->info.outputs_written = key->outputs_written;
-      nir->info.inputs_read = key->outputs_written;
-      nir->info.tcs.vertices_out = key->input_vertices;
-      nir->info.name = ralloc_strdup(nir, "passthrough");
+      nir = create_passthrough_tcs(compiler, options, key);
    }
 
    memset(&prog_data, 0, sizeof(prog_data));
index 804394808e010ac3081419781ccfc039cef82b30..5096f135124c1171d6d0b89eb59d6f0b166f19e6 100644 (file)
@@ -47,54 +47,6 @@ vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
 }
 
 
-void
-vec4_tcs_visitor::emit_nir_code()
-{
-   if (key->program_string_id != 0) {
-      /* We have a real application-supplied TCS, emit real code. */
-      vec4_visitor::emit_nir_code();
-   } else {
-      /* There is no TCS; automatically generate a passthrough shader
-       * that writes the API-specified default tessellation levels and
-       * copies VS outputs to TES inputs.
-       */
-      uniforms = 2;
-
-      uint64_t varyings = key->outputs_written;
-
-      src_reg vertex_offset(this, glsl_type::uint_type);
-      emit(MUL(dst_reg(vertex_offset), invocation_id,
-               brw_imm_ud(prog_data->vue_map.num_per_vertex_slots)));
-
-      while (varyings != 0) {
-         const int varying = ffsll(varyings) - 1;
-
-         unsigned in_offset = input_vue_map->varying_to_slot[varying];
-         unsigned out_offset = prog_data->vue_map.varying_to_slot[varying];
-         assert(out_offset >= 2);
-
-         dst_reg val(this, glsl_type::vec4_type);
-         emit_input_urb_read(val, invocation_id, in_offset, src_reg());
-         emit_urb_write(src_reg(val), WRITEMASK_XYZW, out_offset,
-                        vertex_offset);
-
-         varyings &= ~BITFIELD64_BIT(varying);
-      }
-
-      /* Only write the tessellation factors from invocation 0.
-       * There's no point in making other threads do redundant work.
-       */
-      emit(CMP(dst_null_d(), invocation_id, brw_imm_ud(0),
-               BRW_CONDITIONAL_EQ));
-      emit(IF(BRW_PREDICATE_NORMAL));
-      emit_urb_write(src_reg(UNIFORM, 0, glsl_type::vec4_type),
-                     WRITEMASK_XYZW, 0, src_reg());
-      emit_urb_write(src_reg(UNIFORM, 1, glsl_type::vec4_type),
-                     WRITEMASK_XYZW, 1, src_reg());
-      emit(BRW_OPCODE_ENDIF);
-   }
-}
-
 void
 vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
 {
@@ -393,7 +345,10 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg indirect_offset = get_indirect_offset(instr);
       unsigned imm_offset = instr->const_index[0];
 
-      if (indirect_offset.file == BAD_FILE) {
+      /* The passthrough shader writes the whole patch header as two vec4s;
+       * skip all the gl_TessLevelInner/Outer swizzling.
+       */
+      if (indirect_offset.file == BAD_FILE && key->program_string_id != 0) {
          if (imm_offset == 0) {
             value.type = BRW_REGISTER_TYPE_F;
 
index 2c6801b2ae3da8976dab785ba031ed4bf77bb1d6..a6de2b17e97cec952892dc81b865196d8087ecea 100644 (file)
@@ -49,7 +49,6 @@ public:
                     const struct brw_vue_map *input_vue_map);
 
 protected:
-   virtual void emit_nir_code();
    virtual dst_reg *make_reg_for_system_value(int location,
                                               const glsl_type *type);
    virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);