intel/compiler: Implement TCS 8_PATCH mode and INTEL_DEBUG=tcs8

author Kenneth Graunke <kenneth@whitecape.org>

Fri, 3 May 2019 21:57:54 +0000 (14:57 -0700)

committer Kenneth Graunke <kenneth@whitecape.org>

Tue, 14 May 2019 20:16:30 +0000 (13:16 -0700)
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 3 May 2019 21:57:54 +0000 (14:57 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 14 May 2019 20:16:30 +0000 (13:16 -0700)
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c

index 30ec3f1ff86e9309bff17c86e50dd87db24b575d..d5c5a32bbc40afb983b735cbdd37464ad08f1654 100644 (file)
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -1526,6 +1526,7 @@ iris_create_tcs_state(struct pipe_context *ctx,
  {
     struct iris_context *ice = (void *) ctx;
     struct iris_screen *screen = (void *) ctx->screen;
+   const struct brw_compiler *compiler = screen->compiler;
     struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
     struct shader_info *info = &ish->nir->info;
  
@@ -1544,6 +1545,14 @@ iris_create_tcs_state(struct pipe_context *ctx,
           .patch_outputs_written = info->patch_outputs_written,
        };
  
+      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
+       * We don't have that information, so we randomly guess that the input
+       * and output patches are the same size.  This is a bad guess, but we
+       * can't do much better.
+       */
+      if (compiler->use_tcs_8_patch)
+         key.input_vertices = info->tess.tcs_vertices_out;
+
        iris_compile_tcs(ice, ish, &key);
     }
  
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c

index a9af1cd0dc4a782907ed67b558e222c14e1fd1df..f48bacf77e1408c2aa068d9a3f26ecebc5d4d5a6 100644 (file)
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -3651,6 +3651,11 @@ iris_store_tcs_state(struct iris_context *ice,
        hs.InstanceCount = tcs_prog_data->instances - 1;
        hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
        hs.IncludeVertexHandles = true;
+
+#if GEN_GEN >= 9
+      hs.DispatchMode = vue_prog_data->dispatch_mode;
+      hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
     }
  }
  
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c

index 626ff4bb16aaab322f166dd751371d3d9df86716..6a41cd20270709f06ad18fc577a9ea4df840e5a9 100644 (file)
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -99,6 +99,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
  
     compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
  
+   compiler->use_tcs_8_patch =
+      devinfo->gen >= 9 && (INTEL_DEBUG & DEBUG_TCS_EIGHT_PATCH);
+
     if (devinfo->gen >= 10) {
        /* We don't support vec4 mode on Cannonlake. */
        for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h

index 332789d7809bf5ace7fcda575f67cf1833cd1b2a..8c6ae35636bb0f84cd6815cf53ad3db476cc3440 100644 (file)
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -93,6 +93,7 @@ struct brw_compiler {
     void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
  
     bool scalar_stage[MESA_SHADER_STAGES];
+   bool use_tcs_8_patch;
     struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
  
     /**
@@ -1002,6 +1003,9 @@ enum shader_dispatch_mode {
     DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
     DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
     DISPATCH_MODE_SIMD8 = 3,
+
+   DISPATCH_MODE_TCS_SINGLE_PATCH = 0,
+   DISPATCH_MODE_TCS_8_PATCH = 2,
  };
  
  /**
@@ -1074,6 +1078,9 @@ struct brw_tcs_prog_data
  {
     struct brw_vue_prog_data base;
  
+   /** Should the non-SINGLE_PATCH payload provide primitive ID? */
+   bool include_primitive_id;
+
     /** Number vertices in output patch */
     int instances;
  };
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index 9d05800d1fe03ab810e4bd852e34670f5987505a..9b4e030b54fbfc229d045d7571b90256129ba928 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1805,7 +1805,7 @@ fs_visitor::assign_vs_urb_setup()
  }
  
  void
-fs_visitor::assign_tcs_single_patch_urb_setup()
+fs_visitor::assign_tcs_urb_setup()
  {
     assert(stage == MESA_SHADER_TESS_CTRL);
  
@@ -7396,12 +7396,28 @@ void
  fs_visitor::set_tcs_invocation_id()
  {
     struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
  
     const unsigned instance_id_mask =
        devinfo->gen >= 11 ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
     const unsigned instance_id_shift =
        devinfo->gen >= 11 ? 16 : 17;
  
+   /* Get instance number from g0.2 bits 22:16 or 23:17 */
+   fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
+   bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
+           brw_imm_ud(instance_id_mask));
+
+   invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+   if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH) {
+      /* gl_InvocationID is just the thread number */
+      bld.SHR(invocation_id, t, brw_imm_ud(instance_id_shift));
+      return;
+   }
+
+   assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH);
+
     fs_reg channels_uw = bld.vgrf(BRW_REGISTER_TYPE_UW);
     fs_reg channels_ud = bld.vgrf(BRW_REGISTER_TYPE_UD);
     bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210)));
@@ -7410,26 +7426,36 @@ fs_visitor::set_tcs_invocation_id()
     if (tcs_prog_data->instances == 1) {
        invocation_id = channels_ud;
     } else {
-      invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
-
-      /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */
-      fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
        fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD);
-      bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
-              brw_imm_ud(instance_id_mask));
        bld.SHR(instance_times_8, t, brw_imm_ud(instance_id_shift - 3));
-
        bld.ADD(invocation_id, instance_times_8, channels_ud);
     }
  }
  
  bool
-fs_visitor::run_tcs_single_patch()
+fs_visitor::run_tcs()
  {
     assert(stage == MESA_SHADER_TESS_CTRL);
  
-   /* r1-r4 contain the ICP handles. */
-   payload.num_regs = 5;
+   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+   struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
+
+   assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH ||
+          vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+
+   if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) {
+      /* r1-r4 contain the ICP handles. */
+      payload.num_regs = 5;
+   } else {
+      assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+      assert(tcs_key->input_vertices > 0);
+      /* r1 contains output handles, r2 may contain primitive ID, then the
+       * ICP handles occupy the next 1-32 registers.
+       */
+      payload.num_regs = 2 + tcs_prog_data->include_primitive_id +
+                         tcs_key->input_vertices;
+   }
  
     if (shader_time_index >= 0)
        emit_shader_time_begin();
@@ -7438,6 +7464,7 @@ fs_visitor::run_tcs_single_patch()
     set_tcs_invocation_id();
  
     const bool fix_dispatch_mask =
+      vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH &&
        (nir->info.tess.tcs_vertices_out % 8) != 0;
  
     /* Fix the disptach mask */
@@ -7455,7 +7482,7 @@ fs_visitor::run_tcs_single_patch()
  
     /* Emit EOT write; set TR DS Cache bit */
     fs_reg srcs[3] = {
-      fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+      fs_reg(get_tcs_output_urb_handle()),
        fs_reg(brw_imm_ud(WRITEMASK_X << 16)),
        fs_reg(brw_imm_ud(0)),
     };
@@ -7478,7 +7505,7 @@ fs_visitor::run_tcs_single_patch()
     optimize();
  
     assign_curb_setup();
-   assign_tcs_single_patch_urb_setup();
+   assign_tcs_urb_setup();
  
     fixup_3src_null_dest();
     allocate_registers(8, true);
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h

index 8ae73401cdf48c8c7ef0d02a3e1806e305bcc00f..7db486688af0346b29a0dd646abc3153e568bcff 100644 (file)
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -92,7 +92,7 @@ public:
  
     bool run_fs(bool allow_spilling, bool do_rep_send);
     bool run_vs();
-   bool run_tcs_single_patch();
+   bool run_tcs();
     bool run_tes();
     bool run_gs();
     bool run_cs(unsigned min_dispatch_width);
@@ -110,7 +110,7 @@ public:
     void assign_urb_setup();
     void convert_attr_sources_to_hw_regs(fs_inst *inst);
     void assign_vs_urb_setup();
-   void assign_tcs_single_patch_urb_setup();
+   void assign_tcs_urb_setup();
     void assign_tes_urb_setup();
     void assign_gs_urb_setup();
     bool assign_regs(bool allow_spilling, bool spill_all);
@@ -251,6 +251,9 @@ public:
     fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
     fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
                                            nir_intrinsic_instr *instr);
+   fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
+                                         nir_intrinsic_instr *instr);
+   struct brw_reg get_tcs_output_urb_handle();
  
     void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                       unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index 73e2f38145e5a3d4cdbbcd569c2506b50ab28a09..a2c8f3f557fecd1deb40e51c1c223a04b3a34894 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2605,6 +2605,73 @@ fs_visitor::get_tcs_single_patch_icp_handle(const fs_builder &bld,
     return icp_handle;
  }
  
+fs_reg
+fs_visitor::get_tcs_eight_patch_icp_handle(const fs_builder &bld,
+                                           nir_intrinsic_instr *instr)
+{
+   struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
+   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+   const nir_src &vertex_src = instr->src[0];
+
+   unsigned first_icp_handle = tcs_prog_data->include_primitive_id ? 3 : 2;
+
+   if (nir_src_is_const(vertex_src)) {
+      return fs_reg(retype(brw_vec8_grf(first_icp_handle +
+                                        nir_src_as_uint(vertex_src), 0),
+                           BRW_REGISTER_TYPE_UD));
+   }
+
+   /* The vertex index is non-constant.  We need to use indirect
+    * addressing to fetch the proper URB handle.
+    *
+    * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0>
+    * indicating that channel <n> should read the handle from
+    * DWord <n>.  We convert that to bytes by multiplying by 4.
+    *
+    * Next, we convert the vertex index to bytes by multiplying
+    * by 32 (shifting by 5), and add the two together.  This is
+    * the final indirect byte offset.
+    */
+   fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_UW, 1);
+   fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+
+   /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */
+   bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210)));
+   /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */
+   bld.SHL(channel_offsets, sequence, brw_imm_ud(2u));
+   /* Convert vertex_index to bytes (multiply by 32) */
+   bld.SHL(vertex_offset_bytes,
+           retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+           brw_imm_ud(5u));
+   bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
+
+   /* Use first_icp_handle as the base offset.  There is one register
+    * of URB handles per vertex, so inform the register allocator that
+    * we might read up to tcs_key->input_vertices registers.
+    */
+   bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
+            retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
+            icp_offset_bytes, brw_imm_ud(tcs_key->input_vertices * REG_SIZE));
+
+   return icp_handle;
+}
+
+struct brw_reg
+fs_visitor::get_tcs_output_urb_handle()
+{
+   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+
+   if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) {
+      return retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+      return retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
+   }
+}
+
  void
  fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                                     nir_intrinsic_instr *instr)
@@ -2612,6 +2679,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
     assert(stage == MESA_SHADER_TESS_CTRL);
     struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
     struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+
+   bool eight_patch =
+      vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH;
  
     fs_reg dst;
     if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -2619,7 +2690,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
  
     switch (instr->intrinsic) {
     case nir_intrinsic_load_primitive_id:
-      bld.MOV(dst, fs_reg(brw_vec1_grf(0, 1)));
+      bld.MOV(dst, fs_reg(eight_patch ? brw_vec8_grf(2, 0)
+                                      : brw_vec1_grf(0, 1)));
        break;
     case nir_intrinsic_load_invocation_id:
        bld.MOV(retype(dst, invocation_id.type), invocation_id);
@@ -2675,7 +2747,9 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
        unsigned imm_offset = instr->const_index[0];
        fs_inst *inst;
  
-      fs_reg icp_handle = get_tcs_single_patch_icp_handle(bld, instr);
+      fs_reg icp_handle =
+         eight_patch ? get_tcs_eight_patch_icp_handle(bld, instr)
+                     : get_tcs_single_patch_icp_handle(bld, instr);
  
        /* We can only read two double components with each URB read, so
         * we send two read messages in that case, each one loading up to
@@ -2776,12 +2850,15 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
        unsigned imm_offset = instr->const_index[0];
        unsigned first_component = nir_intrinsic_component(instr);
  
+      struct brw_reg output_handles = get_tcs_output_urb_handle();
+
        fs_inst *inst;
        if (indirect_offset.file == BAD_FILE) {
-         /* Replicate the patch handle to all enabled channels */
+         /* This MOV replicates the output handle to all enabled channels
+          * in SINGLE_PATCH mode.
+          */
           fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-         bld.MOV(patch_handle,
-                 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
+         bld.MOV(patch_handle, output_handles);
  
           {
              if (first_component != 0) {
@@ -2805,10 +2882,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
           }
        } else {
           /* Indirect indexing - use per-slot offsets as well. */
-         const fs_reg srcs[] = {
-            retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
-            indirect_offset
-         };
+         const fs_reg srcs[] = { output_handles, indirect_offset };
           fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
           bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
           if (first_component != 0) {
@@ -2842,8 +2916,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
        unsigned imm_offset = instr->const_index[0];
        unsigned mask = instr->const_index[1];
        unsigned header_regs = 0;
+      struct brw_reg output_handles = get_tcs_output_urb_handle();
+
        fs_reg srcs[7];
-      srcs[header_regs++] = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
+      srcs[header_regs++] = output_handles;
  
        if (indirect_offset.file != BAD_FILE) {
           srcs[header_regs++] = indirect_offset;
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp

index f0ef8c0dd96f3df3af1536c4653fad280da28fb3..c37f34cbe8136ade2887ea5b8b01d801fb48c94e 100644 (file)
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ b/src/intel/compiler/brw_vec4_tcs.cpp
@@ -406,10 +406,26 @@ brw_compile_tcs(const struct brw_compiler *compiler,
  
     nir = brw_postprocess_nir(nir, compiler, is_scalar);
  
-   if (is_scalar)
-      prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 8);
-   else
-      prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 2);
+   bool has_primitive_id =
+      nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID);
+
+   if (compiler->use_tcs_8_patch &&
+       nir->info.tess.tcs_vertices_out <= 16 &&
+       2 + has_primitive_id + key->input_vertices <= 31) {
+      /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode.  First,
+       * the "Instance Count" field limits the output vertices to [1, 16].
+       * Second, the "Dispatch GRF Start Register for URB Data" field is
+       * limited to [0, 31] - which imposes a limit on the input vertices.
+       */
+      vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH;
+      prog_data->instances = nir->info.tess.tcs_vertices_out;
+      prog_data->include_primitive_id = has_primitive_id;
+   } else {
+      unsigned verts_per_thread = is_scalar ? 8 : 2;
+      vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_SINGLE_PATCH;
+      prog_data->instances =
+         DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
+   }
  
     /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
      * That divides up as follows:
@@ -462,14 +478,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
        fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
                     &prog_data->base.base, NULL, nir, 8,
                     shader_time_index, &input_vue_map);
-      if (!v.run_tcs_single_patch()) {
+      if (!v.run_tcs()) {
           if (error_str)
              *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
           return NULL;
        }
  
        prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
-      prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
  
        fs_generator g(compiler, log_data, mem_ctx,
                       &prog_data->base.base, v.promoted_constants, false,
diff --git a/src/intel/dev/gen_debug.c b/src/intel/dev/gen_debug.c

index b0d0d1a574a9dd2cc6f30d9fa9acf24df75498e1..5fa3d29fa513dc23aa3ad9be72cac94335b19a4f 100644 (file)
--- a/src/intel/dev/gen_debug.c
+++ b/src/intel/dev/gen_debug.c
@@ -86,6 +86,7 @@ static const struct debug_control debug_control[] = {
     { "color",       DEBUG_COLOR },
     { "reemit",      DEBUG_REEMIT },
     { "soft64",      DEBUG_SOFT64 },
+   { "tcs8",        DEBUG_TCS_EIGHT_PATCH },
     { NULL,    0 }
  };
  
diff --git a/src/intel/dev/gen_debug.h b/src/intel/dev/gen_debug.h

index e4dabc67f8d4da764f6e4e566f58f12e2e465530..a6592354a64bdd21bfc4326b3a2af6c7d3147658 100644 (file)
--- a/src/intel/dev/gen_debug.h
+++ b/src/intel/dev/gen_debug.h
@@ -84,6 +84,7 @@ extern uint64_t INTEL_DEBUG;
  #define DEBUG_COLOR               (1ull << 40)
  #define DEBUG_REEMIT              (1ull << 41)
  #define DEBUG_SOFT64              (1ull << 42)
+#define DEBUG_TCS_EIGHT_PATCH     (1ull << 43)
  
  /* These flags are not compatible with the disk shader cache */
  #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
@@ -91,7 +92,8 @@ extern uint64_t INTEL_DEBUG;
  /* These flags may affect program generation */
  #define DEBUG_DISK_CACHE_MASK \
     (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 |  DEBUG_SPILL_FS | \
-   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)
+   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \
+   DEBUG_TCS_EIGHT_PATCH)
  
  #ifdef HAVE_ANDROID_PLATFORM
  #define LOG_TAG "INTEL-MESA"
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c

index 0b58dce05b0d52ace091cac1d3001c10e6a40f09..6b64f7ea8c7008c33e3528f635df96dd920f1da3 100644 (file)
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1434,6 +1434,11 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline,
        hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
        hs.ScratchSpaceBasePointer =
           get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin);
+
+#if GEN_GEN >= 9
+      hs.DispatchMode = tcs_prog_data->base.dispatch_mode;
+      hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
     }
  
     const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c

index 1c4d79886388e0aaedb47d5f2345c3e3b2326f94..1050850bb1c1e2912e7c504701a77215b5cb6814 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -160,6 +160,7 @@ brw_tcs_populate_key(struct brw_context *brw,
                       struct brw_tcs_prog_key *key)
  {
     const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   const struct brw_compiler *compiler = brw->screen->compiler;
     struct brw_program *tcp =
        (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
     struct brw_program *tep =
@@ -177,7 +178,7 @@ brw_tcs_populate_key(struct brw_context *brw,
        per_patch_slots |= prog->info.patch_outputs_written;
     }
  
-   if (devinfo->gen < 8 || !tcp)
+   if (devinfo->gen < 8 || !tcp || compiler->use_tcs_8_patch)
        key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
     key->outputs_written = per_vertex_slots;
     key->patch_outputs_written = per_patch_slots;
@@ -251,7 +252,7 @@ brw_tcs_populate_default_key(const struct brw_compiler *compiler,
     brw_setup_tex_for_precompile(devinfo, &key->tex, prog);
  
     /* Guess that the input and output patches have the same dimensionality. */
-   if (devinfo->gen < 8)
+   if (devinfo->gen < 8 || compiler->use_tcs_8_patch)
        key->input_vertices = prog->info.tess.tcs_vertices_out;
  
     if (tes) {
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c

index ecffa2e8e86008289a96d99e3435d8747ced2903..961306b04fd52a24cd5d87da8fefcf40138999d9 100644 (file)
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4111,6 +4111,11 @@ genX(upload_hs_state)(struct brw_context *brw)
           hs.IncludeVertexHandles = true;
  
           hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
+
+#if GEN_GEN >= 9
+         hs.DispatchMode = vue_prog_data->dispatch_mode;
+         hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
        }
     }
  }
author	Kenneth Graunke <kenneth@whitecape.org>
	Fri, 3 May 2019 21:57:54 +0000 (14:57 -0700)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Tue, 14 May 2019 20:16:30 +0000 (13:16 -0700)
src/gallium/drivers/iris/iris_program.c		patch \| blob \| history
src/gallium/drivers/iris/iris_state.c		patch \| blob \| history
src/intel/compiler/brw_compiler.c		patch \| blob \| history
src/intel/compiler/brw_compiler.h		patch \| blob \| history
src/intel/compiler/brw_fs.cpp		patch \| blob \| history
src/intel/compiler/brw_fs.h		patch \| blob \| history
src/intel/compiler/brw_fs_nir.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_tcs.cpp		patch \| blob \| history
src/intel/dev/gen_debug.c		patch \| blob \| history
src/intel/dev/gen_debug.h		patch \| blob \| history
src/intel/vulkan/genX_pipeline.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_tcs.c		patch \| blob \| history
src/mesa/drivers/dri/i965/genX_state_upload.c		patch \| blob \| history