i965: Don't set interleave or complete on TCS EOT message.
author: Kenneth Graunke <kenneth@whitecape.org>
Thu, 24 Dec 2015 21:09:26 +0000 (13:09 -0800)
committer: Kenneth Graunke <kenneth@whitecape.org>
Mon, 28 Dec 2015 21:17:03 +0000 (13:17 -0800)
Setting interleave on the TCS EOT message causes Ivybridge hardware to
GPU hang like crazy.  Individual tests would pass, but running even a
simple test like nop.shader_test in a loop would hang within 1-3 runs.
Adding sleep delays worked around the problem, somehow.

Interleave doesn't make much sense given that we only have one patch
URB handle, not two.  Complete doesn't seem useful either.

There's no reason to actually set those bits.  We were just being lazy.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp

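diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d0137481c6c658a2b139bed47bba55cd5f9ef3fd..10a6d39db8546929f3bfc084673bf5e6a12aadf9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h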
@@ -1315,6 +1315,7 @@ enum opcode {
    TCS_OPCODE_CREATE_BARRIER_HEADER,
    TCS_OPCODE_SRC0_010_IS_ZERO,
    TCS_OPCODE_RELEASE_INPUT,
+   TCS_OPCODE_THREAD_END,
 
    TES_OPCODE_GET_PRIMITIVE_ID,
    TES_OPCODE_CREATE_INPUT_READ_HEADER,
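diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f692bc2de3598b85a5df9f8ff708972b409b10db..d4b6410815e98ba18a960f9f518afc68c234c879 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp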
@@ -572,6 +572,8 @@ brw_instruction_name(enum opcode op)
       return "tcs_src0<0,1,0>_is_zero";
    case TCS_OPCODE_RELEASE_INPUT:
       return "tcs_release_input";
+   case TCS_OPCODE_THREAD_END:
+      return "tcs_thread_end";
    case TES_OPCODE_CREATE_INPUT_READ_HEADER:
       return "tes_create_input_read_header";
    case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
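diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f1c3d37ce1c4ea534cb537a6187328d2a17df080..f0f18ca776801fd3380f7c7fc4e8297a2535ceb9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp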
@@ -276,6 +276,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_POW:
       return 2;
    case VS_OPCODE_URB_WRITE:
+   case TCS_OPCODE_THREAD_END:
       return 1;
    case VS_OPCODE_PULL_CONSTANT_LOAD:
       return 2;
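diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index cce2b4d1f4ccb02731955630d0ca7deeabbc3ca2..6b03a1c3db5c272a2626dd57bf18ac9353e5541a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp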
@@ -758,8 +758,12 @@ generate_tcs_urb_write(struct brw_codegen *p,
                               true /* header */, false /* eot */);
    brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
    brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
-   brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
-   brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+   if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
+      brw_inst_set_eot(devinfo, send, 1);
+   } else {
+      brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+      brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+   }
 
    /* what happens to swizzles? */
 }
@@ -968,6 +972,30 @@ generate_tcs_release_input(struct brw_codegen *p,
                                     BRW_URB_SWIZZLE_INTERLEAVE);
 }
 
+static void
+generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)  /* Emit the header-building MOVs and the EOT URB write for TCS_OPCODE_THREAD_END. */
+{
+   struct brw_reg header = brw_message_reg(inst->base_mrf);  /* message register selected by inst->base_mrf */
+
+   brw_push_insn_state(p);  /* save the default instruction state; restored by the pop below */
+   brw_set_default_access_mode(p, BRW_ALIGN_1);  /* header setup uses Align1 access mode */
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);  /* header setup runs with mask control disabled */
+   brw_MOV(p, header, brw_imm_ud(0));  /* zero the header register first */
+   brw_MOV(p, get_element_ud(header, 0),  /* then overwrite header element 0 ... */
+           retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));  /* ... with GRF 0, element 0, read as UD */
+   brw_pop_insn_state(p);  /* back to the caller's default instruction state */
+
+   brw_urb_WRITE(p,  /* the URB write below carries the end-of-thread flag */
+                 brw_null_reg(), /* dest */
+                 inst->base_mrf, /* starting mrf reg nr */
+                 header,  /* the header register built above */
+                 BRW_URB_WRITE_EOT | inst->urb_write_flags,  /* EOT is always added on top of the instruction's own flags */
+                 inst->mlen,  /* message length taken from the instruction */
+                 0,              /* response len */
+                 0,              /* urb destination offset */
+                 0);  /* NOTE(review): presumably the swizzle control (0 = none, i.e. no interleave) -- confirm against brw_urb_WRITE's prototype */
+}
+
 static void
 generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
 {
@@ -1892,6 +1920,10 @@ generate_code(struct brw_codegen *p,
          generate_tcs_release_input(p, dst, src[0], src[1]);
          break;
 
+      case TCS_OPCODE_THREAD_END:
+         generate_tcs_thread_end(p, inst);
+         break;
+
       case SHADER_OPCODE_BARRIER:
          brw_barrier(p, src[0]);
          brw_WAIT(p);
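diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 7693f095a52c5836697a17b5e488df6a3278d712..fb6ca8ee5f9f2718eefbc5f1195a8efd6307aaba 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp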
@@ -203,9 +203,9 @@ vec4_tcs_visitor::emit_thread_end()
    if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
       emit_shader_time_end();
 
-   inst = emit(VS_OPCODE_URB_WRITE);
-   inst->mlen = 1;   /* just the header, no data. */
-   inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
+   inst = emit(TCS_OPCODE_THREAD_END);
+   inst->base_mrf = 14;
+   inst->mlen = 1;
 }