i965: Accurately bail on SIMD16 compiles.

author Kenneth Graunke <kenneth@whitecape.org>

Fri, 7 Mar 2014 08:49:45 +0000 (00:49 -0800)

committer Kenneth Graunke <kenneth@whitecape.org>

Tue, 18 Mar 2014 17:11:38 +0000 (10:11 -0700)
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 7 Mar 2014 08:49:45 +0000 (00:49 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 18 Mar 2014 17:11:38 +0000 (10:11 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 1f8590143fe03589fa61ba7c0bb4325d3cb308ff..1b32d634a84ffed78ea684fb0277f5909cfd1ebc 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -647,9 +647,8 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
  }
  
  void
-fs_visitor::fail(const char *format, ...)
+fs_visitor::vfail(const char *format, va_list va)
  {
-   va_list va;
     char *msg;
  
     if (failed)
@@ -657,9 +656,7 @@ fs_visitor::fail(const char *format, ...)
  
     failed = true;
  
-   va_start(va, format);
     msg = ralloc_vasprintf(mem_ctx, format, va);
-   va_end(va);
     msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
  
     this->fail_msg = msg;
@@ -669,6 +666,48 @@ fs_visitor::fail(const char *format, ...)
     }
  }
  
+void
+fs_visitor::fail(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);   /* collect the printf-style arguments */
+   vfail(format, va);      /* va_list variant (also used by no16()) does the work */
+   va_end(va);
+}
+
+/**
+ * Mark this program as impossible to compile in SIMD16 mode.
+ *
+ * During the SIMD8 compile (which happens first), we can detect and flag
+ * things that are unsupported in SIMD16 mode, so the compiler can skip
+ * the SIMD16 compile altogether.  The printf-style message is kept in
+ * no16_msg, but only when INTEL_DEBUG & DEBUG_PERF is set.
+ * During a SIMD16 compile (if one happens anyway), this fails via vfail().
+ */
+void
+fs_visitor::no16(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);
+
+   if (dispatch_width == 16) {
+      vfail(format, va);
+   } else {
+      simd16_unsupported = true;
+
+      /* NOTE(review): no16_msg is not written when DEBUG_PERF is off, and its
+       * initial value is not visible here -- confirm readers handle that. */
+      if (INTEL_DEBUG & DEBUG_PERF) {
+         if (no16_msg)   /* earlier reason recorded: append, no separator added */
+            ralloc_vasprintf_append(&no16_msg, format, va);
+         else
+            no16_msg = ralloc_vasprintf(mem_ctx, format, va);
+      }
+   }
+
+   va_end(va);
+}
+
  fs_inst *
  fs_visitor::emit(enum opcode opcode)
  {
@@ -1356,8 +1395,8 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
     switch (opcode) {
     case SHADER_OPCODE_INT_QUOTIENT:
     case SHADER_OPCODE_INT_REMAINDER:
-      if (brw->gen >= 7 && dispatch_width == 16)
-        fail("SIMD16 INTDIV unsupported\n");
+      if (brw->gen >= 7)
+        no16("SIMD16 INTDIV unsupported\n");
        break;
     case SHADER_OPCODE_POW:
        break;
@@ -3505,13 +3544,18 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
     exec_list *simd16_instructions = NULL;
     fs_visitor v2(brw, c, prog, fp, 16);
     if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
-      /* Try a SIMD16 compile */
-      v2.import_uniforms(&v);
-      if (!v2.run()) {
-         perf_debug("SIMD16 shader failed to compile, falling back to "
-                    "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+      if (!v.simd16_unsupported) {
+         /* Try a SIMD16 compile */
+         v2.import_uniforms(&v);
+         if (!v2.run()) {
+            perf_debug("SIMD16 shader failed to compile, falling back to "
+                       "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+         } else {
+            simd16_instructions = &v2.instructions;
+         }
        } else {
-         simd16_instructions = &v2.instructions;
+         perf_debug("SIMD16 shader unsupported, falling back to "
+                    "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
        }
     }
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 9de1f3a0a8a40eeb83458c5740b8dc8337973591..0d064f60888ff270e874f6a2d052d5793f1e2e3f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -381,7 +381,9 @@ public:
     void insert_gen4_send_dependency_workarounds();
     void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
     void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
+   void vfail(const char *msg, va_list args);
     void fail(const char *msg, ...);
+   void no16(const char *msg, ...);
     void lower_uniform_pull_constant_loads();
  
     void push_force_uncompressed();
@@ -541,6 +543,8 @@ public:
  
     bool failed;
     char *fail_msg;
+   bool simd16_unsupported;
+   char *no16_msg;
  
     /* Result of last visit() method. */
     fs_reg result;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index cd90e23202cd2ac59c85f673b96b46f91a867d38..a83226883ee6c27b16cf482c6039a78a03c45f73 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -462,8 +462,8 @@ fs_visitor::visit(ir_expression *ir)
           * FINISHME: Emit just the MUL if we know an operand is small
           * enough.
           */
-        if (brw->gen >= 7 && dispatch_width == 16)
-           fail("SIMD16 explicit accumulator operands unsupported\n");
+        if (brw->gen >= 7)
+           no16("SIMD16 explicit accumulator operands unsupported\n");
  
          struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -475,8 +475,8 @@ fs_visitor::visit(ir_expression *ir)
        }
        break;
     case ir_binop_imul_high: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -490,8 +490,8 @@ fs_visitor::visit(ir_expression *ir)
        emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
        break;
     case ir_binop_carry: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -500,8 +500,8 @@ fs_visitor::visit(ir_expression *ir)
        break;
     }
     case ir_binop_borrow: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -1290,8 +1290,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
        break;
     case ir_txd: {
-      if (dispatch_width == 16)
-        fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
+      no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
  
        /* Load dPdx and the coordinate together:
         * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
@@ -1364,8 +1363,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        break;
     case ir_tg4:
        if (has_nonconstant_offset) {
-         if (ir->shadow_comparitor && dispatch_width == 16)
-            fail("Gen7 does not support gather4_po_c in SIMD16 mode.");
+         if (ir->shadow_comparitor)
+            no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
  
           /* More crazy intermixing */
           ir->offset->accept(this);
@@ -1464,8 +1463,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
          0
        };
  
+      no16("rectangle scale uniform setup not supported on SIMD16\n");
        if (dispatch_width == 16) {
-        fail("rectangle scale uniform setup not supported on SIMD16\n");
          return coordinate;
        }
  
@@ -2183,8 +2182,8 @@ fs_visitor::try_replace_with_sel()
  void
  fs_visitor::visit(ir_if *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     /* Don't point the annotation at the if statement, because then it plus
@@ -2226,8 +2225,8 @@ fs_visitor::visit(ir_if *ir)
  void
  fs_visitor::visit(ir_loop *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     this->base_ir = NULL;
@@ -2725,9 +2724,10 @@ fs_visitor::emit_fb_writes()
     bool do_dual_src = this->dual_src_output.file != BAD_FILE;
     bool src0_alpha_to_render_target = false;
  
-   if (dispatch_width == 16 && do_dual_src) {
-      fail("GL_ARB_blend_func_extended not yet supported in SIMD16.");
-      do_dual_src = false;
+   if (do_dual_src) {
+      no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
+      if (dispatch_width == 16)
+         do_dual_src = false;
     }
  
     /* From the Sandy Bridge PRM, volume 4, page 198:
@@ -2778,13 +2778,13 @@ fs_visitor::emit_fb_writes()
        nr += reg_width;
  
     if (c->source_depth_to_render_target) {
-      if (brw->gen == 6 && dispatch_width == 16) {
+      if (brw->gen == 6) {
          /* For outputting oDepth on gen6, SIMD8 writes have to be
           * used.  This would require SIMD8 moves of each half to
           * message regs, kind of like pre-gen5 SIMD16 FB writes.
           * Just bail on doing so for now.
           */
-        fail("Missing support for simd16 depth writes on gen6\n");
+        no16("Missing support for simd16 depth writes on gen6\n");
        }
  
        if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
author	Kenneth Graunke <kenneth@whitecape.org>
	Fri, 7 Mar 2014 08:49:45 +0000 (00:49 -0800)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Tue, 18 Mar 2014 17:11:38 +0000 (10:11 -0700)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history