From: Francisco Jerez <currojerez@riseup.net>
Date: Wed, 18 May 2016 21:39:52 +0000 (-0700)
Subject: i965/fs: Extend back-end interface for limiting the shader dispatch width.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=37fd13ee2daf1dbd80cc7b43f7dcfdd1bb64bcc7;p=mesa.git

i965/fs: Extend back-end interface for limiting the shader dispatch width.

This replaces the current fs_visitor::no16() interface with
fs_visitor::limit_dispatch_width(), which takes an additional
parameter allowing the caller to specify the maximum dispatch width a
shader can be compiled with.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cfe9f023537..061d5555230 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -668,24 +668,26 @@ fs_visitor::fail(const char *format, ...)
 }
 
 /**
- * Mark this program as impossible to compile in SIMD16 mode.
+ * Mark this program as impossible to compile with dispatch width greater
+ * than n.
  *
  * During the SIMD8 compile (which happens first), we can detect and flag
- * things that are unsupported in SIMD16 mode, so the compiler can skip
- * the SIMD16 compile altogether.
+ * things that are unsupported in SIMD16+ mode, so the compiler can skip the
+ * SIMD16+ compile altogether.
  *
- * During a SIMD16 compile (if one happens anyway), this just calls fail().
+ * During a compile of dispatch width greater than n (if one happens anyway),
+ * this just calls fail().
  */
 void
-fs_visitor::no16(const char *msg)
+fs_visitor::limit_dispatch_width(unsigned n, const char *msg)
 {
-   if (dispatch_width == 16) {
+   if (dispatch_width > n) {
       fail("%s", msg);
    } else {
-      simd16_unsupported = true;
-
+      max_dispatch_width = n;
       compiler->shader_perf_log(log_data,
-                                "SIMD16 shader failed to compile: %s", msg);
+                                "Shader dispatch width limited to SIMD%d: %s",
+                                n, msg);
    }
 }
 
@@ -6328,7 +6330,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
       simd8_grf_used = v8.grf_used;
    }
 
-   if (!v8.simd16_unsupported &&
+   if (v8.max_dispatch_width >= 16 &&
        likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
       /* Try a SIMD16 compile */
       fs_visitor v16(compiler, log_data, mem_ctx, key,
@@ -6501,8 +6503,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                  NULL, /* Never used in core profile */
                  shader, 16, shader_time_index);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
-       !fail_msg && !v8.simd16_unsupported &&
-       local_workgroup_size <= 16 * max_cs_threads) {
+       !fail_msg && v8.max_dispatch_width >= 16 &&
+       simd_required <= 16) {
       /* Try a SIMD16 compile */
       if (simd_required <= 8)
          v16.import_uniforms(&v8);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c249c82576a..d28384d8216 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -170,7 +170,7 @@ public:
                                                      fs_inst *inst);
    void vfail(const char *msg, va_list args);
    void fail(const char *msg, ...);
-   void no16(const char *msg);
+   void limit_dispatch_width(unsigned n, const char *msg);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
    bool lower_pack();
@@ -356,8 +356,6 @@ public:
 
    bool failed;
    char *fail_msg;
-   bool simd16_unsupported;
-   char *no16_msg;
 
    /** Register numbers for thread payload fields. */
    struct thread_payload {
@@ -391,8 +389,9 @@ public:
    unsigned grf_used;
    bool spilled_any_registers;
 
-   const unsigned dispatch_width; /**< 8 or 16 */
+   const unsigned dispatch_width; /**< 8, 16 or 32 */
    unsigned min_dispatch_width;
+   unsigned max_dispatch_width;
 
    int shader_time_index;
 
@@ -505,7 +504,7 @@ private:
    const void * const key;
    struct brw_stage_prog_data * const prog_data;
 
-   unsigned dispatch_width; /**< 8 or 16 */
+   unsigned dispatch_width; /**< 8, 16 or 32 */
 
    exec_list discard_halt_patches;
    unsigned promoted_constants;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index c220f1c9601..25e1a445136 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -424,17 +424,16 @@ fs_visitor::emit_fb_writes()
        * sounds because the SIMD8 single-source message lacks channel selects
        * for the second and third subspans.
        */
-      no16("Missing support for simd16 depth writes on gen6\n");
+      limit_dispatch_width(8, "Depth writes unsupported in SIMD16+ mode.\n");
    }
 
    if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
       /* From the 'Render Target Write message' section of the docs:
        * "Output Stencil is not supported with SIMD16 Render Target Write
        * Messages."
-       *
-       * FINISHME: split 16 into 2 8s
        */
-      no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+      limit_dispatch_width(8, "gl_FragStencilRefARB unsupported "
+                           "in SIMD16+ mode.\n");
    }
 
    if (do_dual_src) {
@@ -885,11 +884,10 @@ fs_visitor::init()
       min_dispatch_width = 8;
    }
 
+   this->max_dispatch_width = 32;
    this->prog_data = this->stage_prog_data;
 
    this->failed = false;
-   this->simd16_unsupported = false;
-   this->no16_msg = NULL;
 
    this->nir_locals = NULL;
    this->nir_ssa_values = NULL;