intel/compiler: Introduce backend_shader method to propagate IR changes to analysis...

[mesa.git] / src / intel / compiler / brw_fs.h
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h

index 5236fff4b6e2bbbd873498bd8c91b1ae900dc534..ee04eba52cf93d09fe8e085b9bc04a157c852362 100644 (file)
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -31,6 +31,7 @@
  #include "brw_shader.h"
  #include "brw_ir_fs.h"
  #include "brw_fs_builder.h"
+#include "brw_fs_live_variables.h"
  #include "compiler/nir/nir.h"
  
  struct bblock_t;
@@ -39,7 +40,6 @@ namespace {
  }
  
  namespace brw {
-   class fs_live_variables;
  }
  
  struct brw_gs_compile;
@@ -107,6 +107,7 @@ public:
     void setup_cs_payload();
     bool fixup_sends_duplicate_payload();
     void fixup_3src_null_dest();
+   bool fixup_nomask_control_flow();
     void assign_curb_setup();
     void assign_urb_setup();
     void convert_attr_sources_to_hw_regs(fs_inst *inst);
@@ -125,6 +126,7 @@ public:
                        unsigned *out_pull_index);
     void lower_constant_loads();
     void invalidate_live_intervals();
+   virtual void invalidate_analysis(brw::analysis_dependency_class c);
     void calculate_live_intervals();
     void calculate_register_pressure();
     void validate();
@@ -167,7 +169,9 @@ public:
     bool lower_integer_multiplication();
     bool lower_minmax();
     bool lower_simd_width();
+   bool lower_barycentrics();
     bool lower_scoreboard();
+   bool lower_sub_sat();
     bool opt_combine_constants();
  
     void emit_dummy_fs();
@@ -302,8 +306,6 @@ public:
  
     fs_reg interp_reg(int location, int channel);
  
-   int implied_mrf_writes(const fs_inst *inst) const;
-
     virtual void dump_instructions();
     virtual void dump_instructions(const char *name);
     void dump_instruction(backend_instruction *inst);
@@ -412,8 +414,23 @@ private:
     void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
     void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
     void lower_mulh_inst(fs_inst *inst, bblock_t *block);
+
+   unsigned workgroup_size() const;
  };
  
+/**
+ * Return the flag register used in fragment shaders to keep track of live
+ * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
+ * dispatch mode, while earlier generations are constrained to f0.1, which
+ * limits the dispatch width to SIMD16 for fragment shaders that use discard.
+ */
+static inline unsigned
+sample_mask_flag_subreg(const fs_visitor *shader)
+{
+   assert(shader->stage == MESA_SHADER_FRAGMENT);
+   return shader->devinfo->gen >= 7 ? 2 : 1;
+}
+
  /**
   * The fragment shader code generator.
   *
@@ -541,25 +558,21 @@ private:
  namespace brw {
     inline fs_reg
     fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
-                     brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
+                     brw_reg_type type = BRW_REGISTER_TYPE_F)
     {
        if (!regs[0])
           return fs_reg();
  
        if (bld.dispatch_width() > 16) {
-         const fs_reg tmp = bld.vgrf(type, n);
+         const fs_reg tmp = bld.vgrf(type);
           const brw::fs_builder hbld = bld.exec_all().group(16, 0);
           const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
-         fs_reg *const components = new fs_reg[n * m];
+         fs_reg *const components = new fs_reg[m];
  
-         for (unsigned c = 0; c < n; c++) {
-            for (unsigned g = 0; g < m; g++) {
-               components[c * m + g] =
-                  offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
-            }
-         }
+         for (unsigned g = 0; g < m; g++)
+               components[g] = retype(brw_vec8_grf(regs[g], 0), type);
  
-         hbld.LOAD_PAYLOAD(tmp, components, n * m, 0);
+         hbld.LOAD_PAYLOAD(tmp, components, m, 0);
  
           delete[] components;
           return tmp;
@@ -569,6 +582,29 @@ namespace brw {
        }
     }
  
+   inline fs_reg
+   fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
+   {
+      if (!regs[0])
+         return fs_reg();
+
+      const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
+      const brw::fs_builder hbld = bld.exec_all().group(8, 0);
+      const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
+      fs_reg *const components = new fs_reg[2 * m];
+
+      for (unsigned c = 0; c < 2; c++) {
+         for (unsigned g = 0; g < m; g++)
+            components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
+                                           hbld, c + 2 * (g % 2));
+      }
+
+      hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
+
+      delete[] components;
+      return tmp;
+   }
+
     bool
     lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
  }