intel/fs: work around gen12 lower-precision source modifier limitation

[mesa.git] / src / intel / compiler / brw_schedule_instructions.cpp
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp

index f10e58cd75d9b326f0bd4c28b177e1930655c0e3..7a0fb1ac684d3e9eac68035615494529ccedbc71 100644 (file)
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -533,16 +533,15 @@ schedule_node::set_latency_gen7(bool is_haswell)
  
  class instruction_scheduler {
  public:
-   instruction_scheduler(backend_shader *s, int grf_count,
+   instruction_scheduler(const backend_shader *s, int grf_count,
                           unsigned hw_reg_count, int block_count,
-                         instruction_scheduler_mode mode)
+                         instruction_scheduler_mode mode):
+      bs(s)
     {
-      this->bs = s;
        this->mem_ctx = ralloc_context(NULL);
        this->grf_count = grf_count;
        this->hw_reg_count = hw_reg_count;
        this->instructions.make_empty();
-      this->instructions_to_schedule = 0;
        this->post_reg_alloc = (mode == SCHEDULE_POST);
        this->mode = mode;
        if (!post_reg_alloc) {
@@ -613,13 +612,12 @@ public:
     void *mem_ctx;
  
     bool post_reg_alloc;
-   int instructions_to_schedule;
     int grf_count;
     unsigned hw_reg_count;
     int reg_pressure;
     int block_idx;
     exec_list instructions;
-   backend_shader *bs;
+   const backend_shader *bs;
  
     instruction_scheduler_mode mode;
  
@@ -669,14 +667,14 @@ public:
  class fs_instruction_scheduler : public instruction_scheduler
  {
  public:
-   fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count,
+   fs_instruction_scheduler(const fs_visitor *v, int grf_count, int hw_reg_count,
                              int block_count,
                              instruction_scheduler_mode mode);
     void calculate_deps();
-   bool is_compressed(fs_inst *inst);
+   bool is_compressed(const fs_inst *inst);
     schedule_node *choose_instruction_to_schedule();
     int issue_time(backend_instruction *inst);
-   fs_visitor *v;
+   const fs_visitor *v;
  
     void count_reads_remaining(backend_instruction *inst);
     void setup_liveness(cfg_t *cfg);
@@ -684,7 +682,7 @@ public:
     int get_register_pressure_benefit(backend_instruction *inst);
  };
  
-fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
+fs_instruction_scheduler::fs_instruction_scheduler(const fs_visitor *v,
                                                     int grf_count, int hw_reg_count,
                                                     int block_count,
                                                     instruction_scheduler_mode mode)
@@ -850,11 +848,11 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
  class vec4_instruction_scheduler : public instruction_scheduler
  {
  public:
-   vec4_instruction_scheduler(vec4_visitor *v, int grf_count);
+   vec4_instruction_scheduler(const vec4_visitor *v, int grf_count);
     void calculate_deps();
     schedule_node *choose_instruction_to_schedule();
     int issue_time(backend_instruction *inst);
-   vec4_visitor *v;
+   const vec4_visitor *v;
  
     void count_reads_remaining(backend_instruction *inst);
     void setup_liveness(cfg_t *cfg);
@@ -862,7 +860,7 @@ public:
     int get_register_pressure_benefit(backend_instruction *inst);
  };
  
-vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
+vec4_instruction_scheduler::vec4_instruction_scheduler(const vec4_visitor *v,
                                                         int grf_count)
     : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST),
       v(v)
@@ -925,8 +923,6 @@ instruction_scheduler::add_insts_from_block(bblock_t *block)
  
        instructions.push_tail(n);
     }
-
-   this->instructions_to_schedule = block->end_ip - block->start_ip + 1;
  }
  
  /** Computation of the delay member of each node. */
@@ -1067,7 +1063,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
   * actually writes 2 MRFs.
   */
  bool
-fs_instruction_scheduler::is_compressed(fs_inst *inst)
+fs_instruction_scheduler::is_compressed(const fs_inst *inst)
  {
     return inst->exec_size == 16;
  }
@@ -1653,10 +1649,12 @@ vec4_instruction_scheduler::choose_instruction_to_schedule()
  }
  
  int
-fs_instruction_scheduler::issue_time(backend_instruction *inst)
+fs_instruction_scheduler::issue_time(backend_instruction *inst0)
  {
-   const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
-   if (is_compressed((fs_inst *)inst))
+   const fs_inst *inst = static_cast<fs_inst *>(inst0);
+   const unsigned overhead = v->grf_used && has_bank_conflict(v->devinfo, inst) ?
+      DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE) : 0;
+   if (is_compressed(inst))
        return 4 + overhead;
     else
        return 2 + overhead;
@@ -1674,6 +1672,8 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
  {
     const struct gen_device_info *devinfo = bs->devinfo;
     int time = 0;
+   int instructions_to_schedule = block->end_ip - block->start_ip + 1;
+
     if (!post_reg_alloc)
        reg_pressure = reg_pressure_in[block->num];
     block_idx = block->num;
@@ -1762,24 +1762,6 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
     }
  
     assert(instructions_to_schedule == 0);
-
-   block->cycle_count = time;
-}
-
-static unsigned get_cycle_count(cfg_t *cfg)
-{
-   unsigned count = 0, multiplier = 1;
-   foreach_block(block, cfg) {
-      if (block->start()->opcode == BRW_OPCODE_DO)
-         multiplier *= 10; /* assume that loops execute ~10 times */
-
-      count += block->cycle_count * multiplier;
-
-      if (block->end()->opcode == BRW_OPCODE_WHILE)
-         multiplier /= 10;
-   }
-
-   return count;
  }
  
  void
@@ -1821,8 +1803,6 @@ instruction_scheduler::run(cfg_t *cfg)
                post_reg_alloc);
        bs->dump_instructions();
     }
-
-   cfg->cycle_count = get_cycle_count(cfg);
  }
  
  void