intel/fs: Replace fs_visitor::bank_conflict_cycles() with stand-alone function.
authorFrancisco Jerez <currojerez@riseup.net>
Thu, 2 Apr 2020 23:20:34 +0000 (16:20 -0700)
committerFrancisco Jerez <currojerez@riseup.net>
Wed, 29 Apr 2020 06:00:29 +0000 (23:00 -0700)
This will be reusable by the IR performance analysis pass.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_bank_conflicts.cpp
src/intel/compiler/brw_ir_fs.h
src/intel/compiler/brw_schedule_instructions.cpp

index f2612968f25a460318fc66c5f9a2981b59bef65b..b421723c53ec8468612e161bf17f47e5f8dfaa31 100644 (file)
@@ -167,7 +167,6 @@ public:
    bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool opt_bank_conflicts();
-   unsigned bank_conflict_cycles(const fs_inst *inst) const;
    bool register_coalesce();
    bool compute_to_mrf();
    bool eliminate_find_live_channel();
index f10caf249d0f51cce35c6e7fd8f620fc8f859975..ec19dc6160b9838b0a38e0d2fb3b94e70419a68e 100644 (file)
@@ -935,20 +935,16 @@ fs_visitor::opt_bank_conflicts()
 }
 
 /**
- * Estimate the number of GRF bank conflict cycles incurred by an instruction.
+ * Return whether the instruction incurs GRF bank conflict cycles.
  *
- * Note that this neglects conflict cycles prior to register allocation
- * because we don't know which bank each VGRF is going to end up aligned to.
+ * Note that this is only accurate after register allocation because otherwise
+ * we don't know which bank each VGRF is going to end up aligned to.
  */
-unsigned
-fs_visitor::bank_conflict_cycles(const fs_inst *inst) const
+bool
+has_bank_conflict(const gen_device_info *devinfo, const fs_inst *inst)
 {
-   if (grf_used && inst->is_3src(devinfo) &&
-       is_grf(inst->src[1]) && is_grf(inst->src[2]) &&
-       bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) &&
-       !is_conflict_optimized_out(devinfo, inst)) {
-      return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
-   } else {
-      return 0;
-   }
+   return inst->is_3src(devinfo) &&
+          is_grf(inst->src[1]) && is_grf(inst->src[2]) &&
+          bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) &&
+          !is_conflict_optimized_out(devinfo, inst);
 }
index f7b389d360e69606c362f9a9addb4261a79072b8..6ba3a6ca97e7788e710e43f34ef1fcb03ec08cfa 100644 (file)
@@ -667,4 +667,7 @@ is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
           alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
 }
 
+bool
+has_bank_conflict(const gen_device_info *devinfo, const fs_inst *inst);
+
 #endif
index 7c1390b86892146261109aa0c3d661d10c8aaace..6edafc8bffcfe5d1fa01066a653717c1dd16c875 100644 (file)
@@ -1649,10 +1649,12 @@ vec4_instruction_scheduler::choose_instruction_to_schedule()
 }
 
 int
-fs_instruction_scheduler::issue_time(backend_instruction *inst)
+fs_instruction_scheduler::issue_time(backend_instruction *inst0)
 {
-   const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
-   if (is_compressed((fs_inst *)inst))
+   const fs_inst *inst = static_cast<fs_inst *>(inst0);
+   const unsigned overhead = v->grf_used && has_bank_conflict(v->devinfo, inst) ?
+      DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE) : 0;
+   if (is_compressed(inst))
       return 4 + overhead;
    else
       return 2 + overhead;