X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fintel%2Fcompiler%2Fbrw_schedule_instructions.cpp;h=7a0fb1ac684d3e9eac68035615494529ccedbc71;hb=5c5555a862754a5b43fee2abf4fc34e888d22a06;hp=826f2e771e3e9a270bcb0277502929f357de32d3;hpb=d966a6b4c4684bc02647a8fdc69a6c88e5ed00c2;p=mesa.git diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 826f2e771e3..7a0fb1ac684 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -533,16 +533,15 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: - instruction_scheduler(backend_shader *s, int grf_count, + instruction_scheduler(const backend_shader *s, int grf_count, unsigned hw_reg_count, int block_count, - instruction_scheduler_mode mode) + instruction_scheduler_mode mode): + bs(s) { - this->bs = s; this->mem_ctx = ralloc_context(NULL); this->grf_count = grf_count; this->hw_reg_count = hw_reg_count; this->instructions.make_empty(); - this->instructions_to_schedule = 0; this->post_reg_alloc = (mode == SCHEDULE_POST); this->mode = mode; if (!post_reg_alloc) { @@ -613,13 +612,12 @@ public: void *mem_ctx; bool post_reg_alloc; - int instructions_to_schedule; int grf_count; unsigned hw_reg_count; int reg_pressure; int block_idx; exec_list instructions; - backend_shader *bs; + const backend_shader *bs; instruction_scheduler_mode mode; @@ -669,14 +667,14 @@ public: class fs_instruction_scheduler : public instruction_scheduler { public: - fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count, + fs_instruction_scheduler(const fs_visitor *v, int grf_count, int hw_reg_count, int block_count, instruction_scheduler_mode mode); void calculate_deps(); - bool is_compressed(fs_inst *inst); + bool is_compressed(const fs_inst *inst); schedule_node *choose_instruction_to_schedule(); int issue_time(backend_instruction *inst); - fs_visitor *v; + const fs_visitor *v; void count_reads_remaining(backend_instruction *inst); void setup_liveness(cfg_t *cfg); @@ -684,7 +682,7 @@ public: int get_register_pressure_benefit(backend_instruction *inst); }; -fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, +fs_instruction_scheduler::fs_instruction_scheduler(const fs_visitor *v, int grf_count, int hw_reg_count, int block_count, instruction_scheduler_mode mode) @@ -730,21 +728,23 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) void fs_instruction_scheduler::setup_liveness(cfg_t *cfg) { + const fs_live_variables &live = v->live_analysis.require(); + /* First, compute liveness on a per-GRF level using the in/out sets from * liveness calculation. */ for (int block = 0; block < cfg->num_blocks; block++) { - for (int i = 0; i < v->live_intervals->num_vars; i++) { - if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) { - int vgrf = v->live_intervals->vgrf_from_var[i]; + for (int i = 0; i < live.num_vars; i++) { + if (BITSET_TEST(live.block_data[block].livein, i)) { + int vgrf = live.vgrf_from_var[i]; if (!BITSET_TEST(livein[block], vgrf)) { reg_pressure_in[block] += v->alloc.sizes[vgrf]; BITSET_SET(livein[block], vgrf); } } - if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) - BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]); + if (BITSET_TEST(live.block_data[block].liveout, i)) + BITSET_SET(liveout[block], live.vgrf_from_var[i]); } } @@ -754,8 +754,8 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg) */ for (int block = 0; block < cfg->num_blocks - 1; block++) { for (int i = 0; i < grf_count; i++) { - if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip && - v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) { + if (live.vgrf_start[i] <= cfg->blocks[block]->end_ip && + live.vgrf_end[i] >= cfg->blocks[block + 1]->start_ip) { if (!BITSET_TEST(livein[block + 1], i)) { reg_pressure_in[block + 1] += v->alloc.sizes[i]; BITSET_SET(livein[block + 1], i); @@ -848,11 +848,11 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) class vec4_instruction_scheduler : public instruction_scheduler { public: - vec4_instruction_scheduler(vec4_visitor *v, int grf_count); + vec4_instruction_scheduler(const vec4_visitor *v, int grf_count); void calculate_deps(); schedule_node *choose_instruction_to_schedule(); int issue_time(backend_instruction *inst); - vec4_visitor *v; + const vec4_visitor *v; void count_reads_remaining(backend_instruction *inst); void setup_liveness(cfg_t *cfg); @@ -860,7 +860,7 @@ public: int get_register_pressure_benefit(backend_instruction *inst); }; -vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, +vec4_instruction_scheduler::vec4_instruction_scheduler(const vec4_visitor *v, int grf_count) : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST), v(v) @@ -923,8 +923,6 @@ instruction_scheduler::add_insts_from_block(bblock_t *block) instructions.push_tail(n); } - - this->instructions_to_schedule = block->end_ip - block->start_ip + 1; } /** Computation of the delay member of each node. */ @@ -1065,7 +1063,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) * actually writes 2 MRFs. */ bool -fs_instruction_scheduler::is_compressed(fs_inst *inst) +fs_instruction_scheduler::is_compressed(const fs_inst *inst) { return inst->exec_size == 16; } @@ -1416,7 +1414,7 @@ vec4_instruction_scheduler::calculate_deps() } if (inst->mlen > 0 && !inst->is_send_from_grf()) { - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { add_dep(last_mrf_write[inst->base_mrf + i], n); last_mrf_write[inst->base_mrf + i] = n; } @@ -1493,7 +1491,7 @@ vec4_instruction_scheduler::calculate_deps() } if (inst->mlen > 0 && !inst->is_send_from_grf()) { - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { last_mrf_write[inst->base_mrf + i] = n; } } @@ -1651,10 +1649,12 @@ vec4_instruction_scheduler::choose_instruction_to_schedule() } int -fs_instruction_scheduler::issue_time(backend_instruction *inst) +fs_instruction_scheduler::issue_time(backend_instruction *inst0) { - const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst); - if (is_compressed((fs_inst *)inst)) + const fs_inst *inst = static_cast(inst0); + const unsigned overhead = v->grf_used && has_bank_conflict(v->devinfo, inst) ? + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE) : 0; + if (is_compressed(inst)) return 4 + overhead; else return 2 + overhead; @@ -1672,6 +1672,8 @@ instruction_scheduler::schedule_instructions(bblock_t *block) { const struct gen_device_info *devinfo = bs->devinfo; int time = 0; + int instructions_to_schedule = block->end_ip - block->start_ip + 1; + if (!post_reg_alloc) reg_pressure = reg_pressure_in[block->num]; block_idx = block->num; @@ -1760,24 +1762,6 @@ instruction_scheduler::schedule_instructions(bblock_t *block) } assert(instructions_to_schedule == 0); - - block->cycle_count = time; -} - -static unsigned get_cycle_count(cfg_t *cfg) -{ - unsigned count = 0, multiplier = 1; - foreach_block(block, cfg) { - if (block->start()->opcode == BRW_OPCODE_DO) - multiplier *= 10; /* assume that loops execute ~10 times */ - - count += block->cycle_count * multiplier; - - if (block->end()->opcode == BRW_OPCODE_WHILE) - multiplier /= 10; - } - - return count; } void @@ -1819,16 +1803,11 @@ instruction_scheduler::run(cfg_t *cfg) post_reg_alloc); bs->dump_instructions(); } - - cfg->cycle_count = get_cycle_count(cfg); } void fs_visitor::schedule_instructions(instruction_scheduler_mode mode) { - if (mode != SCHEDULE_POST) - calculate_live_intervals(); - int grf_count; if (mode == SCHEDULE_POST) grf_count = grf_used; @@ -1839,7 +1818,7 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode) cfg->num_blocks, mode); sched.run(cfg); - invalidate_analysis(DEPENDENCY_EVERYTHING); + invalidate_analysis(DEPENDENCY_INSTRUCTIONS); } void @@ -1848,5 +1827,5 @@ vec4_visitor::opt_schedule_instructions() vec4_instruction_scheduler sched(this, prog_data->total_grf); sched.run(cfg); - invalidate_analysis(DEPENDENCY_EVERYTHING); + invalidate_analysis(DEPENDENCY_INSTRUCTIONS); }