X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_live_var_analysis.cpp;h=0378dbaf335e87b6ded33892457fe1b0c6e2d7d2;hb=51bc11abc206ae5ea0946f5a79c68527701c24e0;hp=c00325b92b7184886d821f9ae71ce7a7e475ed8c;hpb=3f96a1ed86ca295d8786da84b195211cb3b383ff;p=mesa.git diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index c00325b92b7..0378dbaf335 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -36,8 +36,53 @@ #include "vulkan/radv_shader.h" namespace aco { -namespace { +RegisterDemand get_live_changes(aco_ptr& instr) +{ + RegisterDemand changes; + for (const Definition& def : instr->definitions) { + if (!def.isTemp() || def.isKill()) + continue; + changes += def.getTemp(); + } + + for (const Operand& op : instr->operands) { + if (!op.isTemp() || !op.isFirstKill()) + continue; + changes -= op.getTemp(); + } + + return changes; +} + +RegisterDemand get_temp_registers(aco_ptr& instr) +{ + RegisterDemand temp_registers; + + for (Definition def : instr->definitions) { + if (!def.isTemp()) + continue; + if (def.isKill()) + temp_registers += def.getTemp(); + } + + for (Operand op : instr->operands) { + if (op.isTemp() && op.isLateKill() && op.isFirstKill()) + temp_registers += op.getTemp(); + } + + return temp_registers; +} + +RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr& instr, aco_ptr& instr_before) +{ + demand -= get_live_changes(instr); + demand -= get_temp_registers(instr); + if (instr_before) + demand += get_temp_registers(instr_before); + return demand; +} +namespace { void process_live_temps_per_block(Program *program, live& lives, Block* block, std::set& worklist, std::vector& phi_sgpr_ops) { @@ -46,28 +91,18 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, register_demand.resize(block->instructions.size()); block->register_demand = RegisterDemand(); - - std::set live_sgprs; - std::set live_vgprs; + TempSet live = lives.live_out[block->index]; /* add the live_out_exec to live */ bool exec_live = false; if (block->live_out_exec != Temp()) { - live_sgprs.insert(block->live_out_exec); - new_demand.sgpr += program->lane_mask.size(); + live.insert(block->live_out_exec); exec_live = true; } - /* split the live-outs from this block into the temporary sets */ - std::vector>& live_temps = lives.live_out; - for (const Temp temp : live_temps[block->index]) { - const bool inserted = temp.is_linear() - ? live_sgprs.insert(temp).second - : live_vgprs.insert(temp).second; - if (inserted) { - new_demand += temp; - } - } + /* initialize register demand */ + for (Temp t : live) + new_demand += t; new_demand.sgpr -= phi_sgpr_ops[block->index]; /* traverse the instructions backwards */ @@ -87,13 +122,11 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, if (!definition.isTemp()) { continue; } + if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc) + program->needs_vcc = true; const Temp temp = definition.getTemp(); - size_t n = 0; - if (temp.is_linear()) - n = live_sgprs.erase(temp); - else - n = live_vgprs.erase(temp); + const size_t n = live.erase(temp); if (n) { new_demand -= temp; @@ -120,13 +153,12 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, for (unsigned i = 0; i < insn->operands.size(); ++i) { Operand& operand = insn->operands[i]; - if (!operand.isTemp()) { + if (!operand.isTemp()) continue; - } + if (operand.isFixed() && operand.physReg() == vcc) + program->needs_vcc = true; const Temp temp = operand.getTemp(); - const bool inserted = temp.is_linear() - ? live_sgprs.insert(temp).second - : live_vgprs.insert(temp).second; + const bool inserted = live.insert(temp).second; if (inserted) { operand.setFirstKill(true); for (unsigned j = i + 1; j < insn->operands.size(); ++j) { @@ -135,6 +167,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, insn->operands[j].setKill(true); } } + if (operand.isLateKill()) + register_demand[idx] += temp; new_demand += temp; } @@ -161,13 +195,10 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, assert(is_phi(insn)); assert(insn->definitions.size() == 1 && insn->definitions[0].isTemp()); Definition& definition = insn->definitions[0]; + if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc) + program->needs_vcc = true; const Temp temp = definition.getTemp(); - size_t n = 0; - - if (temp.is_linear()) - n = live_sgprs.erase(temp); - else - n = live_vgprs.erase(temp); + const size_t n = live.erase(temp); if (n) definition.setKill(false); @@ -177,18 +208,17 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, phi_idx--; } - /* now, we have the live-in sets and need to merge them into the live-out sets */ - for (unsigned pred_idx : block->logical_preds) { - for (Temp vgpr : live_vgprs) { - auto it = live_temps[pred_idx].insert(vgpr); - if (it.second) - worklist.insert(pred_idx); - } - } + /* now, we need to merge the live-ins into the live-out sets */ + for (Temp t : live) { + std::vector& preds = t.is_linear() ? block->linear_preds : block->logical_preds; + +#ifndef NDEBUG + if (preds.empty()) + fprintf(stderr, "Temporary never defined or are defined after use: %%%d in BB%d\n", t.id(), block->index); +#endif - for (unsigned pred_idx : block->linear_preds) { - for (Temp sgpr : live_sgprs) { - auto it = live_temps[pred_idx].insert(sgpr); + for (unsigned pred_idx : preds) { + auto it = lives.live_out[pred_idx].insert(t); if (it.second) worklist.insert(pred_idx); } @@ -205,11 +235,12 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, : block->linear_preds; for (unsigned i = 0; i < preds.size(); ++i) { Operand &operand = insn->operands[i]; - if (!operand.isTemp()) { + if (!operand.isTemp()) continue; - } + if (operand.isFixed() && operand.physReg() == vcc) + program->needs_vcc = true; /* check if we changed an already processed block */ - const bool inserted = live_temps[preds[i]].insert(operand.getTemp()).second; + const bool inserted = lives.live_out[preds[i]].insert(operand.getTemp()).second; if (inserted) { operand.setKill(true); worklist.insert(preds[i]); @@ -220,26 +251,16 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, phi_idx--; } - if (!(block->index != 0 || (live_vgprs.empty() && live_sgprs.empty()))) { - aco_print_program(program, stderr); - fprintf(stderr, "These temporaries are never defined or are defined after use:\n"); - for (Temp vgpr : live_vgprs) - fprintf(stderr, "%%%d\n", vgpr.id()); - for (Temp sgpr : live_sgprs) - fprintf(stderr, "%%%d\n", sgpr.id()); - abort(); - } - - assert(block->index != 0 || new_demand == RegisterDemand()); + assert(block->index != 0 || (new_demand == RegisterDemand() && live.empty())); } unsigned calc_waves_per_workgroup(Program *program) { - unsigned workgroup_size = program->wave_size; - if (program->stage == compute_cs) { - unsigned* bsize = program->info->cs.block_size; - workgroup_size = bsize[0] * bsize[1] * bsize[2]; - } + /* When workgroup size is not known, just go with wave_size */ + unsigned workgroup_size = program->workgroup_size == UINT_MAX + ? program->wave_size + : program->workgroup_size; + return align(workgroup_size, program->wave_size) / program->wave_size; } } /* end namespace */ @@ -248,19 +269,19 @@ uint16_t get_extra_sgprs(Program *program) { if (program->chip_class >= GFX10) { assert(!program->needs_flat_scr); - assert(!program->needs_xnack_mask); + assert(!program->xnack_enabled); return 2; } else if (program->chip_class >= GFX8) { if (program->needs_flat_scr) return 6; - else if (program->needs_xnack_mask) + else if (program->xnack_enabled) return 4; else if (program->needs_vcc) return 2; else return 0; } else { - assert(!program->needs_xnack_mask); + assert(!program->xnack_enabled); if (program->needs_flat_scr) return 4; else if (program->needs_vcc) @@ -363,6 +384,8 @@ live live_var_analysis(Program* program, std::vector phi_sgpr_ops(program->blocks.size()); RegisterDemand new_demand; + program->needs_vcc = false; + /* this implementation assumes that the block idx corresponds to the block's position in program->blocks vector */ for (Block& block : program->blocks) worklist.insert(block.index);