/* count variables to be moved and check war_hint */
bool war_hint = false;
- for (unsigned j = reg_lo; j <= reg_hi; j++) {
- if (reg_file[j] != 0)
+ bool linear_vgpr = false;
+ for (unsigned j = reg_lo; j <= reg_hi && !linear_vgpr; j++) {
+ if (reg_file[j] != 0) {
k++;
+ /* we cannot split live ranges of linear vgprs */
+ if (ctx.assignments[reg_file[j]].second & (1 << 6))
+ linear_vgpr = true;
+ }
war_hint |= ctx.war_hint[j];
}
+ if (linear_vgpr || (war_hint && !best_war_hint))
+ continue;
/* count operands in wrong positions */
for (unsigned j = 0, offset = 0; j < instr->operands.size(); offset += instr->operands[j].size(), j++) {
k += instr->operands[j].size();
}
bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
- if (k > num_moves || (!aligned && k == num_moves) || (war_hint && !best_war_hint))
+ if (k > num_moves || (!aligned && k == num_moves))
continue;
best_pos = reg_lo;
break;
}
}
- if (!writes_sgpr)
+ /* if no operand is an sgpr temporary (e.g. all operands are constants), no need to care either */
+ bool reads_sgpr = false;
+ for (Operand& op : instr->operands) {
+ if (op.isTemp() && op.getTemp().type() == RegType::sgpr) {
+ reads_sgpr = true;
+ break;
+ }
+ }
+ if (!(writes_sgpr && reads_sgpr))
return;
Pseudo_instruction *pi = (Pseudo_instruction *)instr;
handle_live_in = [&](Temp val, Block *block) -> Temp {
std::vector<unsigned>& preds = val.is_linear() ? block->linear_preds : block->logical_preds;
- if (preds.size() == 0 && block->index != 0) {
+ if (preds.size() == 0 || val.regClass() == val.regClass().as_linear()) {
renames[block->index][val.id()] = val;
return val;
}
std::vector<aco_ptr<Instruction>>::reverse_iterator rit;
for (rit = block.instructions.rbegin(); rit != block.instructions.rend(); ++rit) {
aco_ptr<Instruction>& instr = *rit;
- if (!is_phi(instr)) {
- for (const Operand& op : instr->operands) {
- if (op.isTemp())
- live.emplace(op.getTemp());
- }
- if (instr->opcode == aco_opcode::p_create_vector) {
- for (const Operand& op : instr->operands) {
- if (op.isTemp() && op.getTemp().type() == instr->definitions[0].getTemp().type())
- vectors[op.tempId()] = instr.get();
- }
- }
- } else if (!instr->definitions[0].isKill() && !instr->definitions[0].isFixed()) {
+ if (is_phi(instr)) {
+ live.erase(instr->definitions[0].getTemp());
+ if (instr->definitions[0].isKill() || instr->definitions[0].isFixed())
+ continue;
/* collect information about affinity-related temporaries */
std::vector<Temp> affinity_related;
/* affinity_related[0] is the last seen affinity-related temp */
}
}
phi_ressources.emplace_back(std::move(affinity_related));
+ continue;
}
- /* erase from live */
- for (const Definition& def : instr->definitions) {
- if (def.isTemp()) {
- live.erase(def.getTemp());
- std::map<unsigned, unsigned>::iterator it = temp_to_phi_ressources.find(def.tempId());
- if (it != temp_to_phi_ressources.end() && def.regClass() == phi_ressources[it->second][0].regClass())
- phi_ressources[it->second][0] = def.getTemp();
+ /* add vector affinities */
+ if (instr->opcode == aco_opcode::p_create_vector) {
+ for (const Operand& op : instr->operands) {
+ if (op.isTemp() && op.getTemp().type() == instr->definitions[0].getTemp().type())
+ vectors[op.tempId()] = instr.get();
+ }
+ }
+
+ /* add operands to live variables */
+ for (const Operand& op : instr->operands) {
+ if (op.isTemp())
+ live.emplace(op.getTemp());
+ }
+
+ /* erase definitions from live */
+ for (unsigned i = 0; i < instr->definitions.size(); i++) {
+ const Definition& def = instr->definitions[i];
+ if (!def.isTemp())
+ continue;
+ live.erase(def.getTemp());
+ /* mark last-seen phi operand */
+ std::map<unsigned, unsigned>::iterator it = temp_to_phi_ressources.find(def.tempId());
+ if (it != temp_to_phi_ressources.end() && def.regClass() == phi_ressources[it->second][0].regClass()) {
+ phi_ressources[it->second][0] = def.getTemp();
+ /* try to coalesce phi affinities with parallelcopies */
+ if (!def.isFixed() && instr->opcode == aco_opcode::p_parallelcopy) {
+ Operand op = instr->operands[i];
+ if (op.isTemp() && op.isFirstKill() && def.regClass() == op.regClass()) {
+ phi_ressources[it->second].emplace_back(op.getTemp());
+ temp_to_phi_ressources[op.tempId()] = it->second;
+ }
+ }
}
}
}
/* process parallelcopy */
for (std::pair<Operand, Definition> pc : parallelcopy) {
+ /* see if it's a copy from a different phi */
+ //TODO: prefer moving some previous phis over live-ins
+ //TODO: somehow prevent phis fixed before the RA from being updated (shouldn't be a problem in practice since they can only be fixed to exec)
+ Instruction *prev_phi = NULL;
+ std::vector<aco_ptr<Instruction>>::iterator phi_it;
+ for (phi_it = instructions.begin(); phi_it != instructions.end(); ++phi_it) {
+ if ((*phi_it)->definitions[0].tempId() == pc.first.tempId())
+ prev_phi = phi_it->get();
+ }
+ phi_it = it;
+ while (!prev_phi && is_phi(*++phi_it)) {
+ if ((*phi_it)->definitions[0].tempId() == pc.first.tempId())
+ prev_phi = phi_it->get();
+ }
+ if (prev_phi) {
+ /* if so, just update that phi's register */
+ prev_phi->definitions[0].setFixed(pc.second.physReg());
+ ctx.assignments[prev_phi->definitions[0].tempId()] = {pc.second.physReg(), pc.second.regClass()};
+ for (unsigned reg = pc.second.physReg(); reg < pc.second.physReg() + pc.second.size(); reg++)
+ register_file[reg] = prev_phi->definitions[0].tempId();
+ continue;
+ }
+
/* rename */
std::map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId());
Temp orig = pc.first.getTemp();
renames[block.index][orig.id()] = pc.second.getTemp();
renames[block.index][pc.second.tempId()] = pc.second.getTemp();
- /* see if it's a copy from a previous phi */
- //TODO: prefer moving some previous phis over live-ins
- //TODO: somehow prevent phis fixed before the RA from being updated (shouldn't be a problem in practice since they can only be fixed to exec)
- Instruction *prev_phi = NULL;
- for (auto it2 = instructions.begin(); it2 != instructions.end(); ++it2) {
- if ((*it2)->definitions[0].tempId() == pc.first.tempId())
- prev_phi = it2->get();
- }
- if (prev_phi) {
- /* if so, just update that phi */
- prev_phi->definitions[0] = pc.second;
- continue;
- }
-
/* otherwise, this is a live-in and we need to create a new phi
* to move it in this block's predecessors */
aco_opcode opcode = pc.first.getTemp().is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
}
/* fill in sgpr_live_in */
- for (unsigned i = 0; i < ctx.max_used_sgpr; i++)
+ for (unsigned i = 0; i <= ctx.max_used_sgpr; i++)
sgpr_live_in[block.index][i] = register_file[i];
sgpr_live_in[block.index][127] = register_file[scc.reg];
for (unsigned j = 0; j < i; j++) {
Operand& op = instr->operands[j];
if (op.isTemp() && op.tempId() == blocking_id) {
- op = Operand(pc_def.getTemp());
+ op.setTemp(pc_def.getTemp());
op.setFixed(reg);
}
}
instr->operands[1].getTemp().type() == RegType::vgpr) { /* TODO: swap src0 and src1 in this case */
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
bool can_use_mac = !(vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
- vop3->opsel[0] || vop3->opsel[1] || vop3->opsel[2] ||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
- vop3->clamp || vop3->omod);
+ vop3->clamp || vop3->omod || vop3->opsel);
if (can_use_mac) {
instr->format = Format::VOP2;
instr->opcode = aco_opcode::v_mac_f32;
/* handle definitions which must have the same register as an operand */
if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
instr->opcode == aco_opcode::v_mac_f32 ||
- instr->opcode == aco_opcode::v_writelane_b32) {
+ instr->opcode == aco_opcode::v_writelane_b32 ||
+ instr->opcode == aco_opcode::v_writelane_b32_e64) {
instr->definitions[0].setFixed(instr->operands[2].physReg());
} else if (instr->opcode == aco_opcode::s_addk_i32 ||
instr->opcode == aco_opcode::s_mulk_i32) {
pc->operands[i] = parallelcopy[i].first;
pc->definitions[i] = parallelcopy[i].second;
+ assert(pc->operands[i].size() == pc->definitions[i].size());
/* it might happen that the operand is already renamed. we have to restore the original name. */
std::map<unsigned, Temp>::iterator it = ctx.orig_names.find(pc->operands[i].tempId());
if (instr_needs_vop3) {
/* if the first operand is a literal, we have to move it to a reg */
- if (instr->operands.size() && instr->operands[0].isLiteral()) {
+ if (instr->operands.size() && instr->operands[0].isLiteral() && program->chip_class < GFX10) {
bool can_sgpr = true;
/* check, if we have to move to vgpr */
for (const Operand& op : instr->operands) {