+ /* Start with every dword register entry set to 0 (free). */
RegisterFile() {regs.fill(0);}
+ /* One entry per dword register. Values used by the code in this struct:
+  *   0          - free
+  *   0xFFFFFFFF - blocked (see block() / is_blocked())
+  *   0xF0000000 - marker: the per-byte contents live in subdword_regs
+  *   otherwise  - the id of the temporary stored in the register
+  */
std::array<uint32_t, 512> regs;
+ /* Per-byte contents of registers whose regs[] entry is 0xF0000000.
+  * The key is the register index; each of the four array slots holds the
+  * value written for that byte (0 meaning free). Entries are erased again
+  * once all four bytes are 0 (see fill_subdword()).
+  */
+ std::map<uint32_t, std::array<uint32_t, 4>> subdword_regs;
const uint32_t& operator [] (unsigned index) const {
return regs[index];
return res;
}
- bool test(PhysReg start, unsigned size) {
- for (unsigned i = 0; i < size; i++) {
- if (regs[start + i])
+ /* Returns true if any byte in [start, start + num_bytes) is occupied.
+  *
+  * A dword entry counts as occupied when any of its low 28 bits is set,
+  * which holds for 0xFFFFFFFF (blocked) but not for 0 (free) or the
+  * 0xF0000000 sub-dword marker. For marker entries only the bytes that
+  * fall inside the requested range are inspected.
+  */
+ bool test(PhysReg start, unsigned num_bytes) {
+ for (PhysReg i = start; i.reg_b < start.reg_b + num_bytes; i = PhysReg(i + 1)) {
+ if (regs[i] & 0x0FFFFFFF)
return true;
+ if (regs[i] == 0xF0000000) {
+ /* Look the entry up once and reuse the iterator: operator[] would
+  * default-insert an empty entry if the invariant were ever broken
+  * in a build without asserts. */
+ auto entry = subdword_regs.find(i);
+ assert(entry != subdword_regs.end());
+ if (entry == subdword_regs.end())
+ continue;
+ for (unsigned j = i.byte(); i * 4 + j < start.reg_b + num_bytes && j < 4; j++) {
+ if (entry->second[j])
+ return true;
+ }
+ }
}
return false;
}
regs[start + i] = val;
}
- void clear(PhysReg start, unsigned size) {
- fill(start, size, 0);
+ /* Writes val into every byte of [start, start + num_bytes).
+  *
+  * The affected dword entries in regs[] are first set to the 0xF0000000
+  * marker, then the per-byte values are stored in subdword_regs. Passing
+  * val == 0 clears the bytes; a register whose four bytes all end up 0
+  * has its subdword_regs entry erased and its regs[] entry reset to 0.
+  *
+  * NOTE(review): the number of marked dwords is derived from num_bytes
+  * alone, while the loop below also starts at start.byte(). A range that
+  * begins at a non-zero byte offset and crosses a dword boundary would
+  * get a subdword_regs entry for a dword that was not marked - presumably
+  * callers never pass such a range; confirm.
+  */
+ void fill_subdword(PhysReg start, unsigned num_bytes, uint32_t val) {
+ fill(start, DIV_ROUND_UP(num_bytes, 4), 0xF0000000);
+ for (PhysReg i = start; i.reg_b < start.reg_b + num_bytes; i = PhysReg(i + 1)) {
+ /* emplace or get */
+ std::array<uint32_t, 4>& sub = subdword_regs.emplace(i, std::array<uint32_t, 4>{0, 0, 0, 0}).first->second;
+ /* write the bytes of this dword that lie inside the requested range */
+ for (unsigned j = i.byte(); i * 4 + j < start.reg_b + num_bytes && j < 4; j++)
+ sub[j] = val;
+
+ /* nothing left in this dword: drop the entry and mark the register free */
+ if (sub == std::array<uint32_t, 4>{0, 0, 0, 0}) {
+ subdword_regs.erase(i);
+ regs[i] = 0;
+ }
+ }
+ }
+
+ /* Marks [start, start + num_bytes) as blocked (0xFFFFFFFF).
+  * Ranges that start on a dword boundary and cover whole dwords are
+  * written directly into regs[]; everything else is tracked per byte.
+  */
+ void block(PhysReg start, unsigned num_bytes) {
+ const bool whole_dwords = start.byte() == 0 && num_bytes % 4 == 0;
+ if (whole_dwords) {
+ fill(start, num_bytes / 4, 0xFFFFFFFF);
+ return;
+ }
+ fill_subdword(start, num_bytes, 0xFFFFFFFF);
+ }
+
+ /* Returns true if the register at start is blocked.
+  *
+  * A register is blocked when its regs[] entry is 0xFFFFFFFF, or when it
+  * is tracked per byte (0xF0000000 marker) and any byte from start.byte()
+  * up to the end of the dword holds 0xFFFFFFFF.
+  */
+ bool is_blocked(PhysReg start) {
+ if (regs[start] == 0xFFFFFFFF)
+ return true;
+ if (regs[start] == 0xF0000000) {
+ /* Use find() rather than operator[]: a pure query must not insert an
+  * empty entry into the map, and one lookup serves all four bytes. */
+ auto entry = subdword_regs.find(start);
+ assert(entry != subdword_regs.end());
+ if (entry == subdword_regs.end())
+ return false;
+ for (unsigned i = start.byte(); i < 4; i++)
+ if (entry->second[i] == 0xFFFFFFFF)
+ return true;
+ }
+ return false;
+ }
+
+ /* Frees the registers (or bytes) occupied by a value of class rc
+  * located at start by writing 0 over them.
+  */
+ void clear(PhysReg start, RegClass rc) {
+ if (!rc.is_subdword()) {
+ fill(start, rc.size(), 0);
+ return;
+ }
+ /* sub-dword classes are tracked per byte */
+ fill_subdword(start, rc.bytes(), 0);
}
void fill(Operand op) {
- fill(op.physReg(), op.size(), op.tempId());
+ /* Record the operand's temp id over the registers it occupies. */
+ if (!op.regClass().is_subdword()) {
+ fill(op.physReg(), op.size(), op.tempId());
+ return;
+ }
+ /* sub-dword operands are tracked per byte */
+ fill_subdword(op.physReg(), op.bytes(), op.tempId());
}
void clear(Operand op) {
- fill(op.physReg(), op.size(), 0);
+ /* Forward to clear(PhysReg, RegClass), which picks per-dword or
+  * per-byte clearing based on the operand's register class. */
+ clear(op.physReg(), op.regClass());
}
void fill(Definition def) {
- fill(def.physReg(), def.size(), def.tempId());
+ /* Record the definition's temp id over the registers it occupies. */
+ if (!def.regClass().is_subdword()) {
+ fill(def.physReg(), def.size(), def.tempId());
+ return;
+ }
+ /* sub-dword definitions are tracked per byte */
+ fill_subdword(def.physReg(), def.bytes(), def.tempId());
}
void clear(Definition def) {
- fill(def.physReg(), def.size(), 0);
+ /* Forward to clear(PhysReg, RegClass), which picks per-dword or
+  * per-byte clearing based on the definition's register class. */
+ clear(def.physReg(), def.regClass());
}
};
// FIXME: if a definition got moved, change the target location and remove the parallelcopy
copy.second.setTemp(Temp(ctx.program->allocateId(), copy.second.regClass()));
ctx.assignments[copy.second.tempId()] = {copy.second.physReg(), copy.second.regClass()};
- for (unsigned i = copy.second.physReg().reg(); i < copy.second.physReg() + copy.second.size(); i++)
- reg_file[i] = copy.second.tempId();
+ reg_file.fill(copy.second);
+
/* check if we moved an operand */
for (Operand& op : instr->operands) {
if (!op.isTemp())
if (res.second) {
/* mark the area as blocked */
- reg_file.fill(res.first, size, 0xFFFFFFFF);
+ reg_file.block(res.first, var.second.bytes());
+
/* create parallelcopy pair (without definition id) */
Temp tmp = Temp(id, var.second);
Operand pc_op = Operand(tmp);
if (reg_file[j] == 0 || reg_file[j] == last_var)
continue;
- /* 0xFFFF signals that this area is already blocked! */
- if (reg_file[j] == 0xFFFFFFFF || k > num_moves) {
+ if (reg_file.is_blocked(PhysReg{j}) || k > num_moves) {
found = false;
break;
}
unsigned size = ctx.assignments[reg_file[j]].second.size();
unsigned id = reg_file[j];
new_vars.emplace(size, id);
- reg_file.clear(ctx.assignments[id].first, size);
+ reg_file.clear(ctx.assignments[id].first, ctx.assignments[id].second);
}
}
/* mark the area as blocked */
- reg_file.fill(PhysReg{reg_lo}, size, 0xFFFFFFFF);
+ reg_file.block(PhysReg{reg_lo}, size * 4);
if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, lb, ub, instr, def_reg_lo, def_reg_hi))
return false;
instr->operands[j].physReg() >= lb &&
instr->operands[j].physReg() < ub) {
assert(instr->operands[j].isFixed());
- assert(reg_file[instr->operands[j].physReg()] == 0);
- reg_file.fill(instr->operands[j].physReg(), instr->operands[j].size(), 0xFFFFFFFF);
+ assert(!reg_file.test(instr->operands[j].physReg(), instr->operands[j].bytes()));
+ reg_file.block(instr->operands[j].physReg(), instr->operands[j].bytes());
killed_ops += instr->operands[j].getTemp().size();
}
}
continue;
/* dead operands effectively reduce the number of estimated moves */
- if (remaining_op_moves && reg_file[j] == 0xFFFFFFFF) {
+ if (remaining_op_moves && reg_file.is_blocked(PhysReg{j})) {
k--;
remaining_op_moves--;
continue;
for (unsigned j = best_pos; j < best_pos + size; j++) {
if (reg_file[j] != 0xFFFFFFFF && reg_file[j] != 0)
vars.emplace(ctx.assignments[reg_file[j]].second.size(), reg_file[j]);
- reg_file[j] = 0;
+ reg_file.clear(ctx.assignments[reg_file[j]].first, ctx.assignments[reg_file[j]].second);
}
if (instr->opcode == aco_opcode::p_create_vector) {
parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
/* we set the definition regs == 0. the actual caller is responsible for correct setting */
- reg_file.clear(PhysReg{best_pos}, size);
+ reg_file.clear(PhysReg{best_pos}, rc);
update_renames(ctx, reg_file, parallelcopies, instr);
if (reg_lo < lb || reg_hi >= ub || reg_lo > reg_hi)
return false;
- if (reg_file.test(reg, size))
+ if (reg_file.test(reg, rc.bytes()))
return false;
+
adjust_max_used_regs(ctx, rc, reg_lo);
return true;
}
continue;
assert(definition.physReg() == exec);
- assert(!register_file.test(definition.physReg(), definition.size()));
+ assert(!register_file.test(definition.physReg(), definition.bytes()));
register_file.fill(definition);
ctx.assignments[definition.tempId()] = {definition.physReg(), definition.regClass()};
}
continue;
}
/* only assign if register is still free */
- if (!register_file.test(reg, definition.size())) {
+ if (!register_file.test(reg, definition.bytes())) {
definition.setFixed(reg);
register_file.fill(definition);
ctx.assignments[definition.tempId()] = {definition.physReg(), definition.regClass()};