X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_reg_allocate.cpp;h=a49eca56118304b23e7d80f2c8c564bfc09a7080;hb=094877f9d23169b1d209fb0c97f9b6d4679842d9;hp=cd89edd64de7a4042ac4903457356cc9eb104b5e;hpb=13372a0ce746cde6fa6e0aa3c5130e4227f123e0;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index cd89edd64de..a49eca56118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -115,7 +115,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
    ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf);
    compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count);
    ralloc_free(compiler->vec4_reg_set.regs);
-   compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count);
+   compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count, false);
    if (compiler->devinfo->gen >= 6)
       ra_set_allocate_round_robin(compiler->vec4_reg_set.regs);
    ralloc_free(compiler->vec4_reg_set.classes);
@@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
 
          for (int base_reg = j;
               base_reg < j + class_sizes[i];
               base_reg++) {
-            ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+            ra_add_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
          }
 
          reg++;
@@ -158,6 +158,9 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
    }
    assert(reg == ra_reg_count);
 
+   for (int reg = 0; reg < base_reg_count; reg++)
+      ra_make_reg_conflicts_transitive(compiler->vec4_reg_set.regs, reg);
+
    ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
 
    for (int i = 0; i < MAX_VGRF_SIZE; i++)
@@ -264,6 +267,97 @@ vec4_visitor::reg_allocate()
    return true;
 }
 
+/**
+ * When we decide to spill a register, instead of blindly spilling every use,
+ * save unspills when the spill register is used (read) in consecutive
+ * instructions. This can potentially save a bunch of unspills that would
+ * have very little impact in register allocation anyway.
+ *
+ * Notice that we need to account for this behavior when spilling a register
+ * and when evaluating spilling costs. This function is designed so it can
+ * be called from both places and avoid repeating the logic.
+ *
+ * - When we call this function from spill_reg(), we pass in scratch_reg the
+ *   actual unspill/spill register that we want to reuse in the current
+ *   instruction.
+ *
+ * - When we call this from evaluate_spill_costs(), we pass the register for
+ *   which we are evaluating spilling costs.
+ *
+ * In either case, we check if the previous instructions read scratch_reg until
+ * we find one that writes to it with a compatible mask or does not read/write
+ * scratch_reg at all.
+ */
+static bool
+can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
+                           unsigned scratch_reg)
+{
+   assert(inst->src[i].file == GRF);
+   bool prev_inst_read_scratch_reg = false;
+
+   /* See if any previous source in the same instruction reads scratch_reg */
+   for (unsigned n = 0; n < i; n++) {
+      if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+         prev_inst_read_scratch_reg = true;
+   }
+
+   /* Now check if previous instructions read/write scratch_reg */
+   for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
+        !prev_inst->is_head_sentinel();
+        prev_inst = (vec4_instruction *) prev_inst->prev) {
+
+      /* If the previous instruction writes to scratch_reg then we can reuse
+       * it if the write is not conditional and the channels we write are
+       * compatible with our read mask
+       */
+      if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+         return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
+                (brw_mask_for_swizzle(inst->src[i].swizzle) &
+                 ~prev_inst->dst.writemask) == 0;
+      }
+
+      /* Skip scratch read/writes so that instructions generated by spilling
+       * other registers (that won't read/write scratch_reg) do not stop us from
+       * reusing scratch_reg for this instruction.
+       */
+      if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE ||
+          prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ)
+         continue;
+
+      /* If the previous instruction does not write to scratch_reg, then check
+       * if it reads it
+       */
+      int n;
+      for (n = 0; n < 3; n++) {
+         if (prev_inst->src[n].file == GRF &&
+             prev_inst->src[n].reg == scratch_reg) {
+            prev_inst_read_scratch_reg = true;
+            break;
+         }
+      }
+      if (n == 3) {
+         /* The previous instruction does not read scratch_reg. At this point,
+          * if no previous instruction has read scratch_reg it means that we
+          * will need to unspill it here and we can't reuse it (so we return
+          * false). Otherwise, if we found at least one consecutive instruction
+          * that read scratch_reg, then we know that we got here from
+          * evaluate_spill_costs (since for the spill_reg path any block of
+          * consecutive instructions using scratch_reg must start with a write
+          * to that register, so we would've exited the loop in the check for
+          * the write that we have at the start of this loop), and in that case
+          * it means that we found the point at which the scratch_reg would be
+          * unspilled. Since we always unspill a full vec4, it means that we
+          * have all the channels available and we can just return true to
+          * signal that we can reuse the register in the current instruction
+          * too.
+          */
+         return prev_inst_read_scratch_reg;
+      }
+   }
+
+   return prev_inst_read_scratch_reg;
+}
+
 void
 vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
 {
@@ -280,15 +374,21 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-        if (inst->src[i].file == GRF) {
-           spill_costs[inst->src[i].reg] += loop_scale;
-           if (inst->src[i].reladdr)
-              no_spill[inst->src[i].reg] = true;
-        }
+        if (inst->src[i].file == GRF) {
+           /* We will only unspill src[i] if it wasn't unspilled for the
+            * previous instruction, in which case we'll just reuse the scratch
+            * reg for this instruction.
+ */ + if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) { + spill_costs[inst->src[i].reg] += loop_scale; + if (inst->src[i].reladdr) + no_spill[inst->src[i].reg] = true; + } + } } if (inst->dst.file == GRF) { - spill_costs[inst->dst.reg] += loop_scale; + spill_costs[inst->dst.reg] += loop_scale; if (inst->dst.reladdr) no_spill[inst->dst.reg] = true; } @@ -296,12 +396,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) switch (inst->opcode) { case BRW_OPCODE_DO: - loop_scale *= 10; - break; + loop_scale *= 10; + break; case BRW_OPCODE_WHILE: - loop_scale /= 10; - break; + loop_scale /= 10; + break; case SHADER_OPCODE_GEN4_SCRATCH_READ: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: @@ -309,12 +409,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (inst->src[i].file == GRF) no_spill[inst->src[i].reg] = true; } - if (inst->dst.file == GRF) - no_spill[inst->dst.reg] = true; - break; + if (inst->dst.file == GRF) + no_spill[inst->dst.reg] = true; + break; default: - break; + break; } } } @@ -342,19 +442,32 @@ vec4_visitor::spill_reg(int spill_reg_nr) unsigned int spill_offset = last_scratch++; /* Generate spill/unspill instructions for the objects being spilled. */ + int scratch_reg = -1; foreach_block_and_inst(block, vec4_instruction, inst, cfg) { for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { - src_reg spill_reg = inst->src[i]; - inst->src[i].reg = alloc.allocate(1); - dst_reg temp = dst_reg(inst->src[i]); - - emit_scratch_read(block, inst, temp, spill_reg, spill_offset); + if (scratch_reg == -1 || + !can_use_scratch_for_source(inst, i, scratch_reg)) { + /* We need to unspill anyway so make sure we read the full vec4 + * in any case. This way, the cached register can be reused + * for consecutive instructions that read different channels of + * the same vec4. + */ + scratch_reg = alloc.allocate(1); + src_reg temp = inst->src[i]; + temp.reg = scratch_reg; + temp.swizzle = BRW_SWIZZLE_XYZW; + emit_scratch_read(block, inst, + dst_reg(temp), inst->src[i], spill_offset); + } + assert(scratch_reg != -1); + inst->src[i].reg = scratch_reg; } } if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { emit_scratch_write(block, inst, spill_offset); + scratch_reg = inst->dst.reg; } }
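The decision logic added by this patch can be illustrated outside of Mesa. The sketch below is a simplified model, not the driver's IR or API: the Inst and Ref structs, the is_scratch_access flag and the can_reuse_scratch() helper are hypothetical stand-ins for vec4_instruction, the SHADER_OPCODE_GEN4_SCRATCH_* opcodes and can_use_scratch_for_source(), and the BRW_OPCODE_SEL exception for predicated writes is dropped for brevity. It only shows the shape of the backward scan.

#include <cassert>
#include <cstddef>
#include <vector>

/* Hypothetical, simplified stand-ins for the vec4 IR: only the fields the
 * scan needs are modelled here.
 */
struct Ref {
   bool is_grf = false;   /* operand lives in a GRF */
   int reg = -1;          /* GRF number */
};

struct Inst {
   Ref dst;
   Ref src[3];
   unsigned dst_writemask = 0xf;   /* channels (xyzw bits) actually written */
   bool predicated = false;        /* conditional write? */
   bool is_scratch_access = false; /* scratch read/write emitted by a spill */
};

/* Decide whether the copy of scratch_reg unspilled by an earlier instruction
 * is still usable for source i of insts[pos], i.e. whether another unspill
 * can be skipped.  read_mask is the channel mask implied by src[i]'s swizzle.
 */
static bool
can_reuse_scratch(const std::vector<Inst> &insts, size_t pos,
                  unsigned i, int scratch_reg, unsigned read_mask)
{
   const Inst &inst = insts[pos];
   assert(inst.src[i].is_grf);

   bool seen_read = false;

   /* Earlier sources of the same instruction count as reads of scratch_reg. */
   for (unsigned n = 0; n < i; n++) {
      if (inst.src[n].is_grf && inst.src[n].reg == scratch_reg)
         seen_read = true;
   }

   /* Walk backwards over the preceding instructions. */
   for (size_t p = pos; p-- > 0;) {
      const Inst &prev = insts[p];

      /* A write to scratch_reg ends the scan: reuse is possible only if the
       * write is unconditional and covers every channel we want to read.
       */
      if (prev.dst.is_grf && prev.dst.reg == scratch_reg)
         return !prev.predicated && (read_mask & ~prev.dst_writemask) == 0;

      /* Ignore scratch reads/writes added while spilling other registers. */
      if (prev.is_scratch_access)
         continue;

      /* A read of scratch_reg keeps the chain of consecutive users alive. */
      bool reads = false;
      for (unsigned n = 0; n < 3; n++) {
         if (prev.src[n].is_grf && prev.src[n].reg == scratch_reg)
            reads = true;
      }
      if (!reads)
         return seen_read;   /* chain broken: reuse only if someone read it */

      seen_read = true;
   }

   return seen_read;
}

As in the patch, a previously unspilled scratch register is reused only while the chain of consecutive readers is unbroken, and a full-channel unconditional write (such as the XYZW unspill itself) makes every later read in that chain safe.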