From: Iago Toral Quiroga Date: Fri, 9 Sep 2016 10:21:06 +0000 (+0200) Subject: i965/vec4: avoid spilling of registers that mix 32-bit and 64-bit access X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8843c43f7e85423b559383b38c77477139b4b06e;p=mesa.git i965/vec4: avoid spilling of registers that mix 32-bit and 64-bit access When 64-bit registers are (un)spilled, we need to execute data shuffling code before writing to or after reading from memory. If we have instructions that operate on 64-bit data via 32-bit instructions, (un)spills for the register produced by 32-bit instructions will not do data shuffling at all (because we only see a normal 32-bit instruction seemingly operating on 32-bit data). This means that subsequent reads with that register using DF access will unshuffle data read from memory that was never adequately shuffled when it was written. Fixing this would require identifying which 32-bit instructions write 64-bit data, emitting spill instructions only when the full 64-bit data has been written (by multiple 32-bit instructions writing to different offsets of the same register), and always emitting 64-bit unspills whenever 64-bit data is read, even when the instruction uses a 32-bit type to read from it. Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 242e6664623..79fd15b12bb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -374,9 +374,13 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) { float loop_scale = 1.0; + unsigned *reg_type_size = (unsigned *) + ralloc_size(NULL, this->alloc.count * sizeof(unsigned)); + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2; + reg_type_size[i] = 0; } /* Calculate costs for spilling nodes. 
Call it a cost of 1 per @@ -406,6 +410,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8) no_spill[inst->src[i].nr] = true; } + + /* We can't spill registers that mix 32-bit and 64-bit access (that + * contain 64-bit data that is operated on via 32-bit instructions) + */ + unsigned type_size = type_sz(inst->src[i].type); + if (reg_type_size[inst->src[i].nr] == 0) + reg_type_size[inst->src[i].nr] = type_size; + else if (reg_type_size[inst->src[i].nr] != type_size) + no_spill[inst->src[i].nr] = true; } } @@ -422,6 +435,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) */ if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8) no_spill[inst->dst.nr] = true; + + /* We can't spill registers that mix 32-bit and 64-bit access (that + * contain 64-bit data that is operated on via 32-bit instructions) + */ + unsigned type_size = type_sz(inst->dst.type); + if (reg_type_size[inst->dst.nr] == 0) + reg_type_size[inst->dst.nr] = type_size; + else if (reg_type_size[inst->dst.nr] != type_size) + no_spill[inst->dst.nr] = true; } switch (inst->opcode) { @@ -448,6 +470,8 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) break; } } + + ralloc_free(reg_type_size); } int