From 82c69426a5a32f9189c8b01059f831c84e9b83a3 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 1 Sep 2016 14:38:57 +0200 Subject: [PATCH] i965/vec4: support basic spilling of 64-bit registers The current spilling code can't spill vgrf allocations larger than 1 but SIMD4x2 doubles require 2 vgrfs, so we need to permit this case (which is handled properly for DF data types by emitting 2 scratch messages and doing data shuffling). We accomplish this by not auto-disabling spilling for vgrf allocations with a size of 2, and then disable spilling on any register with an offset != 0B (which indicates array access). Disable spilling of partial DF reads/writes because these don't read/write data for both logical threads and our scratch messages for 64-bit data need data for both threads to be present. Reviewed-by: Matt Turner --- .../dri/i965/brw_vec4_reg_allocate.cpp | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 5a5be857514..242e6664623 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -376,7 +376,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; - no_spill[i] = alloc.sizes[i] != 1; + no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2; } /* Calculate costs for spilling nodes. Call it a cost of 1 per @@ -393,7 +393,17 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) { spill_costs[inst->src[i].nr] += loop_scale; if (inst->src[i].reladdr || - inst->src[i].offset % REG_SIZE != 0) + inst->src[i].offset >= REG_SIZE) + no_spill[inst->src[i].nr] = true; + + /* We don't support unspills of partial DF reads. + * + * Our 64-bit unspills are implemented with two 32-bit scratch + * messages, each one reading that for both SIMD4x2 threads that + * we need to shuffle into correct 64-bit data. Ensure that we + * are reading data for both threads. + */ + if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8) no_spill[inst->src[i].nr] = true; } } @@ -401,7 +411,16 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) { spill_costs[inst->dst.nr] += loop_scale; - if (inst->dst.reladdr || inst->dst.offset % REG_SIZE != 0) + if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE) + no_spill[inst->dst.nr] = true; + + /* We don't support spills of partial DF writes. + * + * Our 64-bit spills are implemented with two 32-bit scratch messages, + * each one writing that for both SIMD4x2 threads. Ensure that we + * are writing data for both threads. + */ + if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8) no_spill[inst->dst.nr] = true; } @@ -450,8 +469,9 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g) void vec4_visitor::spill_reg(int spill_reg_nr) { - assert(alloc.sizes[spill_reg_nr] == 1); - unsigned int spill_offset = last_scratch++; + assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2); + unsigned int spill_offset = last_scratch; + last_scratch += alloc.sizes[spill_reg_nr]; /* Generate spill/unspill instructions for the objects being spilled. */ int scratch_reg = -1; @@ -465,12 +485,14 @@ vec4_visitor::spill_reg(int spill_reg_nr) * for consecutive instructions that read different channels of * the same vec4. */ - scratch_reg = alloc.allocate(1); + scratch_reg = alloc.allocate(alloc.sizes[spill_reg_nr]); src_reg temp = inst->src[i]; temp.nr = scratch_reg; + temp.offset = 0; temp.swizzle = BRW_SWIZZLE_XYZW; emit_scratch_read(block, inst, dst_reg(temp), inst->src[i], spill_offset); + temp.offset = inst->src[i].offset; } assert(scratch_reg != -1); inst->src[i].nr = scratch_reg; -- 2.30.2