for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
- no_spill[i] = alloc.sizes[i] != 1;
+ no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2;
}
/* Calculate costs for spilling nodes. Call it a cost of 1 per
if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
spill_costs[inst->src[i].nr] += loop_scale;
if (inst->src[i].reladdr ||
- inst->src[i].offset % REG_SIZE != 0)
+ inst->src[i].offset >= REG_SIZE)
+ no_spill[inst->src[i].nr] = true;
+
+ /* We don't support unspills of partial DF reads.
+ *
+ * Our 64-bit unspills are implemented with two 32-bit scratch
+ * messages, each one reading that for both SIMD4x2 threads that
+ * we need to shuffle into correct 64-bit data. Ensure that we
+ * are reading data for both threads.
+ */
+ if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
no_spill[inst->src[i].nr] = true;
}
}
if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) {
spill_costs[inst->dst.nr] += loop_scale;
- if (inst->dst.reladdr || inst->dst.offset % REG_SIZE != 0)
+ if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE)
+ no_spill[inst->dst.nr] = true;
+
+ /* We don't support spills of partial DF writes.
+ *
+ * Our 64-bit spills are implemented with two 32-bit scratch messages,
+ * each one writing that for both SIMD4x2 threads. Ensure that we
+ * are writing data for both threads.
+ */
+ if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
no_spill[inst->dst.nr] = true;
}
void
vec4_visitor::spill_reg(int spill_reg_nr)
{
- assert(alloc.sizes[spill_reg_nr] == 1);
- unsigned int spill_offset = last_scratch++;
+ assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2);
+ unsigned int spill_offset = last_scratch;
+ last_scratch += alloc.sizes[spill_reg_nr];
/* Generate spill/unspill instructions for the objects being spilled. */
int scratch_reg = -1;
* for consecutive instructions that read different channels of
* the same vec4.
*/
- scratch_reg = alloc.allocate(1);
+ scratch_reg = alloc.allocate(alloc.sizes[spill_reg_nr]);
src_reg temp = inst->src[i];
temp.nr = scratch_reg;
+ temp.offset = 0;
temp.swizzle = BRW_SWIZZLE_XYZW;
emit_scratch_read(block, inst,
dst_reg(temp), inst->src[i], spill_offset);
+ temp.offset = inst->src[i].offset;
}
assert(scratch_reg != -1);
inst->src[i].nr = scratch_reg;