From 52fb22b64678b1dc855cffc9998103b31890a0c5 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 28 Jun 2016 12:03:09 +0200 Subject: [PATCH] i965/vec4: fix scratch writes for 64bit data Mostly the same stuff as usual: we ned to shuffle the data before we write and we need to emit two 32-bit write messages (with appropriate 32-bit writemask channels set) for a full dvec4 scratch write. v2: use byte_offset() instead of offset(). Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 64 ++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 2edbabb8edf..2d0822b5a5a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1518,17 +1518,63 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, * weren't initialized, it will confuse live interval analysis, which will * make spilling fail to make progress. */ - const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type), + bool is_64bit = type_sz(inst->dst.type) == 8; + const glsl_type *alloc_type = + is_64bit ? glsl_type::dvec4_type : glsl_type::vec4_type; + const src_reg temp = swizzle(retype(src_reg(this, alloc_type), inst->dst.type), brw_swizzle_for_mask(inst->dst.writemask)); - dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), - inst->dst.writemask)); - vec4_instruction *write = SCRATCH_WRITE(dst, temp, index); - if (inst->opcode != BRW_OPCODE_SEL) - write->predicate = inst->predicate; - write->ir = inst->ir; - write->annotation = inst->annotation; - inst->insert_after(block, write); + + if (!is_64bit) { + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + inst->dst.writemask)); + vec4_instruction *write = SCRATCH_WRITE(dst, temp, index); + if (inst->opcode != BRW_OPCODE_SEL) + write->predicate = inst->predicate; + write->ir = inst->ir; + write->annotation = inst->annotation; + inst->insert_after(block, write); + } else { + dst_reg shuffled = dst_reg(this, alloc_type); + vec4_instruction *last = + shuffle_64bit_data(shuffled, temp, true, block, inst); + src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F)); + + uint8_t mask = 0; + if (inst->dst.writemask & WRITEMASK_X) + mask |= WRITEMASK_XY; + if (inst->dst.writemask & WRITEMASK_Y) + mask |= WRITEMASK_ZW; + if (mask) { + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask)); + + vec4_instruction *write = SCRATCH_WRITE(dst, shuffled_float, index); + if (inst->opcode != BRW_OPCODE_SEL) + write->predicate = inst->predicate; + write->ir = inst->ir; + write->annotation = inst->annotation; + last->insert_after(block, write); + } + + mask = 0; + if (inst->dst.writemask & WRITEMASK_Z) + mask |= WRITEMASK_XY; + if (inst->dst.writemask & WRITEMASK_W) + mask |= WRITEMASK_ZW; + if (mask) { + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask)); + + src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr, + reg_offset + 1); + vec4_instruction *write = + SCRATCH_WRITE(dst, byte_offset(shuffled_float, REG_SIZE), index); + if (inst->opcode != BRW_OPCODE_SEL) + write->predicate = inst->predicate; + write->ir = inst->ir; + write->annotation = inst->annotation; + last->insert_after(block, write); + } + } inst->dst.file = temp.file; inst->dst.nr = temp.nr; -- 2.30.2