From 531feec9dc4680046f21c517d13312c7df7b7619 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 15 Aug 2014 15:03:44 -0700 Subject: [PATCH] i965/vec4: Handle destination writemasks in VEC4_OPCODE_PACK_BYTES. Since pack_bytes expands to two mov(4) align1 instructions, we can't use swizzles directly. For an instruction like pack_bytes m4.y:UD, vgrf13.xyzw:UD we can write into the .y component by setting the offset based on the swizzle. Also while we're doing this, we can set the dependency control hints properly, so that a series of pack_bytes writing into separate components of a register can issue without blocking. --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 22db81323e7..b353539c6aa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1508,6 +1508,10 @@ vec4_generator::generate_code(const cfg_t *cfg) * * where they pack the four bytes from the low and high four DW. 
*/ + assert(is_power_of_two(dst.dw1.bits.writemask) && + dst.dw1.bits.writemask != 0); + unsigned offset = __builtin_ctz(dst.dw1.bits.writemask); + dst.type = BRW_REGISTER_TYPE_UB; brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -1516,14 +1520,17 @@ vec4_generator::generate_code(const cfg_t *cfg) src[0].vstride = BRW_VERTICAL_STRIDE_4; src[0].width = BRW_WIDTH_1; src[0].hstride = BRW_HORIZONTAL_STRIDE_0; + dst.subnr = offset * 4; struct brw_inst *insn = brw_MOV(p, dst, src[0]); brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4); brw_inst_set_no_dd_clear(brw, insn, true); + brw_inst_set_no_dd_check(brw, insn, inst->no_dd_check); src[0].subnr = 16; - dst.subnr = 16; + dst.subnr = 16 + offset * 4; insn = brw_MOV(p, dst, src[0]); brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4); + brw_inst_set_no_dd_clear(brw, insn, inst->no_dd_clear); brw_inst_set_no_dd_check(brw, insn, true); brw_set_default_access_mode(p, BRW_ALIGN_16); @@ -1540,7 +1547,11 @@ vec4_generator::generate_code(const cfg_t *cfg) abort(); } - if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { + if (inst->opcode == VEC4_OPCODE_PACK_BYTES) { + /* Handled dependency hints in the generator. */ + + assert(!inst->conditional_mod); + } else if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"conditional_mod, no_dd_check, or no_dd_clear set for IR " "emitting more than 1 instruction"); -- 2.30.2