From: Matt Turner Date: Mon, 10 Mar 2014 20:26:30 +0000 (-0700) Subject: i965/vec4: Add VEC4_OPCODE_PACK_BYTES. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e14c7c7faff3c204a5eefc1f2ea487d4730b8382;p=mesa.git i965/vec4: Add VEC4_OPCODE_PACK_BYTES. Will be used by emit_pack_{s,u}norm_4x8(). --- diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index ed94bcc5bbf..fa473d08ef6 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -908,6 +908,8 @@ enum opcode { SHADER_OPCODE_GEN4_SCRATCH_WRITE, SHADER_OPCODE_GEN7_SCRATCH_READ, + VEC4_OPCODE_PACK_BYTES, + FS_OPCODE_DDX, FS_OPCODE_DDY, FS_OPCODE_PIXEL_X, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b694b6d0d17..61ea2079140 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -447,6 +447,9 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_GEN7_SCRATCH_READ: return "gen7_scratch_read"; + case VEC4_OPCODE_PACK_BYTES: + return "pack_bytes"; + case FS_OPCODE_DDX: return "ddx"; case FS_OPCODE_DDY: diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index db6f6a38ffa..534b4b0668f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -352,6 +352,12 @@ vec4_visitor::opt_reduce_swizzle() /* Determine which channels of the sources are read. */ switch (inst->opcode) { + case VEC4_OPCODE_PACK_BYTES: + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = 2; + swizzle[3] = 3; + break; case BRW_OPCODE_DP4: case BRW_OPCODE_DPH: /* FINISHME: DPH reads only three channels of src0, * but all four of src1.
@@ -1018,6 +1024,12 @@ vec4_instruction::reswizzle(int dst_writemask, int swizzle) if (src[i].file == BAD_FILE || src[i].file == IMM) continue; + /* Destination write mask doesn't correspond to source swizzle for the + * pack_bytes instruction. + */ + if (opcode == VEC4_OPCODE_PACK_BYTES) + continue; + for (int c = 0; c < 4; c++) { new_swizzle[c] = BRW_GET_SWZ(src[i].swizzle, BRW_GET_SWZ(swizzle, c)); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 058b2baa098..22db81323e7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1495,6 +1495,41 @@ vec4_generator::generate_code(const cfg_t *cfg) generate_unpack_flags(inst, dst); break; + case VEC4_OPCODE_PACK_BYTES: { + /* Is effectively: + * + * mov(8) dst<16,4,1>:UB src<4,1,0>:UB + * + * but destinations' only regioning is horizontal stride, so instead we + * have to use two instructions: + * + * mov(4) dst<1>:UB src<4,1,0>:UB + * mov(4) dst.16<1>:UB src.16<4,1,0>:UB + * + * where they pack the four bytes from the low and high four DW. + */ + dst.type = BRW_REGISTER_TYPE_UB; + + brw_set_default_access_mode(p, BRW_ALIGN_1); + + src[0].type = BRW_REGISTER_TYPE_UB; + src[0].vstride = BRW_VERTICAL_STRIDE_4; + src[0].width = BRW_WIDTH_1; + src[0].hstride = BRW_HORIZONTAL_STRIDE_0; + struct brw_inst *insn = brw_MOV(p, dst, src[0]); + brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4); + brw_inst_set_no_dd_clear(brw, insn, true); + + src[0].subnr = 16; + dst.subnr = 16; + insn = brw_MOV(p, dst, src[0]); + brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_4); + brw_inst_set_no_dd_check(brw, insn, true); + + brw_set_default_access_mode(p, BRW_ALIGN_16); + break; + } + default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",