From 2b28fd6ca603df40a5d02aac4035eced3a1d079a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 22 Mar 2010 10:05:42 -0700
Subject: [PATCH] i965: Add support for the MAD opcode on gen6+.

v2: Fix MRF handling on gen7.

Reviewed-by: Kenneth Graunke (v1)
---
 src/mesa/drivers/dri/i965/brw_defines.h |   1 +
 src/mesa/drivers/dri/i965/brw_disasm.c  | 223 ++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_eu.h      |  17 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c |  84 ++++++++-
 src/mesa/drivers/dri/i965/brw_structs.h |  37 ++++
 5 files changed, 342 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 029be872d77..38ce5d76118 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -620,6 +620,7 @@ enum opcode {
    BRW_OPCODE_DPA2 = 88,
    BRW_OPCODE_LINE = 89,
    BRW_OPCODE_PLN = 90,
+   BRW_OPCODE_MAD = 91,
    BRW_OPCODE_NOP = 126,
 
    /* These are compiler backend opcodes that get translated into other
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index a86c8f28edb..187bc0ab25d 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -51,6 +51,7 @@ struct {
     [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },  /* nsrc == 3 routes brw_disasm() to the *_3src printers */
     [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
@@ -578,6 +579,28 @@ static int dest (FILE *file, struct brw_instruction *inst)
     return 0;
 }
 
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{   /* Print the destination operand of a three-source (da3src) instruction. */
+    int err = 0;
+    uint32_t reg_file;
+
+    if (inst->bits1.da3src.dest_reg_file)  /* 1-bit field: nonzero selects the message register file */
+        reg_file = BRW_MESSAGE_REGISTER_FILE;
+    else
+        reg_file = BRW_GENERAL_REGISTER_FILE;
+
+    err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits1.da3src.dest_subreg_nr)  /* ".N" suffix is printed only for a nonzero sub-register */
+        format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+    string (file, "<1>");
+    err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+    err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);  /* type F is passed as a constant; no type field is read */
+
+    return 0;  /* err is accumulated above but not propagated to the caller */
+}
+
 static int src_align1_region (FILE *file,
                               GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
 {
@@ -694,6 +717,156 @@ static int src_da16 (FILE *file,
     return err;
 }
 
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{   /* Print source 0 of a three-source instruction: modifiers, register, region, type, swizzle. */
+    int err = 0;
+    GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;  /* swizzle packs four 2-bit channel selects, x lowest */
+    GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);  /* modifier bits live in bits1, not bits2 */
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);  /* source file is passed as GRF unconditionally */
+    if (err == -1)
+        return 0;
+    if (inst->bits2.da3src.src0_subreg_nr)
+        format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+    string (file, "<4,1,1>");  /* region text is a fixed string, not decoded from the instruction */
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all   - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+        swz_y == BRW_CHANNEL_Y &&
+        swz_z == BRW_CHANNEL_Z &&
+        swz_w == BRW_CHANNEL_W)
+    {
+        ;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{   /* Print source 1 of a three-source instruction; same layout as src0_3src() except for the split sub-register. */
+    int err = 0;
+    GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
+    GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
+    GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |  /* low 2 bits come from bits2 ... */
+                             (inst->bits3.da3src.src1_subreg_nr_high << 2));  /* ... and the third bit from bits3 */
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src1_reg_nr);  /* src1's register number is stored in bits3 */
+    if (err == -1)
+        return 0;
+    if (src1_subreg_nr)
+        format (file, ".%d", src1_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all   - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+        swz_y == BRW_CHANNEL_Y &&
+        swz_z == BRW_CHANNEL_Z &&
+        swz_w == BRW_CHANNEL_W)
+    {
+        ;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
+
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{   /* Print source 2 of a three-source instruction; every src2 field except the modifiers is read from bits3. */
+    int err = 0;
+    GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
+    GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src2_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits3.da3src.src2_subreg_nr)
+        format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all   - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+        swz_y == BRW_CHANNEL_Y &&
+        swz_z == BRW_CHANNEL_Z &&
+        swz_w == BRW_CHANNEL_W)
+    {
+        ;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
 static int imm (FILE *file, GLuint type, struct brw_instruction *inst)
 {
     switch (type) {
@@ -924,25 +1097,39 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
     if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
         format (file, " %d", inst->header.destreg__conditionalmod);
 
-    if (opcode[inst->header.opcode].ndst > 0) {
-        pad (file, 16);
-        err |= dest (file, inst);
-    } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
-                            inst->header.opcode == BRW_OPCODE_ELSE ||
-                            inst->header.opcode == BRW_OPCODE_ENDIF ||
-                            inst->header.opcode == BRW_OPCODE_WHILE)) {
-        format (file, " %d", inst->bits1.branch_gen6.jump_count);
-    } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
-        format (file, " %d", inst->bits3.d);
-    }
+    if (opcode[inst->header.opcode].nsrc == 3) {  /* three-source opcodes are decoded through the da3src fields */
+        pad (file, 16);
+        err |= dest_3src (file, inst);
 
-    if (opcode[inst->header.opcode].nsrc > 0) {
-        pad (file, 32);
-        err |= src0 (file, inst);
-    }
-    if (opcode[inst->header.opcode].nsrc > 1) {
-        pad (file, 48);
-        err |= src1 (file, inst);
+        pad (file, 32);
+        err |= src0_3src (file, inst);
+
+        pad (file, 48);
+        err |= src1_3src (file, inst);
+
+        pad (file, 64);  /* third source gets its own pad() call, 16 past the src1 one */
+        err |= src2_3src (file, inst);
+    } else {  /* all other opcodes: the previous dest/src0/src1 path, re-indented */
+        if (opcode[inst->header.opcode].ndst > 0) {
+            pad (file, 16);
+            err |= dest (file, inst);
+        } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+                                inst->header.opcode == BRW_OPCODE_ELSE ||
+                                inst->header.opcode == BRW_OPCODE_ENDIF ||
+                                inst->header.opcode == BRW_OPCODE_WHILE)) {
+            format (file, " %d", inst->bits1.branch_gen6.jump_count);
+        } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+            format (file, " %d", inst->bits3.d);
+        }
+
+        if (opcode[inst->header.opcode].nsrc > 0) {
+            pad (file, 32);
+            err |= src0 (file, inst);
+        }
+        if (opcode[inst->header.opcode].nsrc > 1) {
+            pad (file, 48);
+            err |= src1 (file, inst);
+        }
     }
 
     if (inst->header.opcode == BRW_OPCODE_SEND ||
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index f6602221e59..dbc84be857f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -49,6 +49,13 @@
 #define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
 
+static inline bool brw_is_single_value_swizzle(int swiz)
+{  /* True when swiz equals one of BRW_SWIZZLE_XXXX/YYYY/ZZZZ/WWWW. */
+   return (swiz == BRW_SWIZZLE_XXXX ||
+           swiz == BRW_SWIZZLE_YYYY ||
+           swiz == BRW_SWIZZLE_ZZZZ ||
+           swiz == BRW_SWIZZLE_WWWW);
+}
 
 #define REG_SIZE (8*4)
 
@@ -847,10 +854,16 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \
                                  struct brw_reg src0, \
                                  struct brw_reg src1);
 
+#define ALU3(OP) /* declares brw_<OP>() taking a dest and three source registers */ \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, \
+                                 struct brw_reg src0, \
+                                 struct brw_reg src1, \
+                                 struct brw_reg src2);
+
 #define ROUND(OP) \
 void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
 
-
 ALU1(MOV)
 ALU2(SEL)
 ALU1(NOT)
@@ -876,12 +889,14 @@ ALU2(DP3)
 ALU2(DP2)
 ALU2(LINE)
 ALU2(PLN)
+ALU3(MAD)
 
 ROUND(RNDZ)
 ROUND(RNDE)
 
 #undef ALU1
 #undef ALU2
+#undef ALU3
 #undef ROUND
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 33471576446..839f6c36a3e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -749,6 +749,78 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
    return insn;
 }
 
+static int
+get_3src_subreg_nr(struct brw_reg reg)
+{  /* Sub-register number to encode for a 3-src source: reg.subnr / 4 (presumably bytes -> 32-bit elements; confirm). */
+   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {  /* same condition brw_alu3() uses to set the *_rep_ctrl bits */
+      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
+      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);  /* offset by the channel the swizzle selects */
+   } else {
+      return reg.subnr / 4;
+   }
+}
+
+static struct brw_instruction *brw_alu3(struct brw_compile *p,
+                                        GLuint opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1,
+                                        struct brw_reg src2)
+{  /* Emit one three-source instruction: allocates it with next_insn() and fills the da3src fields of bits1..bits3. */
+   struct brw_instruction *insn = next_insn(p, opcode);
+
+   gen7_convert_mrf_to_grf(p, &dest);  /* may rewrite dest in place before it is encoded (the "v2" MRF fix) */
+
+   assert(insn->header.access_mode == BRW_ALIGN_16);
+
+   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+          dest.file == BRW_MESSAGE_REGISTER_FILE);
+   assert(dest.nr < 128);
+   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+   assert(dest.type == BRW_REGISTER_TYPE_F);  /* compare, not assign: dest is checked for type F like the three sources */
+   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);  /* 1 = MRF, 0 = GRF */
+   insn->bits1.da3src.dest_reg_nr = dest.nr;
+   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;  /* dest uses units of 16, the sources units of 4 */
+   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
+   guess_execution_size(p, insn, dest);
+
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE);  /* sources must be direct GRF floats; only dest may be an MRF */
+   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src0.nr < 128);
+   assert(src0.type == BRW_REGISTER_TYPE_F);
+   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
+   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
+   insn->bits2.da3src.src0_reg_nr = src0.nr;
+   insn->bits1.da3src.src0_abs = src0.abs;
+   insn->bits1.da3src.src0_negate = src0.negate;
+   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;  /* set when the source has vertical stride 0 */
+
+   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src1.nr < 128);
+   assert(src1.type == BRW_REGISTER_TYPE_F);
+   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
+   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;  /* low 2 bits go in bits2 ... */
+   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;  /* ... remaining bit goes in bits3 */
+   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
+   insn->bits3.da3src.src1_reg_nr = src1.nr;
+   insn->bits1.da3src.src1_abs = src1.abs;
+   insn->bits1.da3src.src1_negate = src1.negate;
+
+   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src2.nr < 128);
+   assert(src2.type == BRW_REGISTER_TYPE_F);
+   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
+   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
+   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
+   insn->bits3.da3src.src2_reg_nr = src2.nr;
+   insn->bits1.da3src.src2_abs = src2.abs;
+   insn->bits1.da3src.src2_negate = src2.negate;
+
+   return insn;
+}
+
 
 /***********************************************************************
  * Convenience routines.
@@ -770,6 +842,16 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \
    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
 }
 
+#define ALU3(OP) /* defines brw_<OP>() as a wrapper that forwards to brw_alu3() with BRW_OPCODE_<OP> */ \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, \
+                                 struct brw_reg src0, \
+                                 struct brw_reg src1, \
+                                 struct brw_reg src2) \
+{ \
+   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
+}
+
 /* Rounding operations (other than RNDD) require two instructions - the first
  * stores a rounded value (possibly the wrong way) in the dest register, but
  * also sets a per-channel "increment bit" in the flag register. A predicated
@@ -818,7 +900,7 @@ ALU2(DP3)
 ALU2(DP2)
 ALU2(LINE)
 ALU2(PLN)
-
+ALU3(MAD)
 
 ROUND(RNDZ)
 ROUND(RNDE)
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index d23ad0d91a0..8283abfd375 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1007,6 +1007,22 @@ struct brw_instruction
 
          GLint jump_count:16;
       } branch_gen6;
+
+      struct {  /* bits1 view for three-source instructions; field widths sum to 32 */
+         GLuint dest_reg_file:1;  /* tested by dest_3src(): nonzero selects the message register file */
+         GLuint flag_subreg_num:1;
+         GLuint pad0:2;
+         GLuint src0_abs:1;  /* abs/negate modifiers for all three sources are kept here in bits1 */
+         GLuint src0_negate:1;
+         GLuint src1_abs:1;
+         GLuint src1_negate:1;
+         GLuint src2_abs:1;
+         GLuint src2_negate:1;
+         GLuint pad1:7;
+         GLuint dest_writemask:4;
+         GLuint dest_subreg_nr:3;  /* brw_alu3() stores dest.subnr / 16 here */
+         GLuint dest_reg_nr:8;
+      } da3src;
    } bits1;
 
 
@@ -1086,6 +1102,16 @@ struct brw_instruction
          GLuint sfid:4;
       } send_gen5; /* for Ironlake only */
 
+      struct {  /* bits2 view for three-source instructions: all of src0 plus the first part of src1 */
+         GLuint src0_rep_ctrl:1;  /* brw_alu3() sets this when src0.vstride == BRW_VERTICAL_STRIDE_0 */
+         GLuint src0_swizzle:8;  /* decoded by the disassembler as four 2-bit channel selects */
+         GLuint src0_subreg_nr:3;
+         GLuint src0_reg_nr:8;
+         GLuint pad0:1;
+         GLuint src1_rep_ctrl:1;
+         GLuint src1_swizzle:8;
+         GLuint src1_subreg_nr_low:2;  /* low 2 bits; the third bit is src1_subreg_nr_high in bits3.da3src */
+      } da3src;
    } bits2;
 
    union
@@ -1466,6 +1492,17 @@ struct brw_instruction
       } gen7_dp;
       /** @} */
 
+      struct {  /* bits3 view for three-source instructions: the rest of src1 plus all of src2 */
+         GLuint src1_subreg_nr_high:1;  /* combined as (high << 2) | low by src1_3src() */
+         GLuint src1_reg_nr:8;
+         GLuint pad0:1;
+         GLuint src2_rep_ctrl:1;
+         GLuint src2_swizzle:8;
+         GLuint src2_subreg_nr:3;
+         GLuint src2_reg_nr:8;
+         GLuint pad1:2;
+      } da3src;
+
       GLint d;
       GLuint ud;
       float f;
-- 
2.30.2