From: Christian König Date: Thu, 16 Dec 2010 15:42:14 +0000 (+0100) Subject: r600g: implement output modifiers and use them to further optimize LRP X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=08c8cd26b86b1d5514ff152ce853eae0541b02fd;p=mesa.git r600g: implement output modifiers and use them to further optimize LRP --- diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 4476f432b98..2a62b06cc60 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -707,6 +707,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b147f0f5c88..ac3ed3c8520 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -62,6 +62,7 @@ struct r600_bc_alu { unsigned bank_swizzle_force; u32 value[4]; int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + unsigned omod; }; struct r600_bc_tex { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9de08847263..80579b8d871 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1975,6 +1975,35 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; + + /* optimize if it's just an equal balance */ + if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); + alu.src[1] = r600_src[2]; + alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + alu.omod = 3; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + + alu.dst.chan = i; + if (i == lasti) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; + } + /* 1 - src0 */ for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 892dee86baf..3eb6fb50ca7 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -61,6 +61,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |