From 270f6d194c124294d7ca8256c6bf7b6010e8510c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 14 Dec 2010 20:49:31 +0100 Subject: [PATCH] r600g: optimize temp register handling for LRP --- src/gallium/drivers/r600/r600_shader.c | 72 ++++++++++++++------------ 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3deabbca3a9..e00c844a17b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -864,19 +864,25 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ return 0; } -static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) +static int tgsi_last_instruction(unsigned writemask) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; - struct r600_bc_alu alu; - int i, j, r; - int lasti = 0; + int i, lasti = 0; for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { + if (writemask & (1 << i)) { lasti = i; } } + return lasti; +} + +static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu alu; + int i, j, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1037,7 +1043,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_setup_trig(ctx, r600_src); if (r) @@ -1057,10 +1063,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; /* replicate result */ - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) - lasti = i; - } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1587,13 +1589,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - int lasti = 0; - - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - lasti = i; - } - } + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1937,6 +1933,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; @@ -1947,7 +1944,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; /* 1 - src0 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -1957,7 +1957,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1970,7 +1970,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) return r; /* (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; @@ -1979,7 +1982,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].chan = tgsi_chan(&inst->Src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1992,7 +1995,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) return r; /* src0 * src1 + (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2002,16 +2008,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1].chan = tgsi_chan(&inst->Src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_cmp(struct r600_shader_ctx *ctx) @@ -2020,13 +2030,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - int lasti = 0; - - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - lasti = i; - } - } + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); r = tgsi_split_constant(ctx, r600_src); if (r) -- 2.30.2