From acef65503e79ce61a16bdba92462f0ed8a7b52c2 Mon Sep 17 00:00:00 2001 From: Xavier Bouchoux Date: Wed, 28 Jan 2015 02:20:51 +0100 Subject: [PATCH] r600g: fix abs() support on ALU 3 source operands instructions Since alu does not support abs() modifier on source operands, spill and apply the modifiers to a temp register when needed. Signed-off-by: Xavier Bouchoux Reviewed-by: Glenn Kennard --- src/gallium/drivers/r600/r600_asm.c | 6 +++ src/gallium/drivers/r600/r600_shader.c | 63 +++++++++++++++++++++++--- src/gallium/drivers/r600/r700_asm.c | 1 + 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index ce3c2d125a0..79e7f748cdf 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1133,6 +1133,11 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); + if (alu->is_op3) { + /* will fail later since alu does not support it. 
*/ + assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); + } + if (bc->cf_last != NULL && bc->cf_last->op != type) { /* check if we could add it anyway */ if (bc->cf_last->op == CF_OP_ALU && @@ -1491,6 +1496,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod S_SQ_ALU_WORD0_LAST(alu->last); if (alu->is_op3) { + assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 16e820ee215..77c9909f5d8 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4862,6 +4862,39 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru return 0; } +static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, + unsigned temp, int temp_chan, + struct r600_bytecode_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) +{ + struct r600_bytecode_alu alu; + int r; + + r600_bytecode_src(bc_src, shader_src, chan); + + /* op3 operands don't support abs modifier */ + if (bc_src->abs) { + assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.sel = temp; + alu.dst.chan = temp_chan; + alu.dst.write = 1; + + alu.src[0] = *bc_src; + alu.last = true; // sufficient? 
+ r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(bc_src, 0, sizeof(*bc_src)); + bc_src->sel = temp; + bc_src->chan = temp_chan; + } + return 0; +} + static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -4876,7 +4909,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], i); + r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, j, &alu.src[j], &ctx->src[j], i); + if (r) + return r; } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -5967,7 +6002,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - unsigned i; + unsigned i, extra_temp; int r; /* optimize if it's just an equal balance */ @@ -6037,6 +6072,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* src0 * src1 + (1 - src0) * src2 */ + if (ctx->src[0].abs || ctx->src[1].abs) /* XXX avoid duplicating condition */ + extra_temp = r600_get_temp(ctx); + else + extra_temp = 0; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -6044,8 +6083,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; - r600_bytecode_src(&alu.src[0], &ctx->src[0], i); - r600_bytecode_src(&alu.src[1], &ctx->src[1], i); + r = tgsi_make_src_for_op3(ctx, extra_temp, 0, &alu.src[0], &ctx->src[0], i); + if (r) + return r; + r = tgsi_make_src_for_op3(ctx, extra_temp, 1, &alu.src[1], &ctx->src[1], i); + if (r) + return r; alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -6074,9 +6117,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGE; - r600_bytecode_src(&alu.src[0], &ctx->src[0], i); - r600_bytecode_src(&alu.src[1], &ctx->src[2], i); - r600_bytecode_src(&alu.src[2], &ctx->src[1], i); + r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 0, &alu.src[0], &ctx->src[0], i); + if (r) + return r; + r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 1, &alu.src[1], &ctx->src[2], i); + if (r) + return r; + r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 2, &alu.src[2], &ctx->src[1], i); + if (r) + return r; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 4a9fa3612e1..04f8c6288f0 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -48,6 +48,7 @@ int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu * /* don't replace gpr by pv or ps for destination register */ if (alu->is_op3) { + assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | -- 2.30.2