From a7ec366e503cc2b05d6920fa5027b0f001ae9e58 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jan 2018 08:39:46 +1000 Subject: [PATCH] r600/shader: refactor mul hi/lo instruction emission This just makes it a bit simpler for cayman vs eg Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 370 ++++++++----------------- 1 file changed, 116 insertions(+), 254 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index cfc3400f925..cf669781202 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5224,6 +5224,31 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } +static int emit_mul_int_op(struct r600_bytecode *bc, + struct r600_bytecode_alu *alu_src) +{ /* Emit the ALU instruction described by *alu_src into bc. On CAYMAN it is issued four times, once per dst channel 0-3, with only channel alu_src->dst.chan written and "last" set on the fourth copy; on other chips it is issued once with "last" forced to 1. Returns 0 on success, otherwise the first non-zero result of r600_bytecode_add_alu(). */ + struct r600_bytecode_alu alu; + int i, r; + alu = *alu_src; /* work on a local copy; *alu_src itself is only read */ + if (bc->chip_class == CAYMAN) { /* NOTE(review): presumably Cayman runs these integer multiplies across all four vector slots, hence one copy per channel - confirm against the ISA docs */ + for (i = 0; i < 4; i++) { + alu.dst.chan = i; + alu.dst.write = (i == alu_src->dst.chan); /* overrides the caller's dst.write: only the requested channel is stored */ + alu.last = (i == 3); + + r = r600_bytecode_add_alu(bc, &alu); + if (r) + return r; + } + } else { /* other chips: one instruction, caller's dst.chan and dst.write are kept as-is */ + alu.last = 1; + r = r600_bytecode_add_alu(bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -5465,50 +5490,25 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } /* 2. 
tmp0.z = lo (tmp0.x * src2) */ - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = j; - alu.dst.write = (j == 2); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_UINT; - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 1; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[1], i); - } + alu.dst.sel = tmp0; + alu.dst.chan = 2; + alu.dst.write = 1; - alu.last = (j == 3); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + alu.src[0].sel = tmp0; + alu.src[0].chan = 0; + if (signed_op) { + alu.src[1].sel = tmp2; + alu.src[1].chan = 1; } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = 2; - alu.dst.write = 1; - - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 1; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[1], i); - } - - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); } + if ((r = emit_mul_int_op(ctx->bc, &alu))) + return r; + /* 3. tmp0.w = -tmp0.z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_SUB_INT; @@ -5526,51 +5526,26 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) return r; /* 4. 
tmp0.y = hi (tmp0.x * src2) */ - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULHI_UINT; - alu.dst.sel = tmp0; - alu.dst.chan = j; - alu.dst.write = (j == 1); + alu.dst.sel = tmp0; + alu.dst.chan = 1; + alu.dst.write = 1; - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; + alu.src[0].sel = tmp0; + alu.src[0].chan = 0; - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 1; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[1], i); - } - alu.last = (j == 3); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + if (signed_op) { + alu.src[1].sel = tmp2; + alu.src[1].chan = 1; } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = 1; - alu.dst.write = 1; - - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; - - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 1; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[1], i); - } - - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); } + if ((r = emit_mul_int_op(ctx->bc, &alu))) + return r; + /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDE_INT; @@ -5592,43 +5567,21 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) return r; /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = j; - alu.dst.write = (j == 3); - - alu.src[0].sel = tmp0; - alu.src[0].chan = 2; - - alu.src[1].sel = tmp0; - alu.src[1].chan = 0; - - alu.last = (j == 3); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } - } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULHI_UINT; - alu.dst.sel = tmp0; - alu.dst.chan = 3; - alu.dst.write = 1; + alu.dst.sel = tmp0; + alu.dst.chan = 3; + alu.dst.write = 1; - alu.src[0].sel = tmp0; - alu.src[0].chan = 2; + alu.src[0].sel = tmp0; + alu.src[0].chan = 2; - alu.src[1].sel = tmp0; - alu.src[1].chan = 0; + alu.src[1].sel = tmp0; + alu.src[1].chan = 0; - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) + if ((r = emit_mul_int_op(ctx->bc, &alu))) return r; - } /* 7. tmp1.x = tmp0.x - tmp0.w */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -5685,98 +5638,46 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) return r; /* 10. 
tmp0.z = hi(tmp0.x * src1) = q */ - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = j; - alu.dst.write = (j == 2); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULHI_UINT; - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; + alu.dst.sel = tmp0; + alu.dst.chan = 2; + alu.dst.write = 1; - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 0; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[0], i); - } + alu.src[0].sel = tmp0; + alu.src[0].chan = 0; - alu.last = (j == 3); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + if (signed_op) { + alu.src[1].sel = tmp2; + alu.src[1].chan = 0; } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULHI_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = 2; - alu.dst.write = 1; - - alu.src[0].sel = tmp0; - alu.src[0].chan = 0; - - if (signed_op) { - alu.src[1].sel = tmp2; - alu.src[1].chan = 0; - } else { - r600_bytecode_src(&alu.src[1], &ctx->src[0], i); - } - - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } - /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = j; - alu.dst.write = (j == 1); + if ((r = emit_mul_int_op(ctx->bc, &alu))) + return r; - if (signed_op) { - alu.src[0].sel = tmp2; - alu.src[0].chan = 1; - } else { - r600_bytecode_src(&alu.src[0], &ctx->src[1], i); - } + /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_UINT; - alu.src[1].sel = tmp0; - alu.src[1].chan = 2; + alu.dst.sel = tmp0; + alu.dst.chan = 1; + alu.dst.write = 1; - alu.last = (j == 3); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + if (signed_op) { + alu.src[0].sel = tmp2; + alu.src[0].chan = 1; } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - - alu.dst.sel = tmp0; - alu.dst.chan = 1; - alu.dst.write = 1; - - if (signed_op) { - alu.src[0].sel = tmp2; - alu.src[0].chan = 1; - } else { - r600_bytecode_src(&alu.src[0], &ctx->src[1], i); - } + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + } - alu.src[1].sel = tmp0; - alu.src[1].chan = 2; + alu.src[1].sel = tmp0; + alu.src[1].chan = 2; - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + if ((r = emit_mul_int_op(ctx->bc, &alu))) + return r; /* 12. tmp0.w = src1 - tmp0.y = r */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -7548,38 +7449,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r; /* temp.x = sample_index*4 */ - if (ctx->bc->chip_class == CAYMAN) { - for (i = 0 ; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_INT; - alu.src[0].sel = src_gpr; - alu.src[0].chan = sample_chan; - alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = 4; - alu.dst.sel = temp; - alu.dst.chan = i; - alu.dst.write = i == 0; - if (i == 3) - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; - } - } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_INT; - alu.src[0].sel = src_gpr; - alu.src[0].chan = sample_chan; - alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = 4; - alu.dst.sel = temp; - alu.dst.chan = 0; - alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; - } + 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_INT; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = 0; + alu.dst.write = 1; + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; /* sample_index = temp.w >> temp.x */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -9966,7 +9847,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; - int i, j, k, r; + int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); /* src0 * src1 */ @@ -9974,40 +9855,21 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - if (ctx->bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - - alu.op = ALU_OP2_MULLO_UINT; - for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { - r600_bytecode_src(&alu.src[k], &ctx->src[k], i); - } - alu.dst.chan = j; - alu.dst.sel = ctx->temp_reg; - alu.dst.write = (j == i); - if (j == 3) - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; - } - } else { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - - alu.dst.chan = i; - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - for (j = 0; j < 2; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], i); - } + alu.dst.chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.op = ALU_OP2_MULLO_UINT; + for (j = 0; j < 2; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } + + alu.last = 1; + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; } -- 2.30.2