#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
-#include "freedreno_lowering.h"
#include "freedreno_util.h"
#include "ir3_compiler.h"
{
unsigned ret;
struct tgsi_shader_info *info = &ctx->info;
- struct fd_lowering_config lconfig = {
+ struct tgsi_lowering_config lconfig = {
.color_two_side = so->key.color_two_side,
.lower_DST = true,
.lower_XPD = true,
break;
}
- ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
+ ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
ctx->free_tokens = !!ctx->tokens;
if (!ctx->tokens) {
/* no lowering */
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXF:
info->args = 2;
break;
case TGSI_OPCODE_TXP:
info->flags |= IR3_INSTR_P;
/* fallthrough */
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXD:
info->args = 1;
break;
}
info->order[arg] = pos++;
if (tgt->dims == 1)
info->order[pos++] = -1;
+ if (tgt->shadow)
+ info->order[pos++] = MAX2(arg + tgt->array, 2);
if (tgt->array)
info->order[pos++] = arg++;
- if (tgt->shadow)
- info->order[pos++] = MAX2(arg, 2);
if (info->flags & IR3_INSTR_P)
info->order[pos++] = 3;
/* fix up .y coord: */
if (is_1d(tex)) {
+ struct ir3_register *imm;
instr = instr_create(ctx, 1, 0); /* mov */
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5;
+ imm = ir3_reg_create(instr, 0, IR3_REG_IMMED);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXF)
+ imm->iim_val = 0;
+ else
+ imm->fim_val = 0.5;
}
coord = tmp_src;
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct ir3_instruction *instr;
+ struct ir3_instruction *instr, *collect;
+ struct ir3_register *reg;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
- struct tgsi_src_register *orig, *coord, *samp;
+ struct tgsi_src_register *orig, *coord, *samp, *offset, *dpdx, *dpdy;
+ struct tgsi_src_register zero;
+ const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
struct tex_info tinf;
+ int i;
memset(&tinf, 0, sizeof(tinf));
fill_tex_info(ctx, inst, &tinf);
coord = get_tex_coord(ctx, inst, &tinf);
+ get_immediate(ctx, &zero, 0);
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_TXB2:
orig = &inst->Src[1].Register;
samp = &inst->Src[2].Register;
- } else {
+ break;
+ case TGSI_OPCODE_TXD:
+ orig = &inst->Src[0].Register;
+ dpdx = &inst->Src[1].Register;
+ dpdy = &inst->Src[2].Register;
+ samp = &inst->Src[3].Register;
+ if (is_rel_or_const(dpdx))
+ dpdx = get_unconst(ctx, dpdx);
+ if (is_rel_or_const(dpdy))
+ dpdy = get_unconst(ctx, dpdy);
+ break;
+ default:
orig = &inst->Src[0].Register;
samp = &inst->Src[1].Register;
+ break;
}
if (tinf.args > 1 && is_rel_or_const(orig))
orig = get_unconst(ctx, orig);
+ /* scale up integer coords for TXF based on the LOD */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+ type_t type_mov = get_utype(ctx);
+
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
+ for (i = 0; i < tgt->dims; i++) {
+ instr = instr_create(ctx, 2, OPC_SHL_B);
+ add_dst_reg(ctx, instr, &tmp_dst, i);
+ add_src_reg(ctx, instr, coord, src_swiz(coord, i));
+ add_src_reg(ctx, instr, orig, orig->SwizzleW);
+ }
+ if (tgt->dims < 2) {
+ instr = instr_create(ctx, 1, 0);
+ instr->cat1.src_type = type_mov;
+ instr->cat1.dst_type = type_mov;
+ add_dst_reg(ctx, instr, &tmp_dst, i);
+ add_src_reg(ctx, instr, &zero, 0);
+ i++;
+ }
+ if (tgt->array) {
+ instr = instr_create(ctx, 1, 0);
+ instr->cat1.src_type = type_mov;
+ instr->cat1.dst_type = type_mov;
+ add_dst_reg(ctx, instr, &tmp_dst, i);
+ add_src_reg(ctx, instr, coord, src_swiz(coord, i));
+ }
+ coord = tmp_src;
+ }
+
+ if (inst->Texture.NumOffsets) {
+ struct tgsi_texture_offset *tex_offset = &inst->TexOffsets[0];
+ struct tgsi_src_register offset_src = {0};
+
+ offset_src.File = tex_offset->File;
+ offset_src.Index = tex_offset->Index;
+ offset_src.SwizzleX = tex_offset->SwizzleX;
+ offset_src.SwizzleY = tex_offset->SwizzleY;
+ offset_src.SwizzleZ = tex_offset->SwizzleZ;
+ offset = get_unconst(ctx, &offset_src);
+ tinf.flags |= IR3_INSTR_O;
+ }
+
instr = instr_create(ctx, 5, t->opc);
instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->flags |= tinf.flags;
add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
- add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf.src_wrmask);
+ reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
+
+ collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
+ ir3_reg_create(collect, 0, 0);
+ for (i = 0; i < 4; i++)
+ if (tinf.src_wrmask & (1 << i))
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
+ coord, src_swiz(coord, i));
+ else if (tinf.src_wrmask & ~((1 << i) - 1))
+ ir3_reg_create(collect, 0, 0);
+
+ /* Attach derivatives onto the end of the fan-in. Derivatives start after
+ * the 4th argument, so make sure that fi is padded up to 4 first.
+ */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+ while (collect->regs_count < 5)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
+ for (i = 0; i < tgt->dims; i++)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i);
+ if (tgt->dims < 2)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
+ for (i = 0; i < tgt->dims; i++)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i);
+ if (tgt->dims < 2)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
+ tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4;
+ }
+
+ reg->instr = collect;
+ reg->wrmask = tinf.src_wrmask;
+
+ /* The second argument contains the offsets, followed by the lod/bias
+ * argument. This is constructed more manually due to the dynamic nature.
+ */
+ if (inst->Texture.NumOffsets == 0 && tinf.args == 1)
+ return;
+
+ reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
+
+ collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
+ ir3_reg_create(collect, 0, 0);
+
+ if (inst->Texture.NumOffsets) {
+ for (i = 0; i < tgt->dims; i++)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
+ offset, i);
+ if (tgt->dims < 2)
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), &zero, 0);
+ }
if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2)
- add_src_reg_wrmask(ctx, instr, orig, orig->SwizzleX, 0x1);
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
+ orig, orig->SwizzleX);
else if (tinf.args > 1)
- add_src_reg_wrmask(ctx, instr, orig, orig->SwizzleW, 0x1);
+ ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
+ orig, orig->SwizzleW);
+
+ reg->instr = collect;
+ reg->wrmask = (1 << (collect->regs_count - 1)) - 1;
}
static void
level = get_unconst(ctx, level);
instr = instr_create(ctx, 5, OPC_GETSIZE);
- instr->cat5.type = get_ftype(ctx);
+ instr->cat5.type = get_utype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
instr->flags |= tinf.flags;
put_dst(ctx, inst, dst);
}
+/*
+ * ISSG(a) = a < 0 ? -1 : a > 0 ? 1 : 0
+ * cmps.s.lt tmp_neg, a, 0 # 1 if a is negative
+ * cmps.s.gt tmp_pos, a, 0 # 1 if a is positive
+ * sub.u dst, tmp_pos, tmp_neg
+ */
+static void
+trans_issg(const struct instr_translater *t,
+ struct ir3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *a = &inst->Src[0].Register;
+ struct tgsi_dst_register neg_dst, pos_dst;
+ struct tgsi_src_register *neg_src, *pos_src;
+
+ /* Two scratch temps hold the per-component 0/1 results of the
+ * signed compares below; their difference yields -1/0/+1.
+ */
+ neg_src = get_internal_temp(ctx, &neg_dst);
+ pos_src = get_internal_temp(ctx, &pos_dst);
+
+ /* cmps.s.lt neg, a, 0 */
+ instr = instr_create(ctx, 2, OPC_CMPS_S);
+ instr->cat2.condition = IR3_COND_LT;
+ /* second operand is the immediate 0 (hence IR3_REG_IMMED flag) */
+ vectorize(ctx, instr, &neg_dst, 2, a, 0, 0, IR3_REG_IMMED);
+
+ /* cmps.s.gt pos, a, 0 */
+ instr = instr_create(ctx, 2, OPC_CMPS_S);
+ instr->cat2.condition = IR3_COND_GT;
+ vectorize(ctx, instr, &pos_dst, 2, a, 0, 0, IR3_REG_IMMED);
+
+ /* sub.u dst, pos, neg -- (a>0) - (a<0) gives the sign value */
+ instr = instr_create(ctx, 2, OPC_SUB_U);
+ vectorize(ctx, instr, dst, 2, pos_src, 0, neg_src, 0);
+
+ put_dst(ctx, inst, dst);
+}
+
+
/*
* Conditional / Flow control
}
/*
- * UMUL
+ * UMUL / UMAD
*
* There is no 32-bit multiply instruction, so splitting a and b into high and
* low components, we get that
* mull.u tmp0, a, b (mul low, i.e. al * bl)
* madsh.m16 tmp1, a, b, tmp0 (mul-add shift high mix, i.e. ah * bl << 16)
* madsh.m16 dst, b, a, tmp1 (i.e. al * bh << 16)
+ *
+ * For UMAD, replace first mull.u with mad.u16.
*/
static void
trans_umul(const struct instr_translater *t,
if (is_rel_or_const(b))
b = get_unconst(ctx, b);
- /* mull.u tmp0, a, b */
- instr = instr_create(ctx, 2, OPC_MULL_U);
- vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0);
+ if (t->tgsi_opc == TGSI_OPCODE_UMUL) {
+ /* mull.u tmp0, a, b */
+ instr = instr_create(ctx, 2, OPC_MULL_U);
+ vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0);
+ } else {
+ struct tgsi_src_register *c = &inst->Src[2].Register;
+
+ /* mad.u16 tmp0, a, b, c */
+ instr = instr_create(ctx, 3, OPC_MAD_U16);
+ vectorize(ctx, instr, &tmp0_dst, 3, a, 0, b, 0, c, 0);
+ }
/* madsh.m16 tmp1, a, b, tmp0 */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
INSTR(UMUL, trans_umul),
+ INSTR(UMAD, trans_umul),
INSTR(UDIV, trans_idiv),
INSTR(IDIV, trans_idiv),
INSTR(MOD, trans_idiv),
INSTR(TXB, trans_samp, .opc = OPC_SAMB, .arg = TGSI_OPCODE_TXB),
INSTR(TXB2, trans_samp, .opc = OPC_SAMB, .arg = TGSI_OPCODE_TXB2),
INSTR(TXL, trans_samp, .opc = OPC_SAML, .arg = TGSI_OPCODE_TXL),
+ INSTR(TXD, trans_samp, .opc = OPC_SAMGQ, .arg = TGSI_OPCODE_TXD),
+ INSTR(TXF, trans_samp, .opc = OPC_ISAML, .arg = TGSI_OPCODE_TXF),
INSTR(TXQ, trans_txq),
INSTR(DDX, trans_deriv, .opc = OPC_DSX),
INSTR(DDY, trans_deriv, .opc = OPC_DSY),
INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
INSTR(UCMP, trans_ucmp),
+ INSTR(ISSG, trans_issg),
INSTR(IF, trans_if, .opc = OPC_CMPS_F),
INSTR(UIF, trans_if, .opc = OPC_CMPS_U),
INSTR(ELSE, trans_else),
struct ir3_block *block;
struct ir3_instruction **inputs;
unsigned i, j, actual_in;
- int ret = 0;
+ int ret = 0, max_bary;
assert(!so->ir);
}
ret = ir3_block_ra(block, so->type, key.half_precision,
- so->frag_coord, so->frag_face, &so->has_samp);
+ so->frag_coord, so->frag_face, &so->has_samp, &max_bary);
if (ret) {
DBG("RA failed!");
goto out;
*/
if (so->type == SHADER_VERTEX)
so->total_in = actual_in;
+ else
+ so->total_in = align(max_bary + 1, 4);
out:
if (ret) {