From 572d4646f70b7071af69ce65d93511a9539ffcc7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 24 Aug 2013 13:00:07 -0400 Subject: [PATCH] freedreno/a3xx/compiler: bit of re-arrange/cleanup It seems there are a number of cases where instructions have limitations on reading src's from the const register file, so make get_unconst() a bit easier to use. Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++-------- 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index e6c5bb71e99..b5cdda8aabf 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -91,6 +91,7 @@ struct fd3_compile_context { unsigned next_inloc; unsigned num_internal_temps; + struct tgsi_src_register internal_temps[6]; /* track registers which need to synchronize w/ "complex alu" cat3 * instruction pipeline: @@ -128,7 +129,7 @@ struct fd3_compile_context { * up the vector operation */ struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; }; @@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) /* Get internal-temp src/dst to use for a sequence of instructions * generated by a single TGSI op. 
*/ -static void +static struct tgsi_src_register * get_internal_temp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) + struct tgsi_dst_register *tmp_dst) { + struct tgsi_src_register *tmp_src; int n; tmp_dst->File = TGSI_FILE_TEMPORARY; @@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx, /* assign next temporary: */ n = ctx->num_internal_temps++; + compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); + tmp_src = &ctx->internal_temps[n]; tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; src_from_dst(tmp_src, tmp_dst); + + return tmp_src; } /* same as get_internal_temp, but w/ src.xxxx (for instructions that * replicate their results) */ -static void +static struct tgsi_src_register * get_internal_temp_repl(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) + struct tgsi_dst_register *tmp_dst) { - get_internal_temp(ctx, tmp_dst, tmp_src); + struct tgsi_src_register *tmp_src = + get_internal_temp(ctx, tmp_dst); tmp_src->SwizzleX = tmp_src->SwizzleY = tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; + return tmp_src; } static inline bool @@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src) (src->File == TGSI_FILE_IMMEDIATE); } +/* for instructions that cannot take a const register as src, if needed + * generate a move to temporary gpr: + */ +static struct tgsi_src_register * +get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) +{ + if (is_const(src)) { + static struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src = + get_internal_temp(ctx, &tmp_dst); + create_mov(ctx, &tmp_dst, src); + src = tmp_src; + } + return src; +} + static void get_immediate(struct fd3_compile_context *ctx, struct tgsi_src_register *reg, uint32_t val) @@ -396,27 +418,16 @@ get_immediate(struct fd3_compile_context *ctx, reg->SwizzleW = swiz2tgsi[swiz]; } -/* for instructions that cannot 
take a const register as src, if needed - * generate a move to temporary gpr: - */ -static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src, - struct tgsi_src_register *tmp_src) +static type_t +get_ftype(struct fd3_compile_context *ctx) { - static struct tgsi_dst_register tmp_dst; - if ((src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE)) { - get_internal_temp(ctx, &tmp_dst, tmp_src); - create_mov(ctx, &tmp_dst, src); - src = tmp_src; - } - return src; + return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; } static type_t -get_type(struct fd3_compile_context *ctx) +get_utype(struct fd3_compile_context *ctx) { - return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; + return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; } static unsigned @@ -436,7 +447,7 @@ static void create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src) { - type_t type_mov = get_type(ctx); + type_t type_mov = get_ftype(ctx); unsigned i; for (i = 0; i < 4; i++) { @@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_src_register *src = &inst->Src[i].Register; if ((src->File == dst->File) && (src->Index == dst->Index)) { - get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); + ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); ctx->tmp_dst.WriteMask = dst->WriteMask; dst = &ctx->tmp_dst; break; @@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, { /* if necessary, add mov back into original dst: */ if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); + create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); } } @@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + 
struct tgsi_src_register *tmp_src; struct tgsi_dst_register *dst = &inst->Dst[0].Register; struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; @@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t, unsigned n = t->arg; /* number of components */ unsigned i; - get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); /* Blob compiler never seems to use a const in src1 position for * mad.*, although there does seem (according to disassembler @@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t, * because after that point we no longer need tmp.x: */ create_mov(ctx, &tmp_dst, src1); - src1 = &tmp_src; + src1 = tmp_src; } instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); @@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t, add_dst_reg(ctx, instr, &tmp_dst, 0); add_src_reg(ctx, instr, src0, swiz0[i]); add_src_reg(ctx, instr, src1, swiz1[i]); - add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); } /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ @@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t, instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); add_dst_reg(ctx, instr, &tmp_dst, 0); add_src_reg(ctx, instr, src1, swiz1[i]); - add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); n++; } @@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t, ir3_instr_create(ctx->ir, 0, OPC_NOP); } - create_mov(ctx, dst, &tmp_src); + create_mov(ctx, dst, tmp_src); } /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ @@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register tmp_src1, tmp_src2; + struct tgsi_src_register *tmp_src1, *tmp_src2; struct tgsi_src_register tmp_const; - get_internal_temp(ctx, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, &tmp_dst2, &tmp_src2); + 
tmp_src1 = get_internal_temp(ctx, &tmp_dst1); + tmp_src2 = get_internal_temp(ctx, &tmp_dst2); get_immediate(ctx, &tmp_const, fui(1.0)); @@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t, /* tmp2 = tmp2 * c */ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); vectorize(ctx, instr, &tmp_dst2, 2, - &tmp_src2, 0, + tmp_src2, 0, &inst->Src[2].Register, 0); /* dst = tmp1 + tmp2 */ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); vectorize(ctx, instr, &inst->Dst[0].Register, 2, - &tmp_src1, 0, - &tmp_src2, 0); + tmp_src1, 0, + tmp_src2, 0); } /* FRC(x) = x - FLOOR(x) */ @@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); /* tmp = FLOOR(x) */ instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); @@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t, instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); vectorize(ctx, instr, &inst->Dst[0].Register, 2, &inst->Src[0].Register, 0, - &tmp_src, IR3_REG_NEGATE); + tmp_src, IR3_REG_NEGATE); } /* POW(a,b) = EXP2(b * LOG2(a)) */ @@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t, struct ir3_instruction *instr; struct ir3_register *r; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; struct tgsi_dst_register *dst = &inst->Dst[0].Register; struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; - get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); /* log2 Rtmp, Rsrc0 */ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; @@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t, /* mul.f Rtmp, Rtmp, Rsrc1 */ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); add_dst_reg(ctx, instr, &tmp_dst, 0); - 
add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); add_src_reg(ctx, instr, src1, src1->SwizzleX); /* blob compiler seems to ensure there are at least 6 instructions @@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t, /* exp2 Rdst, Rtmp */ instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); r = add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); regmask_set(ctx->needs_ss, r); - create_mov(ctx, dst, &tmp_src); + create_mov(ctx, dst, tmp_src); } /* texture fetch/sample instructions: */ @@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t, { struct ir3_register *r; struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; struct tgsi_src_register *coord = &inst->Src[0].Register; struct tgsi_src_register *samp = &inst->Src[1].Register; unsigned tex = inst->Texture.Texture; @@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t, */ for (i = 1; (i < 4) && (order[i] >= 0); i++) { if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { - type_t type_mov = get_type(ctx); + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + + type_t type_mov = get_ftype(ctx); /* need to move things around: */ - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); for (j = 0; (j < 4) && (order[j] >= 0); j++) { instr = ir3_instr_create(ctx->ir, 1, 0); @@ -816,7 +828,7 @@ trans_samp(const struct instr_translater *t, src_swiz(coord, order[j])); } - coord = &tmp_src; + coord = tmp_src; if (j < 4) ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; @@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t, } instr = ir3_instr_create(ctx->ir, 5, t->opc); - instr->cat5.type = get_type(ctx); + instr->cat5.type = get_ftype(ctx); instr->cat5.samp = samp->Index; instr->cat5.tex = samp->Index; instr->flags |= flags; @@ -847,12 +859,12 @@ trans_cmp(const struct 
instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; struct tgsi_src_register constval; /* final instruction uses original src1 and src2, so we need get_dst() */ struct tgsi_dst_register *dst = get_dst(ctx, inst); - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); /* cmps.f.ge tmp, src0, 0.0 */ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); @@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t, instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); instr->repeat = 3; add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R; + add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; /* sel.{f32,f16} dst, src2, tmp, src1 */ @@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t, OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &inst->Src[2].Register, 0, - &tmp_src, 0, + tmp_src, 0, &inst->Src[1].Register, 0); put_dst(ctx, inst, dst); @@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t, { struct tgsi_dst_register *dst = get_dst(ctx, inst); struct tgsi_src_register *src1; - struct tgsi_src_register tmp_src; struct ir3_instruction *instr; /* Blob compiler never seems to use a const in src1 position.. @@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t, * const. Not sure if this is a hw bug, or simply that the * disassembler lies. */ - src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src); + src1 = get_unconst(ctx, &inst->Src[1].Register); instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? 
t->hopc : t->opc); @@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t, { struct tgsi_dst_register *dst = get_dst(ctx, inst); struct tgsi_src_register *src; - struct tgsi_src_register tmp_src; struct ir3_instruction *instr; /* seems like blob compiler avoids const as src.. */ - src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src); + src = get_unconst(ctx, &inst->Src[0].Register); ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; instr = ir3_instr_create(ctx->ir, 4, t->opc); -- 2.30.2