From: Rob Clark Date: Wed, 29 Jan 2014 21:25:52 +0000 (-0500) Subject: freedreno/a3xx/compiler: prepare for new compiler X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a418573c4d7fc7f896e7077378d2b4daf98d5217;p=mesa.git freedreno/a3xx/compiler: prepare for new compiler Shuffle things around to prepare for new compiler. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 3dcec9dceac..7d67bf28066 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -42,4 +42,4 @@ a3xx_SOURCES := \ a3xx/fd3_util.c \ a3xx/fd3_zsa.c \ a3xx/disasm-a3xx.c \ - a3xx/ir-a3xx.c + a3xx/ir3.c diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c index 2d5ae62a64a..0e45ec54b38 100644 --- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c @@ -285,21 +285,7 @@ static void print_instr_cat2(instr_t *instr) static void print_instr_cat3(instr_t *instr) { instr_cat3_t *cat3 = &instr->cat3; - bool full = true; - - // XXX is this based on opc or some other bit? - switch (cat3->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? 
- full = false; - break; - } + bool full = instr_cat3_full(cat3); printf(" "); print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false); @@ -747,26 +733,12 @@ struct opc_info { #undef OPC }; -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)])) - -static uint32_t getopc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - default: return 0; - } -} +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) static void print_instr(uint32_t *dwords, int level, int n) { instr_t *instr = (instr_t *)dwords; - uint32_t opc = getopc(instr); + uint32_t opc = instr_opc(instr); const char *name; printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 2c32c0fa2a7..5ab34e557b9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -44,12 +44,13 @@ #include "fd3_util.h" #include "instr-a3xx.h" -#include "ir-a3xx.h" +#include "ir3.h" struct fd3_compile_context { const struct tgsi_token *tokens; struct ir3_shader *ir; + struct ir3_block *block; struct fd3_shader_stateobj *so; struct tgsi_parse_context parser; @@ -124,6 +125,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, ctx->tokens = tokens; ctx->ir = so->ir; + ctx->block = ir3_block_create(ctx->ir, 0, 0, 0); ctx->so = so; ctx->last_input = NULL; ctx->last_rel = NULL; @@ -176,7 +178,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...) 
_debug_vprintf(format, ap); va_end(ap); tgsi_dump(ctx->tokens, 0); - assert(0); + debug_assert(0); } #define compile_assert(ctx, cond) do { \ @@ -208,11 +210,17 @@ handle_last_rel(struct fd3_compile_context *ctx) } } +static struct ir3_instruction * +instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) +{ + return ir3_instr_create(ctx->block, category, opc); +} + static void add_nop(struct fd3_compile_context *ctx, unsigned count) { while (count-- > 0) - ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr_create(ctx, 0, OPC_NOP); } static unsigned @@ -241,6 +249,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) { unsigned flags = 0, num = 0; + struct ir3_register *reg; switch (dst->File) { case TGSI_FILE_OUTPUT: @@ -256,10 +265,17 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, break; } + if (dst->Indirect) + flags |= IR3_REG_RELATIV; if (ctx->so->half_precision) flags |= IR3_REG_HALF; - return ir3_reg_create(instr, regid(num, chan), flags); + reg = ir3_reg_create(instr, regid(num, chan), flags); + + if (dst->Indirect) + ctx->last_rel = instr; + + return reg; } static struct ir3_register * @@ -517,9 +533,9 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, /* can't have abs or neg on a mov instr, so use * absneg.f instead to handle these cases: */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F); + instr = instr_create(ctx, 2, OPC_ABSNEG_F); } else { - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; } @@ -539,10 +555,10 @@ create_clamp(struct fd3_compile_context *ctx, { struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + instr = instr_create(ctx, 2, OPC_MAX_F); vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); + instr = 
instr_create(ctx, 2, OPC_MIN_F); vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); } @@ -707,7 +723,7 @@ trans_arl(const struct instr_translater *t, tmp_src = get_internal_temp_hr(ctx, &tmp_dst); /* cov.{f32,f16}s16 Rtmp, Rsrc */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = get_ftype(ctx); instr->cat1.dst_type = TYPE_S16; add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; @@ -716,7 +732,7 @@ trans_arl(const struct instr_translater *t, add_nop(ctx, 3); /* shl.b Rtmp, Rtmp, 2 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B); + instr = instr_create(ctx, 2, OPC_SHL_B); add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; @@ -724,7 +740,7 @@ trans_arl(const struct instr_translater *t, add_nop(ctx, 3); /* mova a0, Rtmp */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = TYPE_S16; instr->cat1.dst_type = TYPE_S16; add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; @@ -804,7 +820,7 @@ trans_samp(const struct instr_translater *t, tmp_src = get_internal_temp(ctx, &tmp_dst); for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; add_dst_reg(ctx, instr, &tmp_dst, j); @@ -817,7 +833,7 @@ trans_samp(const struct instr_translater *t, add_nop(ctx, 4 - j); } - instr = ir3_instr_create(ctx->ir, 5, t->opc); + instr = instr_create(ctx, 5, t->opc); instr->cat5.type = get_ftype(ctx); instr->cat5.samp = samp->Index; instr->cat5.tex = samp->Index; @@ -915,7 +931,7 @@ trans_cmp(const struct instr_translater *t, a0 = get_unconst(ctx, a0); /* cmps.f.ge tmp, a0, a1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr = instr_create(ctx, 2, OPC_CMPS_F); instr->cat2.condition = condition; 
vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); @@ -924,7 +940,7 @@ trans_cmp(const struct instr_translater *t, case TGSI_OPCODE_SGE: case TGSI_OPCODE_SLE: /* cov.u16f16 dst, tmp0 */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = get_utype(ctx); instr->cat1.dst_type = get_ftype(ctx); vectorize(ctx, instr, dst, 1, tmp_src, 0); @@ -934,12 +950,12 @@ trans_cmp(const struct instr_translater *t, case TGSI_OPCODE_SLT: case TGSI_OPCODE_CMP: /* add.s tmp, tmp, -1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr = instr_create(ctx, 2, OPC_ADD_S); vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED); if (t->tgsi_opc == TGSI_OPCODE_CMP) { /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &inst->Src[2].Register, 0, @@ -949,7 +965,7 @@ trans_cmp(const struct instr_translater *t, get_immediate(ctx, &constval0, fui(0.0)); get_immediate(ctx, &constval1, fui(1.0)); /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &constval0, 0, tmp_src, 0, &constval1, 0); @@ -990,7 +1006,7 @@ pop_branch(struct fd3_compile_context *ctx) * and set (jp) flag on whatever the next instruction was, rather * than inserting an extra nop.. 
*/ - instr = ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr = instr_create(ctx, 0, OPC_NOP); instr->flags |= IR3_INSTR_JP; /* pop the branch instruction from the stack and fix up branch target: */ @@ -1018,13 +1034,13 @@ trans_if(const struct instr_translater *t, if (is_const(src)) src = get_unconst(ctx, src); - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr = instr_create(ctx, 2, OPC_CMPS_F); ir3_reg_create(instr, regid(REG_P0, 0), 0); add_src_reg(ctx, instr, src, src->SwizzleX); add_src_reg(ctx, instr, &constval, constval.SwizzleX); instr->cat2.condition = IR3_COND_EQ; - instr = ir3_instr_create(ctx->ir, 0, OPC_BR); + instr = instr_create(ctx, 0, OPC_BR); push_branch(ctx, instr); } @@ -1036,7 +1052,7 @@ trans_else(const struct instr_translater *t, struct ir3_instruction *instr; /* for first half of if/else/endif, generate a jump past the else: */ - instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP); + instr = instr_create(ctx, 0, OPC_JUMP); pop_branch(ctx); push_branch(ctx, instr); @@ -1060,7 +1076,7 @@ instr_cat0(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - ir3_instr_create(ctx->ir, 0, t->opc); + instr_create(ctx, 0, t->opc); } static void @@ -1083,7 +1099,7 @@ instr_cat1(const struct instr_translater *t, * in the future if we start supporting widening/narrowing or * conversion to/from integer.. 
*/ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + instr = instr_create(ctx, 2, OPC_ADD_F); get_immediate(ctx, &constval, fui(0.0)); vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); } else { @@ -1129,14 +1145,14 @@ instr_cat2(const struct instr_translater *t, case OPC_SETRM: case OPC_CBITS_B: /* these only have one src reg */ - instr = ir3_instr_create(ctx->ir, 2, t->opc); + instr = instr_create(ctx, 2, t->opc); vectorize(ctx, instr, dst, 1, src0, src0_flags); break; default: if (is_const(src0) && is_const(src1)) src0 = get_unconst(ctx, src0); - instr = ir3_instr_create(ctx->ir, 2, t->opc); + instr = instr_create(ctx, 2, t->opc); vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, src1_flags); break; @@ -1186,7 +1202,7 @@ instr_cat3(const struct instr_translater *t, } } - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? t->hopc : t->opc); vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, &inst->Src[2].Register, 0); @@ -1214,8 +1230,8 @@ instr_cat4(const struct instr_translater *t, for (i = 0, n = 0; i < 4; i++) { if (dst->WriteMask & (1 << i)) { if (n++) - ir3_instr_create(ctx->ir, 0, OPC_NOP); - instr = ir3_instr_create(ctx->ir, 4, t->opc); + add_nop(ctx, 1); + instr = instr_create(ctx, 4, t->opc); add_dst_reg(ctx, instr, dst, i); add_src_reg(ctx, instr, src, src->SwizzleX); } @@ -1315,7 +1331,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) struct ir3_instruction *instr; struct ir3_register *dst; - instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F); + instr = instr_create(ctx, 2, OPC_BARY_F); /* dst register: */ dst = ir3_reg_create(instr, r + j, flags); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h index da25cdce88a..5cdb245640b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h @@ -33,61 +33,6 @@ #include "fd3_util.h" -/* 
************************************************************************* */ -/* split this out or find some helper to use.. like main/bitset.h.. */ - -#define MAX_REG 256 - -typedef uint8_t regmask_t[2 * MAX_REG / 8]; - -static inline unsigned regmask_idx(struct ir3_register *reg) -{ - unsigned num = reg->num; - assert(num < MAX_REG); - if (reg->flags & IR3_REG_HALF) - num += MAX_REG; - return num; -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - (*regmask)[idx / 8] |= 1 << (idx % 8); -} - -static inline unsigned regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - if ((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - return false; -} - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -/* ************************************************************************* */ - int fd3_compile_shader(struct fd3_shader_stateobj *so, const struct tgsi_token *tokens); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 3df29ecc911..ddb33ca5844 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -79,9 +79,10 @@ static void fixup_vp_regfootprint(struct fd3_shader_stateobj *so) { unsigned i; - for (i = 0; i < so->inputs_count; i++) { + for (i = 0; i < so->inputs_count; i++) so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2); - } + for (i = 0; i < so->outputs_count; i++) + so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2); } static struct 
fd3_shader_stateobj * @@ -230,7 +231,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic) } static uint32_t -find_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) +find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -257,13 +258,13 @@ fd3_program_emit(struct fd_ringbuffer *ring, fsi = &fp->info; } - pos_regid = find_regid(vp, + pos_regid = find_output_regid(vp, fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - posz_regid = find_regid(fp, + posz_regid = find_output_regid(fp, fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - psize_regid = find_regid(vp, + psize_regid = find_output_regid(vp, fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); - color_regid = find_regid(fp, + color_regid = find_output_regid(fp, fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); /* we could probably divide this up into things that need to be @@ -501,10 +502,11 @@ create_blit_fp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */ - instr = ir3_instr_create(ir, 2, OPC_BARY_F); + instr = ir3_instr_create(block, 2, OPC_BARY_F); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; instr->repeat = 1; @@ -514,11 +516,11 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,0), 0); /* r0.x */ /* (rpt5)nop */ - instr = ir3_instr_create(ir, 0, OPC_NOP); + instr = ir3_instr_create(block, 0, OPC_NOP); instr->repeat = 5; /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */ - instr = ir3_instr_create(ir, 5, OPC_SAM); + instr = ir3_instr_create(block, 5, OPC_SAM); instr->cat5.samp = 0; instr->cat5.tex = 0; instr->cat5.type = TYPE_F32; @@ -528,7 +530,7 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,2), 0); /* r0.z */ /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */ - instr = 
ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */ instr->flags = IR3_INSTR_SY; instr->repeat = 3; instr->cat1.src_type = TYPE_F32; @@ -538,7 +540,7 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,0), IR3_REG_R); /* (r)r0.x */ /* end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); if (!so) @@ -573,10 +575,11 @@ create_blit_vp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; so = create_internal_shader(pctx, SHADER_VERTEX, ir); @@ -611,10 +614,11 @@ create_solid_fp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */ - instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */ instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; instr->repeat = 3; instr->cat1.src_type = TYPE_F16; @@ -625,7 +629,7 @@ create_solid_fp(struct pipe_context *pctx) IR3_REG_CONST | IR3_REG_R); /* end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); if (!so) @@ -650,10 +654,11 @@ create_solid_vp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)end */ 
- instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 4aeeb2e3006..c781dfe4be9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -33,7 +33,7 @@ #include "freedreno_context.h" -#include "ir-a3xx.h" +#include "ir3.h" #include "disasm.h" typedef uint16_t fd3_semantic; /* semantic name + index */ @@ -43,6 +43,16 @@ fd3_semantic_name(uint8_t name, uint16_t index) return (name << 8) | (index & 0xff); } +static inline uint8_t sem2name(fd3_semantic sem) +{ + return sem >> 8; +} + +static inline uint16_t sem2idx(fd3_semantic sem) +{ + return sem & 0xff; +} + struct fd3_shader_stateobj { enum shader_t type; diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h index 1085ddf8c12..b0f78341131 100644 --- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h @@ -438,6 +438,23 @@ typedef struct PACKED { uint32_t opc_cat : 3; } instr_cat3_t; +static inline bool instr_cat3_full(instr_cat3_t *cat3) +{ + switch (cat3->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? 
+ return false; + default: + return true; + } +} + typedef struct PACKED { /* dword0: */ union PACKED { @@ -612,4 +629,18 @@ typedef union PACKED { }; } instr_t; +static inline uint32_t instr_opc(instr_t *instr) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: return instr->cat6.opc; + default: return 0; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c deleted file mode 100644 index a39214ee663..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ir-a3xx.h" - -#include -#include -#include -#include -#include -#include - -#include "freedreno_util.h" -#include "instr-a3xx.h" - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -static void * ir3_alloc(struct ir3_shader *shader, int sz) -{ - void *ptr = &shader->heap[shader->heap_idx]; - shader->heap_idx += align(sz, 4); - return ptr; -} - -struct ir3_shader * ir3_shader_create(void) -{ - return calloc(1, sizeof(struct ir3_shader)); -} - -void ir3_shader_destroy(struct ir3_shader *shader) -{ - free(shader); -} - -#define iassert(cond) do { \ - if (!(cond)) { \ - assert(cond); \ - return -1; \ - } } while (0) - -static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info, - uint32_t repeat, uint32_t valid_flags) -{ - reg_t val = { .dummy32 = 0 }; - - assert(!(reg->flags & ~valid_flags)); - - if (!(reg->flags & IR3_REG_R)) - repeat = 0; - - if (reg->flags & IR3_REG_IMMED) { - val.iim_val = reg->iim_val; - } else { - int8_t max = (reg->num + repeat) >> 2; - - val.comp = reg->num & 0x3; - val.num = reg->num >> 2; - - if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); - } else if ((max != REG_A0) && (max != REG_P0)) { - if (reg->flags & IR3_REG_HALF) { - info->max_half_reg = MAX2(info->max_half_reg, max); - } else { - info->max_reg = MAX2(info->max_reg, max); - } - } - } - - return val.dummy32; -} - -static int emit_cat0(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - instr_cat0_t *cat0 = ptr; - - cat0->immed = instr->cat0.immed; - cat0->repeat = instr->repeat; - cat0->ss = !!(instr->flags & IR3_INSTR_SS); - cat0->inv = instr->cat0.inv; - cat0->comp = instr->cat0.comp; - cat0->opc = instr->opc; - cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat0->sync = !!(instr->flags & IR3_INSTR_SY); - cat0->opc_cat = 0; - - return 0; -} - -static uint32_t type_flags(type_t type) -{ - return 
(type_size(type) == 32) ? 0 : IR3_REG_HALF; -} - -static int emit_cat1(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat1_t *cat1 = ptr; - - iassert(instr->regs_count == 2); - iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); - iassert((src->flags & IR3_REG_IMMED) || - !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); - - if (src->flags & IR3_REG_IMMED) { - cat1->iim_val = src->iim_val; - cat1->src_im = 1; - } else if (src->flags & IR3_REG_RELATIV) { - cat1->off = src->offset; - cat1->src_rel = 1; - cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); - } else { - cat1->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | - IR3_REG_CONST | IR3_REG_HALF); - cat1->src_c = !!(src->flags & IR3_REG_CONST); - } - - cat1->dst = reg(dst, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_EVEN | - IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); - cat1->repeat = instr->repeat; - cat1->src_r = !!(src->flags & IR3_REG_R); - cat1->ss = !!(instr->flags & IR3_INSTR_SS); - cat1->ul = !!(instr->flags & IR3_INSTR_UL); - cat1->dst_type = instr->cat1.dst_type; - cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); - cat1->src_type = instr->cat1.src_type; - cat1->even = !!(dst->flags & IR3_REG_EVEN); - cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); - cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat1->sync = !!(instr->flags & IR3_INSTR_SY); - cat1->opc_cat = 1; - - return 0; -} - -static int emit_cat2(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - instr_cat2_t *cat2 = ptr; - - iassert((instr->regs_count == 2) || (instr->regs_count == 3)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->num < (1 << 10)); - cat2->rel1.src1 = 
reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat2->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat2->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat2->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat2->src1 = reg(src1, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); - cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE); - cat2->src1_abs = !!(src1->flags & IR3_REG_ABS); - cat2->src1_r = !!(src1->flags & IR3_REG_R); - - if (src2) { - iassert((src2->flags & IR3_REG_IMMED) || - !((src1->flags ^ src2->flags) & IR3_REG_HALF)); - - if (src2->flags & IR3_REG_RELATIV) { - iassert(src2->num < (1 << 10)); - cat2->rel2.src2 = reg(src2, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); - cat2->rel2.src2_rel = 1; - } else if (src2->flags & IR3_REG_CONST) { - iassert(src2->num < (1 << 12)); - cat2->c2.src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat2->c2.src2_c = 1; - } else { - iassert(src2->num < (1 << 11)); - cat2->src2 = reg(src2, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); - cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE); - cat2->src2_abs = !!(src2->flags & IR3_REG_ABS); - cat2->src2_r = !!(src2->flags & IR3_REG_R); - } - - cat2->dst = reg(dst, info, instr->repeat, - IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); - cat2->repeat = instr->repeat; - cat2->ss = !!(instr->flags & IR3_INSTR_SS); - 
cat2->ul = !!(instr->flags & IR3_INSTR_UL); - cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); - cat2->ei = !!(dst->flags & IR3_REG_EI); - cat2->cond = instr->cat2.condition; - cat2->full = ! (src1->flags & IR3_REG_HALF); - cat2->opc = instr->opc; - cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat2->sync = !!(instr->flags & IR3_INSTR_SY); - cat2->opc_cat = 2; - - return 0; -} - -static int emit_cat3(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat3_t *cat3 = ptr; - uint32_t src_flags = 0; - - switch (instr->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - src_flags |= IR3_REG_HALF; - break; - default: - break; - } - - iassert(instr->regs_count == 4); - iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->num < (1 << 10)); - cat3->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat3->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat3->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | - IR3_REG_HALF); - cat3->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat3->src1 = reg(src1, info, instr->repeat, - IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); - } - - cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE); - cat3->src1_r = !!(src1->flags & IR3_REG_R); - - cat3->src2 = reg(src2, info, instr->repeat, - 
IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->src2_c = !!(src2->flags & IR3_REG_CONST); - cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE); - cat3->src2_r = !!(src2->flags & IR3_REG_R); - - - if (src3->flags & IR3_REG_RELATIV) { - iassert(src3->num < (1 << 10)); - cat3->rel2.src3 = reg(src3, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); - cat3->rel2.src3_rel = 1; - } else if (src3->flags & IR3_REG_CONST) { - iassert(src3->num < (1 << 12)); - cat3->c2.src3 = reg(src3, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | - IR3_REG_HALF); - cat3->c2.src3_c = 1; - } else { - iassert(src3->num < (1 << 11)); - cat3->src3 = reg(src3, info, instr->repeat, - IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); - } - - cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE); - cat3->src3_r = !!(src3->flags & IR3_REG_R); - - cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat3->repeat = instr->repeat; - cat3->ss = !!(instr->flags & IR3_INSTR_SS); - cat3->ul = !!(instr->flags & IR3_INSTR_UL); - cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); - cat3->opc = instr->opc; - cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat3->sync = !!(instr->flags & IR3_INSTR_SY); - cat3->opc_cat = 3; - - return 0; -} - -static int emit_cat4(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat4_t *cat4 = ptr; - - iassert(instr->regs_count == 2); - - if (src->flags & IR3_REG_RELATIV) { - iassert(src->num < (1 << 10)); - cat4->rel.src = reg(src, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); - cat4->rel.src_rel = 1; - } else if (src->flags & IR3_REG_CONST) { - iassert(src->num < (1 
<< 12)); - cat4->c.src = reg(src, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat4->c.src_c = 1; - } else { - iassert(src->num < (1 << 11)); - cat4->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat4->src_im = !!(src->flags & IR3_REG_IMMED); - cat4->src_neg = !!(src->flags & IR3_REG_NEGATE); - cat4->src_abs = !!(src->flags & IR3_REG_ABS); - cat4->src_r = !!(src->flags & IR3_REG_R); - - cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat4->repeat = instr->repeat; - cat4->ss = !!(instr->flags & IR3_INSTR_SS); - cat4->ul = !!(instr->flags & IR3_INSTR_UL); - cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); - cat4->full = ! (src->flags & IR3_REG_HALF); - cat4->opc = instr->opc; - cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat4->sync = !!(instr->flags & IR3_INSTR_SY); - cat4->opc_cat = 4; - - return 0; -} - -static int emit_cat5(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat5_t *cat5 = ptr; - - iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); - - if (src1) { - cat5->full = ! 
(src1->flags & IR3_REG_HALF); - cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); - } - - - if (instr->flags & IR3_INSTR_S2EN) { - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - if (src3) { - iassert(src3->flags & IR3_REG_HALF); - cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); - } - iassert(!(instr->cat5.samp | instr->cat5.tex)); - } else { - iassert(!src3); - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - cat5->norm.samp = instr->cat5.samp; - cat5->norm.tex = instr->cat5.tex; - } - - cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat5->wrmask = dst->wrmask; - cat5->type = instr->cat5.type; - cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); - cat5->is_a = !!(instr->flags & IR3_INSTR_A); - cat5->is_s = !!(instr->flags & IR3_INSTR_S); - cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); - cat5->is_o = !!(instr->flags & IR3_INSTR_O); - cat5->is_p = !!(instr->flags & IR3_INSTR_P); - cat5->opc = instr->opc; - cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat5->sync = !!(instr->flags & IR3_INSTR_SY); - cat5->opc_cat = 5; - - return 0; -} - -static int emit_cat6(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat6_t *cat6 = ptr; - - iassert(instr->regs_count == 2); - - switch (instr->opc) { - /* load instructions: */ - case OPC_LDG: - case OPC_LDP: - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - case OPC_PREFETCH: { - instr_cat6a_t *cat6a = ptr; - - iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF)); - - cat6a->must_be_one1 = 1; - cat6a->must_be_one2 = 1; - cat6a->off = instr->cat6.offset; - cat6a->src = reg(src, info, instr->repeat, 0); - cat6a->dst = reg(dst, info, instr->repeat, 
IR3_REG_R | IR3_REG_HALF); - break; - } - /* store instructions: */ - case OPC_STG: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_STI: { - instr_cat6b_t *cat6b = ptr; - uint32_t src_flags = type_flags(instr->cat6.type); - uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0; - - iassert(!((src->flags ^ src_flags) & IR3_REG_HALF)); - - cat6b->must_be_one1 = 1; - cat6b->must_be_one2 = 1; - cat6b->src = reg(src, info, instr->repeat, src_flags); - cat6b->off_hi = instr->cat6.offset >> 8; - cat6b->off = instr->cat6.offset; - cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags); - - break; - } - default: - // TODO - break; - } - - cat6->iim_val = instr->cat6.iim_val; - cat6->type = instr->cat6.type; - cat6->opc = instr->opc; - cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat6->sync = !!(instr->flags & IR3_INSTR_SY); - cat6->opc_cat = 6; - - return 0; -} - -static int (*emit[])(struct ir3_instruction *instr, void *ptr, - struct ir3_shader_info *info) = { - emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, -}; - -void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info) -{ - uint32_t *ptr, *dwords; - uint32_t i; - - info->max_reg = -1; - info->max_half_reg = -1; - info->max_const = -1; - - /* need a integer number of instruction "groups" (sets of four - * instructions), so pad out w/ NOPs if needed: - * (each instruction is 64bits) - */ - info->sizedwords = 2 * align(shader->instrs_count, 4); - - ptr = dwords = calloc(1, 4 * info->sizedwords); - - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; - int ret = emit[instr->category](instr, dwords, info); - if (ret) - goto fail; - dwords += 2; - } - - return ptr; - -fail: - free(ptr); - return NULL; -} - -static struct ir3_register * reg_create(struct ir3_shader *shader, - int num, int flags) -{ - struct ir3_register *reg = - ir3_alloc(shader, sizeof(struct ir3_register)); - 
reg->wrmask = 1; - reg->flags = flags; - reg->num = num; - return reg; -} - -static void insert_instr(struct ir3_shader *shader, - struct ir3_instruction *instr) -{ - assert(shader->instrs_count < ARRAY_SIZE(shader->instrs)); - shader->instrs[shader->instrs_count++] = instr; -} - -struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, - int category, opc_t opc) -{ - struct ir3_instruction *instr = - ir3_alloc(shader, sizeof(struct ir3_instruction)); - instr->shader = shader; - instr->category = category; - instr->opc = opc; - insert_instr(shader, instr); - return instr; -} - -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) -{ - struct ir3_instruction *new_instr = - ir3_alloc(instr->shader, sizeof(struct ir3_instruction)); - unsigned i; - - *new_instr = *instr; - insert_instr(instr->shader, new_instr); - - /* clone registers: */ - new_instr->regs_count = 0; - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - struct ir3_register *new_reg = - ir3_reg_create(new_instr, reg->num, reg->flags); - *new_reg = *reg; - } - - return new_instr; -} - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags) -{ - struct ir3_register *reg = reg_create(instr->shader, num, flags); - assert(instr->regs_count < ARRAY_SIZE(instr->regs)); - instr->regs[instr->regs_count++] = reg; - return reg; -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h deleted file mode 100644 index b0afe1868eb..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IR3_H_ -#define IR3_H_ - -#include <stdint.h> -#include <stdbool.h> - -#include "instr-a3xx.h" - -/* low level intermediate representation of an adreno shader program */ - -struct ir3_shader; - -struct ir3_shader * fd_asm_parse(const char *src); - -struct ir3_shader_info { - uint16_t sizedwords; - /* NOTE: max_reg, etc, does not include registers not touched - * by the shader (ie. vertex fetched via VFD_DECODE but not - * touched by shader) - */ - int8_t max_reg; /* highest GPR # used by shader */ - int8_t max_half_reg; - int8_t max_const; -}; - -struct ir3_register { - enum { - IR3_REG_CONST = 0x001, - IR3_REG_IMMED = 0x002, - IR3_REG_HALF = 0x004, - IR3_REG_RELATIV= 0x008, - IR3_REG_R = 0x010, - IR3_REG_NEGATE = 0x020, - IR3_REG_ABS = 0x040, - IR3_REG_EVEN = 0x080, - IR3_REG_POS_INF= 0x100, - /* (ei) flag, end-input?
Set on last bary, presumably to signal - * that the shader needs no more input: - */ - IR3_REG_EI = 0x200, - } flags; - union { - /* normal registers: - * the component is in the low two bits of the reg #, so - * rN.x becomes: (N << 2) | x - */ - int num; - /* immediate: */ - int iim_val; - float fim_val; - /* relative: */ - int offset; - }; - - /* used for cat5 instructions, but also for internal/IR level - * tracking of what registers are read/written by an instruction. - * wrmask may be a bad name since it is used to represent both - * src and dst that touch multiple adjacent registers. - */ - int wrmask; -}; - -struct ir3_instruction { - struct ir3_shader *shader; - int category; - opc_t opc; - enum { - /* (sy) flag is set on first instruction, and after sample - * instructions (probably just on RAW hazard). - */ - IR3_INSTR_SY = 0x001, - /* (ss) flag is set on first instruction, and first instruction - * to depend on the result of "long" instructions (RAW hazard): - * - * rcp, rsq, log2, exp2, sin, cos, sqrt - * - * It seems to synchronize until all in-flight instructions are - * completed, for example: - * - * rsq hr1.w, hr1.w - * add.f hr2.z, (neg)hr2.z, hc0.y - * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y - * rsq hr2.x, hr2.x - * (rpt1)nop - * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w - * nop - * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w - * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w - * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x - * - * The last mul.f does not have (ss) set, presumably because the - * (ss) on the previous instruction does the job. - * - * The blob driver also seems to set it on WAR hazards, although - * not really clear if this is needed or just blob compiler being - * sloppy. 
So far I haven't found a case where removing the (ss) - * causes problems for WAR hazard, but I could just be getting - * lucky: - * - * rcp r1.y, r3.y - * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z - * - */ - IR3_INSTR_SS = 0x002, - /* (jp) flag is set on jump targets: - */ - IR3_INSTR_JP = 0x004, - IR3_INSTR_UL = 0x008, - IR3_INSTR_3D = 0x010, - IR3_INSTR_A = 0x020, - IR3_INSTR_O = 0x040, - IR3_INSTR_P = 0x080, - IR3_INSTR_S = 0x100, - IR3_INSTR_S2EN = 0x200, - } flags; - int repeat; - unsigned regs_count; - struct ir3_register *regs[4]; - union { - struct { - char inv; - char comp; - int immed; - } cat0; - struct { - type_t src_type, dst_type; - } cat1; - struct { - enum { - IR3_COND_LT = 0, - IR3_COND_LE = 1, - IR3_COND_GT = 2, - IR3_COND_GE = 3, - IR3_COND_EQ = 4, - IR3_COND_NE = 5, - } condition; - } cat2; - struct { - unsigned samp, tex; - type_t type; - } cat5; - struct { - type_t type; - int offset; - int iim_val; - } cat6; - }; -}; - -#define MAX_INSTRS 1024 - -struct ir3_shader { - unsigned instrs_count; - struct ir3_instruction *instrs[MAX_INSTRS]; - uint32_t heap[128 * MAX_INSTRS]; - unsigned heap_idx; -}; - -struct ir3_shader * ir3_shader_create(void); -void ir3_shader_destroy(struct ir3_shader *shader); -void * ir3_shader_assemble(struct ir3_shader *shader, - struct ir3_shader_info *info); - -struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, - int category, opc_t opc); -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags); - -#endif /* IR3_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.c b/src/gallium/drivers/freedreno/a3xx/ir3.c new file mode 100644 index 00000000000..2a06d42c7d6 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/ir3.c @@ -0,0 +1,640 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir3.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdbool.h> +#include <errno.h> + +#include "freedreno_util.h" +#include "instr-a3xx.h" + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot.
+ */ +/* NOTE(review): nothing here checks that heap_idx stays inside shader->heap[], and heap_idx is advanced by align(sz, 4) where callers in this file pass sizeof(...) byte counts while heap[] is declared with uint32_t elements -- confirm the intended unit. */ static void * ir3_alloc(struct ir3_shader *shader, int sz) +{ + void *ptr = &shader->heap[shader->heap_idx]; + shader->heap_idx += align(sz, 4); + return ptr; +} + +/* Allocate a zero-filled ir3_shader with calloc(); the calloc() result is returned unchanged, so it is NULL on allocation failure. */ struct ir3_shader * ir3_shader_create(void) +{ + return calloc(1, sizeof(struct ir3_shader)); +} + +/* Release the shader with free(); objects handed out by ir3_alloc() live in the heap[] member of this same allocation. */ void ir3_shader_destroy(struct ir3_shader *shader) +{ + free(shader); +} + +/* Check cond inside an encoder: assert() fires when assertions are enabled, otherwise the enclosing function returns -1. Only usable in functions that return int. */ #define iassert(cond) do { \ + if (!(cond)) { \ + assert(cond); \ + return -1; \ + } } while (0) + +/* Encode one register operand and return the raw reg_t bits (val.dummy32). Asserts that reg->flags has no bit outside valid_flags. repeat is ignored unless the operand has IR3_REG_R set. Immediates store iim_val; otherwise comp/num are split out of reg->num and the highest register index touched, (num + repeat + util_last_bit(wrmask) - 1) >> 2, is folded into info->max_const, info->max_half_reg or info->max_reg (a non-const operand is not counted when that index equals REG_A0 or REG_P0). NOTE(review): components and max are int8_t -- confirm that large const indices cannot wrap. */ static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info, + uint32_t repeat, uint32_t valid_flags) +{ + reg_t val = { .dummy32 = 0 }; + + assert(!(reg->flags & ~valid_flags)); + + if (!(reg->flags & IR3_REG_R)) + repeat = 0; + + if (reg->flags & IR3_REG_IMMED) { + val.iim_val = reg->iim_val; + } else { + int8_t components = util_last_bit(reg->wrmask); + int8_t max = (reg->num + repeat + components - 1) >> 2; + + val.comp = reg->num & 0x3; + val.num = reg->num >> 2; + + if (reg->flags & IR3_REG_CONST) { + info->max_const = MAX2(info->max_const, max); + } else if ((max != REG_A0) && (max != REG_P0)) { + if (reg->flags & IR3_REG_HALF) { + info->max_half_reg = MAX2(info->max_half_reg, max); + } else { + info->max_reg = MAX2(info->max_reg, max); + } + } + } + + return val.dummy32; +} + +/* Fill the instr_cat0_t at ptr from a category 0 instruction; info is unused and the result is always 0. */ static int emit_cat0(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + instr_cat0_t *cat0 = ptr; + + cat0->immed = instr->cat0.immed; + cat0->repeat = instr->repeat; + cat0->ss = !!(instr->flags & IR3_INSTR_SS); + cat0->inv = instr->cat0.inv; + cat0->comp = instr->cat0.comp; + cat0->opc = instr->opc; + cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat0->sync = !!(instr->flags & IR3_INSTR_SY); + cat0->opc_cat = 0; + + return 0; +} + +static uint32_t type_flags(type_t type) +{ + return (type_size(type) == 32) ?
0 : IR3_REG_HALF; +} + +static int emit_cat1(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat1_t *cat1 = ptr; + + iassert(instr->regs_count == 2); + iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); + iassert((src->flags & IR3_REG_IMMED) || + !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); + + if (src->flags & IR3_REG_IMMED) { + cat1->iim_val = src->iim_val; + cat1->src_im = 1; + } else if (src->flags & IR3_REG_RELATIV) { + cat1->off = src->offset; + cat1->src_rel = 1; + cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); + } else { + cat1->src = reg(src, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_R | + IR3_REG_CONST | IR3_REG_HALF); + cat1->src_c = !!(src->flags & IR3_REG_CONST); + } + + cat1->dst = reg(dst, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_EVEN | + IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); + cat1->repeat = instr->repeat; + cat1->src_r = !!(src->flags & IR3_REG_R); + cat1->ss = !!(instr->flags & IR3_INSTR_SS); + cat1->ul = !!(instr->flags & IR3_INSTR_UL); + cat1->dst_type = instr->cat1.dst_type; + cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); + cat1->src_type = instr->cat1.src_type; + cat1->even = !!(dst->flags & IR3_REG_EVEN); + cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat1->sync = !!(instr->flags & IR3_INSTR_SY); + cat1->opc_cat = 1; + + return 0; +} + +static int emit_cat2(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + instr_cat2_t *cat2 = ptr; + + iassert((instr->regs_count == 2) || (instr->regs_count == 3)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->num < (1 << 10)); + cat2->rel1.src1 = reg(src1, info, instr->repeat, + 
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); + cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat2->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat2->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + cat2->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat2->src1 = reg(src1, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + } + cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); + cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE); + cat2->src1_abs = !!(src1->flags & IR3_REG_ABS); + cat2->src1_r = !!(src1->flags & IR3_REG_R); + + if (src2) { + iassert((src2->flags & IR3_REG_IMMED) || + !((src1->flags ^ src2->flags) & IR3_REG_HALF)); + + if (src2->flags & IR3_REG_RELATIV) { + iassert(src2->num < (1 << 10)); + cat2->rel2.src2 = reg(src2, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); + cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); + cat2->rel2.src2_rel = 1; + } else if (src2->flags & IR3_REG_CONST) { + iassert(src2->num < (1 << 12)); + cat2->c2.src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + cat2->c2.src2_c = 1; + } else { + iassert(src2->num < (1 << 11)); + cat2->src2 = reg(src2, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + } + + cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); + cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE); + cat2->src2_abs = !!(src2->flags & IR3_REG_ABS); + cat2->src2_r = !!(src2->flags & IR3_REG_R); + } + + cat2->dst = reg(dst, info, instr->repeat, + IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); + cat2->repeat = instr->repeat; + cat2->ss = !!(instr->flags & IR3_INSTR_SS); + cat2->ul = !!(instr->flags & 
IR3_INSTR_UL); + cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); + cat2->ei = !!(dst->flags & IR3_REG_EI); + cat2->cond = instr->cat2.condition; + cat2->full = ! (src1->flags & IR3_REG_HALF); + cat2->opc = instr->opc; + cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat2->sync = !!(instr->flags & IR3_INSTR_SY); + cat2->opc_cat = 2; + + return 0; +} + +static int emit_cat3(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat3_t *cat3 = ptr; + uint32_t src_flags = 0; + + switch (instr->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + src_flags |= IR3_REG_HALF; + break; + default: + break; + } + + iassert(instr->regs_count == 4); + iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->num < (1 << 10)); + cat3->rel1.src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_R | IR3_REG_HALF); + cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat3->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat3->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | + IR3_REG_HALF); + cat3->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat3->src1 = reg(src1, info, instr->repeat, + IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); + } + + cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE); + cat3->src1_r = !!(src1->flags & IR3_REG_R); + + cat3->src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | 
IR3_REG_NEGATE | + IR3_REG_R | IR3_REG_HALF); + cat3->src2_c = !!(src2->flags & IR3_REG_CONST); + cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE); + cat3->src2_r = !!(src2->flags & IR3_REG_R); + + + if (src3->flags & IR3_REG_RELATIV) { + iassert(src3->num < (1 << 10)); + cat3->rel2.src3 = reg(src3, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_R | IR3_REG_HALF); + cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); + cat3->rel2.src3_rel = 1; + } else if (src3->flags & IR3_REG_CONST) { + iassert(src3->num < (1 << 12)); + cat3->c2.src3 = reg(src3, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | + IR3_REG_HALF); + cat3->c2.src3_c = 1; + } else { + iassert(src3->num < (1 << 11)); + cat3->src3 = reg(src3, info, instr->repeat, + IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); + } + + cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE); + cat3->src3_r = !!(src3->flags & IR3_REG_R); + + cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat3->repeat = instr->repeat; + cat3->ss = !!(instr->flags & IR3_INSTR_SS); + cat3->ul = !!(instr->flags & IR3_INSTR_UL); + cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); + cat3->opc = instr->opc; + cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat3->sync = !!(instr->flags & IR3_INSTR_SY); + cat3->opc_cat = 3; + + return 0; +} + +static int emit_cat4(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat4_t *cat4 = ptr; + + iassert(instr->regs_count == 2); + + if (src->flags & IR3_REG_RELATIV) { + iassert(src->num < (1 << 10)); + cat4->rel.src = reg(src, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); + cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); + cat4->rel.src_rel = 1; + } else if (src->flags & IR3_REG_CONST) { + iassert(src->num < (1 << 12)); + 
cat4->c.src = reg(src, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + cat4->c.src_c = 1; + } else { + iassert(src->num < (1 << 11)); + cat4->src = reg(src, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | + IR3_REG_R | IR3_REG_HALF); + } + + cat4->src_im = !!(src->flags & IR3_REG_IMMED); + cat4->src_neg = !!(src->flags & IR3_REG_NEGATE); + cat4->src_abs = !!(src->flags & IR3_REG_ABS); + cat4->src_r = !!(src->flags & IR3_REG_R); + + cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat4->repeat = instr->repeat; + cat4->ss = !!(instr->flags & IR3_INSTR_SS); + cat4->ul = !!(instr->flags & IR3_INSTR_UL); + cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); + cat4->full = ! (src->flags & IR3_REG_HALF); + cat4->opc = instr->opc; + cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat4->sync = !!(instr->flags & IR3_INSTR_SY); + cat4->opc_cat = 4; + + return 0; +} + +static int emit_cat5(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat5_t *cat5 = ptr; + + iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); + + if (src1) { + cat5->full = ! 
(src1->flags & IR3_REG_HALF); + cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); + } + + + if (instr->flags & IR3_INSTR_S2EN) { + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + if (src3) { + iassert(src3->flags & IR3_REG_HALF); + cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); + } + iassert(!(instr->cat5.samp | instr->cat5.tex)); + } else { + iassert(!src3); + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + cat5->norm.samp = instr->cat5.samp; + cat5->norm.tex = instr->cat5.tex; + } + + cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat5->wrmask = dst->wrmask; + cat5->type = instr->cat5.type; + cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); + cat5->is_a = !!(instr->flags & IR3_INSTR_A); + cat5->is_s = !!(instr->flags & IR3_INSTR_S); + cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); + cat5->is_o = !!(instr->flags & IR3_INSTR_O); + cat5->is_p = !!(instr->flags & IR3_INSTR_P); + cat5->opc = instr->opc; + cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat5->sync = !!(instr->flags & IR3_INSTR_SY); + cat5->opc_cat = 5; + + return 0; +} + +static int emit_cat6(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat6_t *cat6 = ptr; + + iassert(instr->regs_count == 2); + + switch (instr->opc) { + /* load instructions: */ + case OPC_LDG: + case OPC_LDP: + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + case OPC_PREFETCH: { + instr_cat6a_t *cat6a = ptr; + + iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF)); + + cat6a->must_be_one1 = 1; + cat6a->must_be_one2 = 1; + cat6a->off = instr->cat6.offset; + cat6a->src = reg(src, info, instr->repeat, 0); + cat6a->dst = reg(dst, info, instr->repeat, 
IR3_REG_R | IR3_REG_HALF); + break; + } + /* store instructions: */ + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_STI: { + instr_cat6b_t *cat6b = ptr; + uint32_t src_flags = type_flags(instr->cat6.type); + uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0; + + iassert(!((src->flags ^ src_flags) & IR3_REG_HALF)); + + cat6b->must_be_one1 = 1; + cat6b->must_be_one2 = 1; + cat6b->src = reg(src, info, instr->repeat, src_flags); + cat6b->off_hi = instr->cat6.offset >> 8; + cat6b->off = instr->cat6.offset; + cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags); + + break; + } + default: + // TODO + break; + } + + cat6->iim_val = instr->cat6.iim_val; + cat6->type = instr->cat6.type; + cat6->opc = instr->opc; + cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->opc_cat = 6; + + return 0; +} + +/* encoder for each instruction category, indexed by instr->category */ static int (*emit[])(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) = { + emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, +}; + +/* Encode every instruction in shader->instrs[] into a freshly calloc()ed buffer and return it, or free the buffer and return NULL if an encoder returns non-zero. info->max_reg, max_half_reg and max_const are reset to -1 and info->instrs_count to 0 first; instrs_count then grows by 1 + instr->repeat per instruction and info->sizedwords holds the buffer size in dwords. NOTE(review): the calloc() result is used without a NULL check and instr->category is not range-checked before indexing emit[]; presumably the caller frees the returned buffer -- confirm. */ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info) +{ + uint32_t *ptr, *dwords; + uint32_t i; + + info->max_reg = -1; + info->max_half_reg = -1; + info->max_const = -1; + info->instrs_count = 0; + + /* need an integer number of instruction "groups" (sets of four + * instructions), so pad out w/ NOPs if needed: + * (each instruction is 64bits) + */ + info->sizedwords = 2 * align(shader->instrs_count, 4); + + ptr = dwords = calloc(1, 4 * info->sizedwords); + + for (i = 0; i < shader->instrs_count; i++) { + struct ir3_instruction *instr = shader->instrs[i]; + int ret = emit[instr->category](instr, dwords, info); + if (ret) + goto fail; + info->instrs_count += 1 + instr->repeat; + dwords += 2; + } + + return ptr; + +fail: + free(ptr); + return NULL; +} + +static struct ir3_register * reg_create(struct ir3_shader *shader, + int num, int flags) +{ + struct ir3_register
*reg = + ir3_alloc(shader, sizeof(struct ir3_register)); + reg->wrmask = 1; + reg->flags = flags; + reg->num = num; + return reg; +} + +/* Append instr to shader->instrs[]. With DEBUG defined, each inserted instruction also gets the next value of a function-static serial counter. The capacity of instrs[] is only checked by assert(). */ static void insert_instr(struct ir3_shader *shader, + struct ir3_instruction *instr) +{ +#ifdef DEBUG + static uint32_t serialno = 0; + instr->serialno = ++serialno; +#endif + assert(shader->instrs_count < ARRAY_SIZE(shader->instrs)); + shader->instrs[shader->instrs_count++] = instr; +} + +/* Create a block in the shader heap with room for ntmp temporaries, nin inputs and nout outputs. A single ir3_alloc() call holds the struct followed by the three pointer arrays, in that order; the array pointers, the three counts and block->shader are set here. parent and head are not written by this function (presumably left zero by the calloc()ed shader heap -- confirm). */ struct ir3_block * ir3_block_create(struct ir3_shader *shader, + unsigned ntmp, unsigned nin, unsigned nout) +{ + struct ir3_block *block; + unsigned size; + char *ptr; + + size = sizeof(*block); + size += sizeof(block->temporaries[0]) * ntmp; + size += sizeof(block->inputs[0]) * nin; + size += sizeof(block->outputs[0]) * nout; + + ptr = ir3_alloc(shader, size); + + block = (void *)ptr; + ptr += sizeof(*block); + + block->temporaries = (void *)ptr; + block->ntemporaries = ntmp; + ptr += sizeof(block->temporaries[0]) * ntmp; + + block->inputs = (void *)ptr; + block->ninputs = nin; + ptr += sizeof(block->inputs[0]) * nin; + + block->outputs = (void *)ptr; + block->noutputs = nout; + ptr += sizeof(block->outputs[0]) * nout; + + block->shader = shader; + + return block; +} + +/* Allocate an instruction from block->shader's heap, record its block, category and opc, and append it to that shader's instruction list. No other field is written here. */ struct ir3_instruction * ir3_instr_create(struct ir3_block *block, + int category, opc_t opc) +{ + struct ir3_instruction *instr = + ir3_alloc(block->shader, sizeof(struct ir3_instruction)); + instr->block = block; + instr->category = category; + instr->opc = opc; + insert_instr(block->shader, instr); + return instr; +} + +/* Copy instr into a new instruction appended to the same shader; regs_count is reset and each register is re-created through ir3_reg_create() and then overwritten with the original's contents, so the clone owns separate ir3_register objects. */ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) +{ + struct ir3_instruction *new_instr = + ir3_alloc(instr->block->shader, sizeof(struct ir3_instruction)); + unsigned i; + + *new_instr = *instr; + insert_instr(instr->block->shader, new_instr); + + /* clone registers: */ + new_instr->regs_count = 0; + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + struct ir3_register *new_reg = +
ir3_reg_create(new_instr, reg->num, reg->flags); + *new_reg = *reg; + } + + return new_instr; +} + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags) +{ + struct ir3_register *reg = reg_create(instr->block->shader, num, flags); + assert(instr->regs_count < ARRAY_SIZE(instr->regs)); + instr->regs[instr->regs_count++] = reg; + return reg; +} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h new file mode 100644 index 00000000000..896bec114fa --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/ir3.h @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IR3_H_ +#define IR3_H_ + +#include <stdint.h> +#include <stdbool.h> + +#include "instr-a3xx.h" +#include "disasm.h" /* TODO move 'enum shader_t' somewhere else..
*/
+
+/* Low-level intermediate representation (IR) of an adreno shader program. */
+
+struct ir3_shader;
+struct ir3_instruction;
+struct ir3_block;
+
+struct ir3_shader * fd_asm_parse(const char *src);
+
+/* Size and register-usage figures for a shader (see the 'info' argument
+ * of ir3_shader_assemble()):
+ */
+struct ir3_shader_info {
+	uint16_t sizedwords;
+	uint16_t instrs_count;   /* expanded to account for rpt's */
+	/* NOTE: max_reg and friends do not count registers that the shader
+	 * itself never touches (ie. vertex fetched via VFD_DECODE but not
+	 * touched by shader)
+	 */
+	int8_t   max_reg;        /* highest GPR # used by shader */
+	int8_t   max_half_reg;
+	int8_t   max_const;
+};
+
+struct ir3_register {
+	enum {
+		IR3_REG_CONST   = 0x001,
+		IR3_REG_IMMED   = 0x002,
+		IR3_REG_HALF    = 0x004,
+		IR3_REG_RELATIV = 0x008,
+		IR3_REG_R       = 0x010,
+		IR3_REG_NEGATE  = 0x020,
+		IR3_REG_ABS     = 0x040,
+		IR3_REG_EVEN    = 0x080,
+		IR3_REG_POS_INF = 0x100,
+		/* (ei) flag, end-input?  It is set on the last bary, presumably
+		 * to signal that the shader needs no more input:
+		 */
+		IR3_REG_EI      = 0x200,
+	} flags;
+	union {
+		/* normal registers:
+		 * the low two bits of the reg # hold the component, so
+		 * rN.x becomes: (N << 2) | x
+		 */
+		int num;
+		/* immediate: */
+		int iim_val;
+		float fim_val;
+		/* relative: */
+		int offset;
+	};
+
+	/* Used for cat5 instructions, and also for internal/IR level
+	 * tracking of which registers an instruction reads/writes.
+	 * wrmask may be a bad name, since it describes both src and dst
+	 * operands that touch multiple adjacent registers.
+	 */
+	int wrmask;
+};
+
+struct ir3_instruction {
+	struct ir3_block *block;
+	int category;
+	opc_t opc;
+	enum {
+		/* (sy) flag is set on the first instruction, and after sample
+		 * instructions (probably just on RAW hazard).
+		 */
+		IR3_INSTR_SY    = 0x001,
+		/* (ss) flag is set on the first instruction, and on the first
+		 * instruction to depend on the result of "long" instructions
+		 * (RAW hazard):
+		 *
+		 *   rcp, rsq, log2, exp2, sin, cos, sqrt
+		 *
+		 * It seems to synchronize until all in-flight instructions
+		 * have completed, for example:
+		 *
+		 *   rsq hr1.w, hr1.w
+		 *   add.f hr2.z, (neg)hr2.z, hc0.y
+		 *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
+		 *   rsq hr2.x, hr2.x
+		 *   (rpt1)nop
+		 *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
+		 *   nop
+		 *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
+		 *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
+		 *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
+		 *
+		 * The last mul.f does not have (ss) set, presumably because
+		 * the (ss) on the previous instruction does the job.
+		 *
+		 * The blob driver also seems to set it on WAR hazards, although
+		 * it is not really clear whether that is needed or whether the
+		 * blob compiler is just being sloppy.  So far I haven't found a
+		 * case where removing the (ss) causes problems for a WAR
+		 * hazard, but I could just be getting lucky:
+		 *
+		 *   rcp r1.y, r3.y
+		 *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
+		 *
+		 */
+		IR3_INSTR_SS    = 0x002,
+		/* (jp) flag is set on jump targets:
+		 */
+		IR3_INSTR_JP    = 0x004,
+		IR3_INSTR_UL    = 0x008,
+		IR3_INSTR_3D    = 0x010,
+		IR3_INSTR_A     = 0x020,
+		IR3_INSTR_O     = 0x040,
+		IR3_INSTR_P     = 0x080,
+		IR3_INSTR_S     = 0x100,
+		IR3_INSTR_S2EN  = 0x200,
+	} flags;
+	int repeat;
+	/* number of valid entries in regs[]: */
+	unsigned regs_count;
+	struct ir3_register *regs[5];
+	/* per-category payload: */
+	union {
+		struct {
+			char inv;
+			char comp;
+			int  immed;
+		} cat0;
+		struct {
+			type_t src_type, dst_type;
+		} cat1;
+		struct {
+			enum {
+				IR3_COND_LT = 0,
+				IR3_COND_LE = 1,
+				IR3_COND_GT = 2,
+				IR3_COND_GE = 3,
+				IR3_COND_EQ = 4,
+				IR3_COND_NE = 5,
+			} condition;
+		} cat2;
+		struct {
+			unsigned samp, tex;
+			type_t type;
+		} cat5;
+		struct {
+			type_t type;
+			int offset;
+			int iim_val;
+		} cat6;
+	};
+#ifdef DEBUG
+	uint32_t serialno;
+#endif
+};
+
+#define MAX_INSTRS 1024
+
+struct ir3_shader {
+	unsigned instrs_count;
+	struct ir3_instruction *instrs[MAX_INSTRS];
+	uint32_t heap[128 * MAX_INSTRS];
+	unsigned heap_idx;
+};
+
+struct ir3_block {
+	struct ir3_shader *shader;
+	unsigned ntemporaries, ninputs, noutputs;
+	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
+	struct ir3_instruction **temporaries;
+	struct ir3_instruction **inputs;
+	struct ir3_instruction **outputs;
+	struct ir3_block *parent;
+	struct ir3_instruction *head;
+};
+
+struct ir3_shader * ir3_shader_create(void);
+void ir3_shader_destroy(struct ir3_shader *shader);
+void * ir3_shader_assemble(struct ir3_shader *shader,
+		struct ir3_shader_info *info);
+
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+		unsigned ntmp, unsigned nin, unsigned nout);
+
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
+		int category, opc_t opc);
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+		int num, int flags);
+
+
+/* Pack a register number and a component into a register id.
+ *
+ * comp:
+ *   0 - x
+ *   1 - y
+ *   2 - z
+ *   3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+	int component = comp & 0x3;
+	return (num << 2) | component;
+}
+
+/* register number part of reg->num (everything above the low two bits) */
+static inline uint32_t reg_num(struct ir3_register *reg)
+{
+	int id = reg->num;
+	return id >> 2;
+}
+
+/* component part of reg->num (the low two bits) */
+static inline uint32_t reg_comp(struct ir3_register *reg)
+{
+	int id = reg->num;
+	return id & 0x3;
+}
+
+/* true for instruction categories 1 through 3 */
+static inline bool is_alu(struct ir3_instruction *instr)
+{
+	int cat = instr->category;
+	return (cat >= 1) && (cat <= 3);
+}
+
+/* true for category 4 instructions */
+static inline bool is_sfu(struct ir3_instruction *instr)
+{
+	return instr->category == 4;
+}
+
+/* true for category 5 instructions */
+static inline bool is_tex(struct ir3_instruction *instr)
+{
+	return instr->category == 5;
+}
+
+/* true for a category 2 instruction whose opcode is OPC_BARY_F */
+static inline bool is_input(struct ir3_instruction *instr)
+{
+	if (instr->category != 2)
+		return false;
+	return instr->opc == OPC_BARY_F;
+}
+
+/* true when the register is neither a const nor an immediate */
+static inline bool is_gpr(struct ir3_register *reg)
+{
+	return (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) == 0;
+}
+
+/* TODO combine is_gpr()/reg_gpr()..
*/
+/* Returns true when r is not a const, immediate or relative register and
+ * its register number is neither REG_A0 nor REG_P0.
+ */
+static inline bool reg_gpr(struct ir3_register *r)
+{
+	uint32_t num;
+
+	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))
+		return false;
+	num = reg_num(r);
+	if ((num == REG_A0) || (num == REG_P0))
+		return false;
+	return true;
+}
+
+/* element count of a true array (not valid for pointers) */
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
+
+#include <string.h>
+
+#define MAX_REG 256
+
+/* One bit per register id.  Bits [0, MAX_REG) are used for full registers
+ * and bits [MAX_REG, 2 * MAX_REG) for half registers (see regmask_idx()).
+ */
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
+
+/* Bit index of reg's first component: reg->num, offset by MAX_REG when
+ * IR3_REG_HALF is set.  reg->num must be below MAX_REG.
+ */
+static inline unsigned regmask_idx(struct ir3_register *reg)
+{
+	unsigned num = reg->num;
+	assert(num < MAX_REG);
+	if (reg->flags & IR3_REG_HALF)
+		num += MAX_REG;
+	return num;
+}
+
+/* clear every bit of the mask */
+static inline void regmask_init(regmask_t *regmask)
+{
+	memset(regmask, 0, sizeof(*regmask));
+}
+
+/* Set the bit for each of the (up to four) consecutive register ids
+ * starting at reg that are selected by reg->wrmask.
+ */
+static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++) {
+		if (!(reg->wrmask & (1 << i)))
+			continue;
+		/* regmask_idx() only range-checks the first component;
+		 * the following ones must stay inside the mask as well:
+		 */
+		assert(idx / 8 < ARRAY_SIZE(*regmask));
+		(*regmask)[idx / 8] |= 1 << (idx % 8);
+	}
+}
+
+/* set bits in a if not set in b, conceptually:
+ *   a |= (reg & ~b)
+ */
+static inline void regmask_set_if_not(regmask_t *a,
+		struct ir3_register *reg, regmask_t *b)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++) {
+		if (!(reg->wrmask & (1 << i)))
+			continue;
+		/* keep later components inside both masks: */
+		assert(idx / 8 < ARRAY_SIZE(*a));
+		if (!((*b)[idx / 8] & (1 << (idx % 8))))
+			(*a)[idx / 8] |= 1 << (idx % 8);
+	}
+}
+
+/* Returns non-zero if any register id selected by reg->wrmask (starting
+ * at reg) has its bit set in the mask, zero otherwise.
+ */
+static inline unsigned regmask_get(regmask_t *regmask,
+		struct ir3_register *reg)
+{
+	unsigned idx = regmask_idx(reg);
+	unsigned i;
+	for (i = 0; i < 4; i++, idx++) {
+		if (!(reg->wrmask & (1 << i)))
+			continue;
+		/* keep later components inside the mask: */
+		assert(idx / 8 < ARRAY_SIZE(*regmask));
+		if ((*regmask)[idx / 8] & (1 << (idx % 8)))
+			return true;
+	}
+	return false;
+}
+
+/* ************************************************************************* */
+
+#endif /* IR3_H_ */