From: Rob Clark Date: Sat, 20 Apr 2013 21:59:41 +0000 (-0400) Subject: freedreno: move ir -> ir2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=26b39df08f480b3f1d71608ef3c2b56b8be94f3e;p=mesa.git freedreno: move ir -> ir2 There will be a new IR for a3xx, which has a very different shader ISA (more scalar oriented). So rename to avoid conflicts later when I start adding a3xx support to the gallium driver. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 9bb532dc181..dde0cf1647b 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -27,6 +27,6 @@ libfreedreno_la_SOURCES = \ freedreno_screen.c \ freedreno_gmem.c \ freedreno_compiler.c \ - ir.c \ + ir-a2xx.c \ disasm.c diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c index 7eed8fae3b9..3d7f7c970d6 100644 --- a/src/gallium/drivers/freedreno/freedreno_compiler.c +++ b/src/gallium/drivers/freedreno/freedreno_compiler.c @@ -41,7 +41,7 @@ #include "freedreno_util.h" #include "instr-a2xx.h" -#include "ir.h" +#include "ir-a2xx.h" struct fd_compile_context { struct fd_program_stateobj *prog; @@ -52,7 +52,7 @@ struct fd_compile_context { /* predicate stack: */ int pred_depth; - enum ir_pred pred_stack[8]; + enum ir2_pred pred_stack[8]; /* Internal-Temporary and Predicate register assignment: * @@ -93,7 +93,7 @@ struct fd_compile_context { uint64_t need_sync; /* current exec CF instruction */ - struct ir_cf *cf; + struct ir2_cf *cf; }; static int @@ -233,29 +233,29 @@ compile_free(struct fd_compile_context *ctx) tgsi_parse_free(&ctx->parser); } -static struct ir_cf * +static struct ir2_cf * next_exec_cf(struct fd_compile_context *ctx) { - struct ir_cf *cf = ctx->cf; + struct ir2_cf *cf = ctx->cf; if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) - ctx->cf = cf = ir_cf_create(ctx->so->ir, EXEC); + 
ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC); return cf; } static void compile_vtx_fetch(struct fd_compile_context *ctx) { - struct ir_instruction **vfetch_instrs = ctx->so->vfetch_instrs; + struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; int i; for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { - struct ir_instruction *instr = ir_instr_create( - next_exec_cf(ctx), IR_FETCH); + struct ir2_instruction *instr = ir2_instr_create( + next_exec_cf(ctx), IR2_FETCH); instr->fetch.opc = VTX_FETCH; ctx->need_sync |= 1 << (i+1); - ir_reg_create(instr, i+1, "xyzw", 0); - ir_reg_create(instr, 0, "x", 0); + ir2_reg_create(instr, i+1, "xyzw", 0); + ir2_reg_create(instr, 0, "x", 0); if (i == 0) instr->sync = true; @@ -309,8 +309,8 @@ get_temp_gpr(struct fd_compile_context *ctx, int idx) return num; } -static struct ir_register * -add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, +static struct ir2_register * +add_dst_reg(struct fd_compile_context *ctx, struct ir2_instruction *alu, const struct tgsi_dst_register *dst) { unsigned flags = 0, num = 0; @@ -318,7 +318,7 @@ add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, switch (dst->File) { case TGSI_FILE_OUTPUT: - flags |= IR_REG_EXPORT; + flags |= IR2_REG_EXPORT; if (ctx->type == TGSI_PROCESSOR_VERTEX) { if (dst->Index == ctx->position) { num = 62; @@ -348,11 +348,11 @@ add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 
'w' : '_'; swiz[4] = '\0'; - return ir_reg_create(alu, num, swiz, flags); + return ir2_reg_create(alu, num, swiz, flags); } -static struct ir_register * -add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, +static struct ir2_register * +add_src_reg(struct fd_compile_context *ctx, struct ir2_instruction *alu, const struct tgsi_src_register *src) { static const char swiz_vals[] = { @@ -364,7 +364,7 @@ add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, switch (src->File) { case TGSI_FILE_CONSTANT: num = src->Index; - flags |= IR_REG_CONST; + flags |= IR2_REG_CONST; break; case TGSI_FILE_INPUT: if (ctx->type == TGSI_PROCESSOR_VERTEX) { @@ -379,7 +379,7 @@ add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, break; case TGSI_FILE_IMMEDIATE: num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; - flags |= IR_REG_CONST; + flags |= IR2_REG_CONST; break; default: DBG("unsupported src register file: %s", @@ -389,9 +389,9 @@ add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, } if (src->Absolute) - flags |= IR_REG_ABS; + flags |= IR2_REG_ABS; if (src->Negate) - flags |= IR_REG_NEGATE; + flags |= IR2_REG_NEGATE; swiz[0] = swiz_vals[src->SwizzleX]; swiz[1] = swiz_vals[src->SwizzleY]; @@ -400,16 +400,16 @@ add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, swiz[4] = '\0'; if ((ctx->need_sync & (uint64_t)(1 << num)) && - !(flags & IR_REG_CONST)) { + !(flags & IR2_REG_CONST)) { alu->sync = true; ctx->need_sync &= ~(uint64_t)(1 << num); } - return ir_reg_create(alu, num, swiz, flags); + return ir2_reg_create(alu, num, swiz, flags); } static void -add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) +add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -425,7 +425,7 @@ add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) } static void 
-add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) +add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -442,7 +442,7 @@ add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) static void add_regs_vector_1(struct fd_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir_instruction *alu) + struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { assert(inst->Instruction.NumSrcRegs == 1); assert(inst->Instruction.NumDstRegs == 1); @@ -455,7 +455,7 @@ add_regs_vector_1(struct fd_compile_context *ctx, static void add_regs_vector_2(struct fd_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir_instruction *alu) + struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { assert(inst->Instruction.NumSrcRegs == 2); assert(inst->Instruction.NumDstRegs == 1); @@ -468,7 +468,7 @@ add_regs_vector_2(struct fd_compile_context *ctx, static void add_regs_vector_3(struct fd_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir_instruction *alu) + struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { assert(inst->Instruction.NumSrcRegs == 3); assert(inst->Instruction.NumDstRegs == 1); @@ -485,19 +485,19 @@ add_regs_vector_3(struct fd_compile_context *ctx, } static void -add_regs_dummy_vector(struct ir_instruction *alu) +add_regs_dummy_vector(struct ir2_instruction *alu) { /* create dummy, non-written vector dst/src regs * for unused vector instr slot: */ - ir_reg_create(alu, 0, "____", 0); /* vector dst */ - ir_reg_create(alu, 0, NULL, 0); /* vector src1 */ - ir_reg_create(alu, 0, NULL, 0); /* vector src2 */ + ir2_reg_create(alu, 0, "____", 0); /* vector dst */ + ir2_reg_create(alu, 0, NULL, 0); /* vector src1 */ + ir2_reg_create(alu, 0, NULL, 0); /* vector src2 */ } static void add_regs_scalar_1(struct fd_compile_context *ctx, - struct 
tgsi_full_instruction *inst, struct ir_instruction *alu) + struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { assert(inst->Instruction.NumSrcRegs == 1); assert(inst->Instruction.NumDstRegs == 1); @@ -590,7 +590,7 @@ get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst, static void push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src) { - struct ir_instruction *alu; + struct ir2_instruction *alu; struct tgsi_dst_register pred_dst; /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by @@ -604,7 +604,7 @@ push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src) get_predicate(ctx, &pred_dst, NULL); - alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); + alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); add_regs_dummy_vector(alu); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, src); @@ -613,15 +613,15 @@ push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src) get_predicate(ctx, &pred_dst, &pred_src); - alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, &pred_src); add_src_reg(ctx, alu, src); - // XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make + // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make // sure src reg is valid if it was calculated with a predicate // condition.. 
- alu->pred = IR_PRED_NONE; + alu->pred = IR2_PRED_NONE; } /* save previous pred state to restore in pop_predicate(): */ @@ -642,17 +642,17 @@ pop_predicate(struct fd_compile_context *ctx) ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; if (ctx->pred_depth != 0) { - struct ir_instruction *alu; + struct ir2_instruction *alu; struct tgsi_dst_register pred_dst; struct tgsi_src_register pred_src; get_predicate(ctx, &pred_dst, &pred_src); - alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); + alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); add_regs_dummy_vector(alu); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, &pred_src); - alu->pred = IR_PRED_NONE; + alu->pred = IR2_PRED_NONE; } else { /* predicate register no longer needed: */ ctx->pred_reg = -1; @@ -715,16 +715,16 @@ translate_pow(struct fd_compile_context *ctx, { struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; - struct ir_instruction *alu; + struct ir2_instruction *alu; get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); - alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); + alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); add_regs_dummy_vector(alu); add_dst_reg(ctx, alu, &tmp_dst); add_src_reg(ctx, alu, &inst->Src[0].Register); - alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, alu, &tmp_dst); add_src_reg(ctx, alu, &tmp_src); add_src_reg(ctx, alu, &inst->Src[1].Register); @@ -751,7 +751,7 @@ translate_pow(struct fd_compile_context *ctx, break; } - alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); + alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); add_regs_dummy_vector(alu); add_dst_reg(ctx, alu, &inst->Dst[0].Register); add_src_reg(ctx, alu, &tmp_src); @@ -762,7 +762,7 @@ static void translate_tex(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { - struct ir_instruction 
*instr; + struct ir2_instruction *instr; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; const struct tgsi_src_register *coord; @@ -783,7 +783,7 @@ translate_tex(struct fd_compile_context *ctx, * * dst = texture_sample(unit, coord, bias) */ - instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); /* MAXv: */ add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; @@ -794,7 +794,7 @@ translate_tex(struct fd_compile_context *ctx, add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww"; - instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; add_src_reg(ctx, instr, &inst->Src[0].Register); @@ -804,7 +804,7 @@ translate_tex(struct fd_compile_context *ctx, coord = &inst->Src[0].Register; } - instr = ir_instr_create(next_exec_cf(ctx), IR_FETCH); + instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH); instr->fetch.opc = TEX_FETCH; assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? 
@@ -828,7 +828,7 @@ translate_tex(struct fd_compile_context *ctx, * the texture to a temp and the use ALU instruction to move * to output */ - instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src); @@ -843,7 +843,7 @@ static void translate_sge_slt(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { - struct ir_instruction *instr; + struct ir2_instruction *instr; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; struct tgsi_src_register tmp_const; @@ -864,12 +864,12 @@ translate_sge_slt(struct fd_compile_context *ctx, get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); - instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE; + add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; add_src_reg(ctx, instr, &inst->Src[1].Register); - instr = ir_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0); add_dst_reg(ctx, instr, &inst->Dst[0].Register); /* maybe should re-arrange the syntax some day, but * in assembler/disassembler and what ir.c expects @@ -888,7 +888,7 @@ translate_lrp(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { - struct ir_instruction *instr; + struct ir2_instruction *instr; struct tgsi_dst_register tmp_dst1, tmp_dst2; struct tgsi_src_register tmp_src1, tmp_src2; struct tgsi_src_register tmp_const; @@ -899,25 +899,25 @@ translate_lrp(struct fd_compile_context *ctx, get_immediate(ctx, &tmp_const, fui(1.0)); /* tmp1 = (a * b) */ - instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 
add_dst_reg(ctx, instr, &tmp_dst1); add_src_reg(ctx, instr, &inst->Src[0].Register); add_src_reg(ctx, instr, &inst->Src[1].Register); /* tmp2 = (1 - a) */ - instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst2); add_src_reg(ctx, instr, &tmp_const); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE; + add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; /* tmp2 = tmp2 * c */ - instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, instr, &tmp_dst2); add_src_reg(ctx, instr, &tmp_src2); add_src_reg(ctx, instr, &inst->Src[2].Register); /* dst = tmp1 + tmp2 */ - instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src1); add_src_reg(ctx, instr, &tmp_src2); @@ -928,7 +928,7 @@ translate_trig(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { - struct ir_instruction *instr; + struct ir2_instruction *instr; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; struct tgsi_src_register tmp_const; @@ -955,7 +955,7 @@ translate_trig(struct fd_compile_context *ctx, * in assembler/disassembler and what ir.c expects * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 */ - instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst); get_immediate(ctx, &tmp_const, fui(0.5)); add_src_reg(ctx, instr, &tmp_const); @@ -963,12 +963,12 @@ translate_trig(struct fd_compile_context *ctx, get_immediate(ctx, &tmp_const, fui(0.159155)); add_src_reg(ctx, instr, &tmp_const); - instr = ir_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); 
add_dst_reg(ctx, instr, &tmp_dst); add_src_reg(ctx, instr, &tmp_src); add_src_reg(ctx, instr, &tmp_src); - instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst); get_immediate(ctx, &tmp_const, fui(-3.141593)); add_src_reg(ctx, instr, &tmp_const); @@ -976,7 +976,7 @@ translate_trig(struct fd_compile_context *ctx, get_immediate(ctx, &tmp_const, fui(6.283185)); add_src_reg(ctx, instr, &tmp_const); - instr = ir_instr_create_alu(next_exec_cf(ctx), ~0, op); + instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op); add_regs_dummy_vector(instr); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src); @@ -991,8 +991,8 @@ translate_instruction(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst) { unsigned opc = inst->Instruction.Opcode; - struct ir_instruction *instr; - static struct ir_cf *cf; + struct ir2_instruction *instr; + static struct ir2_cf *cf; if (opc == TGSI_OPCODE_END) return; @@ -1007,14 +1007,14 @@ translate_instruction(struct fd_compile_context *ctx, if ((num == ctx->position) || (num == ctx->psize)) { if (ctx->num_position > 0) { ctx->cf = NULL; - ir_cf_create_alloc(ctx->so->ir, SQ_POSITION, + ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION, ctx->num_position - 1); ctx->num_position = 0; } } else { if (ctx->num_param > 0) { ctx->cf = NULL; - ir_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, + ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, ctx->num_param - 1); ctx->num_param = 0; } @@ -1026,39 +1026,39 @@ translate_instruction(struct fd_compile_context *ctx, /* TODO turn this into a table: */ switch (opc) { case TGSI_OPCODE_MOV: - instr = ir_instr_create_alu(cf, MAXv, ~0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_RCP: - instr = ir_instr_create_alu(cf, ~0, RECIP_IEEE); + instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE); add_regs_scalar_1(ctx, 
inst, instr); break; case TGSI_OPCODE_RSQ: - instr = ir_instr_create_alu(cf, ~0, RECIPSQ_IEEE); + instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case TGSI_OPCODE_MUL: - instr = ir_instr_create_alu(cf, MULv, ~0); + instr = ir2_instr_create_alu(cf, MULv, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_ADD: - instr = ir_instr_create_alu(cf, ADDv, ~0); + instr = ir2_instr_create_alu(cf, ADDv, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_DP3: - instr = ir_instr_create_alu(cf, DOT3v, ~0); + instr = ir2_instr_create_alu(cf, DOT3v, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_DP4: - instr = ir_instr_create_alu(cf, DOT4v, ~0); + instr = ir2_instr_create_alu(cf, DOT4v, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_MIN: - instr = ir_instr_create_alu(cf, MINv, ~0); + instr = ir2_instr_create_alu(cf, MINv, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_MAX: - instr = ir_instr_create_alu(cf, MAXv, ~0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_SLT: @@ -1066,31 +1066,31 @@ translate_instruction(struct fd_compile_context *ctx, translate_sge_slt(ctx, inst, opc); break; case TGSI_OPCODE_MAD: - instr = ir_instr_create_alu(cf, MULADDv, ~0); + instr = ir2_instr_create_alu(cf, MULADDv, ~0); add_regs_vector_3(ctx, inst, instr); break; case TGSI_OPCODE_LRP: translate_lrp(ctx, inst, opc); break; case TGSI_OPCODE_FRC: - instr = ir_instr_create_alu(cf, FRACv, ~0); + instr = ir2_instr_create_alu(cf, FRACv, ~0); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_FLR: - instr = ir_instr_create_alu(cf, FLOORv, ~0); + instr = ir2_instr_create_alu(cf, FLOORv, ~0); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_EX2: - instr = ir_instr_create_alu(cf, ~0, EXP_IEEE); + instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case 
TGSI_OPCODE_POW: translate_pow(ctx, inst); break; case TGSI_OPCODE_ABS: - instr = ir_instr_create_alu(cf, MAXv, ~0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); add_regs_vector_1(ctx, inst, instr); - instr->regs[1]->flags |= IR_REG_NEGATE; /* src0 */ + instr->regs[1]->flags |= IR2_REG_NEGATE; /* src0 */ break; case TGSI_OPCODE_COS: case TGSI_OPCODE_SIN: @@ -1101,17 +1101,17 @@ translate_instruction(struct fd_compile_context *ctx, translate_tex(ctx, inst, opc); break; case TGSI_OPCODE_CMP: - instr = ir_instr_create_alu(cf, CNDGTEv, ~0); + instr = ir2_instr_create_alu(cf, CNDGTEv, ~0); add_regs_vector_3(ctx, inst, instr); // TODO this should be src0 if regs where in sane order.. - instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */ + instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */ break; case TGSI_OPCODE_IF: push_predicate(ctx, &inst->Src[0].Register); - ctx->so->ir->pred = IR_PRED_EQ; + ctx->so->ir->pred = IR2_PRED_EQ; break; case TGSI_OPCODE_ELSE: - ctx->so->ir->pred = IR_PRED_NE; + ctx->so->ir->pred = IR2_PRED_NE; /* not sure if this is required in all cases, but blob compiler * won't combine EQ and NE in same CF: */ @@ -1121,7 +1121,7 @@ translate_instruction(struct fd_compile_context *ctx, pop_predicate(ctx); break; case TGSI_OPCODE_F2I: - instr = ir_instr_create_alu(cf, TRUNCv, ~0); + instr = ir2_instr_create_alu(cf, TRUNCv, ~0); add_regs_vector_1(ctx, inst, instr); break; default: @@ -1162,8 +1162,8 @@ fd_compile_shader(struct fd_program_stateobj *prog, { struct fd_compile_context ctx; - ir_shader_destroy(so->ir); - so->ir = ir_shader_create(); + ir2_shader_destroy(so->ir); + so->ir = ir2_shader_create(); so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index ffd257203ec..22573df9fe1 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ 
b/src/gallium/drivers/freedreno/freedreno_program.c @@ -53,7 +53,7 @@ create_shader(enum shader_t type) static void delete_shader(struct fd_shader_stateobj *so) { - ir_shader_destroy(so->ir); + ir2_shader_destroy(so->ir); FREE(so->tokens); FREE(so); } @@ -62,7 +62,7 @@ static struct fd_shader_stateobj * assemble(struct fd_shader_stateobj *so) { free(so->bin); - so->bin = ir_shader_assemble(so->ir, &so->info); + so->bin = ir2_shader_assemble(so->ir, &so->info); if (!so->bin) goto fail; @@ -187,7 +187,7 @@ patch_vtx_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so, /* update vtx fetch instructions: */ for (i = 0; i < so->num_vfetch_instrs; i++) { - struct ir_instruction *instr = so->vfetch_instrs[i]; + struct ir2_instruction *instr = so->vfetch_instrs[i]; struct pipe_vertex_element *elem = &vtx->pipe[i]; struct pipe_vertex_buffer *vb = &ctx->vertexbuf.vb[elem->vertex_buffer_index]; @@ -241,7 +241,7 @@ patch_tex_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so, /* update tex fetch instructions: */ for (i = 0; i < so->num_tfetch_instrs; i++) { - struct ir_instruction *instr = so->tfetch_instrs[i].instr; + struct ir2_instruction *instr = so->tfetch_instrs[i].instr; unsigned samp_id = so->tfetch_instrs[i].samp_id; unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id); @@ -289,8 +289,8 @@ void fd_program_emit(struct fd_ringbuffer *ring, struct fd_program_stateobj *prog) { - struct ir_shader_info *vsi = &prog->vp->info; - struct ir_shader_info *fsi = &prog->fp->info; + struct ir2_shader_info *vsi = &prog->vp->info; + struct ir2_shader_info *fsi = &prog->fp->info; uint8_t vs_gprs, fs_gprs, vs_export; emit(ring, prog->vp); @@ -322,28 +322,28 @@ static struct fd_shader_stateobj * create_blit_fp(void) { struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir_cf *cf; - struct ir_instruction *instr; + struct ir2_cf *cf; + struct ir2_instruction *instr; if (!so) return NULL; - so->ir = ir_shader_create(); + so->ir = 
ir2_shader_create(); - cf = ir_cf_create(so->ir, EXEC); + cf = ir2_cf_create(so->ir, EXEC); - instr = ir_instr_create_tex_fetch(cf, 0); - ir_reg_create(instr, 0, "xyzw", 0); - ir_reg_create(instr, 0, "xyx", 0); + instr = ir2_instr_create_tex_fetch(cf, 0); + ir2_reg_create(instr, 0, "xyzw", 0); + ir2_reg_create(instr, 0, "xyx", 0); instr->sync = true; - cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir_cf_create(so->ir, EXEC_END); + cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir2_cf_create(so->ir, EXEC_END); - instr = ir_instr_create_alu(cf, MAXv, ~0); - ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); - ir_reg_create(instr, 0, NULL, 0); - ir_reg_create(instr, 0, NULL, 0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); + ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + ir2_reg_create(instr, 0, NULL, 0); + ir2_reg_create(instr, 0, NULL, 0); return assemble(so); } @@ -364,41 +364,41 @@ static struct fd_shader_stateobj * create_blit_vp(void) { struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir_cf *cf; - struct ir_instruction *instr; + struct ir2_cf *cf; + struct ir2_instruction *instr; if (!so) return NULL; - so->ir = ir_shader_create(); + so->ir = ir2_shader_create(); - cf = ir_cf_create(so->ir, EXEC); + cf = ir2_cf_create(so->ir, EXEC); - instr = ir_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8); + instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8); instr->fetch.is_normalized = true; - ir_reg_create(instr, 1, "xy01", 0); - ir_reg_create(instr, 0, "x", 0); + ir2_reg_create(instr, 1, "xy01", 0); + ir2_reg_create(instr, 0, "x", 0); - instr = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); + instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); instr->fetch.is_normalized = true; - ir_reg_create(instr, 2, "xyz1", 0); - ir_reg_create(instr, 0, "x", 0); + ir2_reg_create(instr, 2, "xyz1", 0); + ir2_reg_create(instr, 0, "x", 0); - cf = 
ir_cf_create_alloc(so->ir, SQ_POSITION, 0); - cf = ir_cf_create(so->ir, EXEC); + cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0); + cf = ir2_cf_create(so->ir, EXEC); - instr = ir_instr_create_alu(cf, MAXv, ~0); - ir_reg_create(instr, 62, NULL, IR_REG_EXPORT); - ir_reg_create(instr, 2, NULL, 0); - ir_reg_create(instr, 2, NULL, 0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); + ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT); + ir2_reg_create(instr, 2, NULL, 0); + ir2_reg_create(instr, 2, NULL, 0); - cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir_cf_create(so->ir, EXEC_END); + cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir2_cf_create(so->ir, EXEC_END); - instr = ir_instr_create_alu(cf, MAXv, ~0); - ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); - ir_reg_create(instr, 1, NULL, 0); - ir_reg_create(instr, 1, NULL, 0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); + ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + ir2_reg_create(instr, 1, NULL, 0); + ir2_reg_create(instr, 1, NULL, 0); return assemble(so); @@ -414,21 +414,21 @@ static struct fd_shader_stateobj * create_solid_fp(void) { struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir_cf *cf; - struct ir_instruction *instr; + struct ir2_cf *cf; + struct ir2_instruction *instr; if (!so) return NULL; - so->ir = ir_shader_create(); + so->ir = ir2_shader_create(); - cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir_cf_create(so->ir, EXEC_END); + cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir2_cf_create(so->ir, EXEC_END); - instr = ir_instr_create_alu(cf, MAXv, ~0); - ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); - ir_reg_create(instr, 0, NULL, IR_REG_CONST); - ir_reg_create(instr, 0, NULL, IR_REG_CONST); + instr = ir2_instr_create_alu(cf, MAXv, ~0); + ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); + ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); return assemble(so); } @@ 
-447,30 +447,30 @@ static struct fd_shader_stateobj * create_solid_vp(void) { struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir_cf *cf; - struct ir_instruction *instr; + struct ir2_cf *cf; + struct ir2_instruction *instr; if (!so) return NULL; - so->ir = ir_shader_create(); + so->ir = ir2_shader_create(); - cf = ir_cf_create(so->ir, EXEC); + cf = ir2_cf_create(so->ir, EXEC); - instr = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); - ir_reg_create(instr, 1, "xyz1", 0); - ir_reg_create(instr, 0, "x", 0); + instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); + ir2_reg_create(instr, 1, "xyz1", 0); + ir2_reg_create(instr, 0, "x", 0); - cf = ir_cf_create_alloc(so->ir, SQ_POSITION, 0); - cf = ir_cf_create(so->ir, EXEC); + cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0); + cf = ir2_cf_create(so->ir, EXEC); - instr = ir_instr_create_alu(cf, MAXv, ~0); - ir_reg_create(instr, 62, NULL, IR_REG_EXPORT); - ir_reg_create(instr, 1, NULL, 0); - ir_reg_create(instr, 1, NULL, 0); + instr = ir2_instr_create_alu(cf, MAXv, ~0); + ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT); + ir2_reg_create(instr, 1, NULL, 0); + ir2_reg_create(instr, 1, NULL, 0); - cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir_cf_create(so->ir, EXEC_END); + cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir2_cf_create(so->ir, EXEC_END); return assemble(so); } diff --git a/src/gallium/drivers/freedreno/freedreno_program.h b/src/gallium/drivers/freedreno/freedreno_program.h index e73cf1bbb97..9871b0c3b1d 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.h +++ b/src/gallium/drivers/freedreno/freedreno_program.h @@ -33,7 +33,7 @@ #include "freedreno_context.h" -#include "ir.h" +#include "ir-a2xx.h" #include "disasm.h" struct fd_shader_stateobj { @@ -47,14 +47,14 @@ struct fd_shader_stateobj { * and if one changes, we potentially need to recompile in order to * get varying linkages correct: */ - struct 
ir_shader_info info; - struct ir_shader *ir; + struct ir2_shader_info info; + struct ir2_shader *ir; /* for vertex shaders, the fetch instructions which need to be * patched up before assembly: */ unsigned num_vfetch_instrs; - struct ir_instruction *vfetch_instrs[64]; + struct ir2_instruction *vfetch_instrs[64]; /* for all shaders, any tex fetch instructions which need to be * patched before assembly: @@ -62,7 +62,7 @@ struct fd_shader_stateobj { unsigned num_tfetch_instrs; struct { unsigned samp_id; - struct ir_instruction *instr; + struct ir2_instruction *instr; } tfetch_instrs[64]; unsigned first_immediate; /* const reg # of first immediate */ diff --git a/src/gallium/drivers/freedreno/ir-a2xx.c b/src/gallium/drivers/freedreno/ir-a2xx.c new file mode 100644 index 00000000000..9d81e2e6f11 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir-a2xx.c @@ -0,0 +1,634 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir-a2xx.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "freedreno_util.h" +#include "instr-a2xx.h" + +#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) +#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) +#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__) + +#define REG_MASK 0x3f + +static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr); + +static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, + uint32_t idx, struct ir2_shader_info *info); + +static void reg_update_stats(struct ir2_register *reg, + struct ir2_shader_info *info, bool dest); +static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n); +static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg); +static uint32_t reg_alu_dst_swiz(struct ir2_register *reg); +static uint32_t reg_alu_src_swiz(struct ir2_register *reg); + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot. 
+ */ +static void * ir2_alloc(struct ir2_shader *shader, int sz) +{ + void *ptr = &shader->heap[shader->heap_idx]; + shader->heap_idx += ALIGN(sz, 4); + return ptr; +} + +static char * ir2_strdup(struct ir2_shader *shader, const char *str) +{ + char *ptr = NULL; + if (str) { + int len = strlen(str); + ptr = ir2_alloc(shader, len+1); + memcpy(ptr, str, len); + ptr[len] = '\0'; + } + return ptr; +} + +struct ir2_shader * ir2_shader_create(void) +{ + DEBUG_MSG(""); + return calloc(1, sizeof(struct ir2_shader)); +} + +void ir2_shader_destroy(struct ir2_shader *shader) +{ + DEBUG_MSG(""); + free(shader); +} + +/* resolve addr/cnt/sequence fields in the individual CF's */ +static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info) +{ + uint32_t addr; + unsigned i; + int j; + + addr = shader->cfs_count / 2; + for (i = 0; i < shader->cfs_count; i++) { + struct ir2_cf *cf = shader->cfs[i]; + if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { + uint32_t sequence = 0; + + if (cf->exec.addr && (cf->exec.addr != addr)) + WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); + if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) + WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); + + for (j = cf->exec.instrs_count - 1; j >= 0; j--) { + struct ir2_instruction *instr = cf->exec.instrs[j]; + sequence <<= 2; + if (instr->instr_type == IR2_FETCH) + sequence |= 0x1; + if (instr->sync) + sequence |= 0x2; + } + + cf->exec.addr = addr; + cf->exec.cnt = cf->exec.instrs_count; + cf->exec.sequence = sequence; + + addr += cf->exec.instrs_count; + } + } + + info->sizedwords = 3 * addr; + + return 0; +} + +void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info) +{ + uint32_t i, j; + uint32_t *ptr, *dwords = NULL; + uint32_t idx = 0; + int ret; + + info->sizedwords = 0; + info->max_reg = -1; + info->max_input_reg = 0; + info->regs_written = 0; + + /* we need an even # of CF's.. 
insert a NOP if needed */ + if (shader->cfs_count != ALIGN(shader->cfs_count, 2)) + ir2_cf_create(shader, NOP); + + /* first pass, resolve sizes and addresses: */ + ret = shader_resolve(shader, info); + if (ret) { + ERROR_MSG("resolve failed: %d", ret); + goto fail; + } + + ptr = dwords = calloc(1, 4 * info->sizedwords); + + /* second pass, emit CF program in pairs: */ + for (i = 0; i < shader->cfs_count; i += 2) { + instr_cf_t *cfs = (instr_cf_t *)ptr; + ret = cf_emit(shader->cfs[i], &cfs[0]); + if (ret) { + ERROR_MSG("CF emit failed: %d\n", ret); + goto fail; + } + ret = cf_emit(shader->cfs[i+1], &cfs[1]); + if (ret) { + ERROR_MSG("CF emit failed: %d\n", ret); + goto fail; + } + ptr += 3; + assert((ptr - dwords) <= info->sizedwords); + } + + /* third pass, emit ALU/FETCH: */ + for (i = 0; i < shader->cfs_count; i++) { + struct ir2_cf *cf = shader->cfs[i]; + if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { + for (j = 0; j < cf->exec.instrs_count; j++) { + ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); + if (ret) { + ERROR_MSG("instruction emit failed: %d", ret); + goto fail; + } + ptr += 3; + assert((ptr - dwords) <= info->sizedwords); + } + } + } + + return dwords; + +fail: + free(dwords); + return NULL; +} + + +struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type) +{ + struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf)); + DEBUG_MSG("%d", cf_type); + cf->shader = shader; + cf->cf_type = cf_type; + assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); + shader->cfs[shader->cfs_count++] = cf; + return cf; +} + + +/* + * CF instructions: + */ + +static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr) +{ + memset(instr, 0, sizeof(*instr)); + + instr->opc = cf->cf_type; + + switch (cf->cf_type) { + case NOP: + break; + case EXEC: + case EXEC_END: + assert(cf->exec.addr <= 0x1ff); + assert(cf->exec.cnt <= 0x6); + assert(cf->exec.sequence <= 0xfff); + instr->exec.address = cf->exec.addr; + instr->exec.count = 
cf->exec.cnt; + instr->exec.serialize = cf->exec.sequence; + break; + case ALLOC: + assert(cf->alloc.size <= 0xf); + instr->alloc.size = cf->alloc.size; + switch (cf->alloc.type) { + case SQ_POSITION: + case SQ_PARAMETER_PIXEL: + instr->alloc.buffer_select = cf->alloc.type; + break; + default: + ERROR_MSG("invalid alloc type: %d", cf->alloc.type); + return -1; + } + break; + case COND_EXEC: + case COND_EXEC_END: + case COND_PRED_EXEC: + case COND_PRED_EXEC_END: + case LOOP_START: + case LOOP_END: + case COND_CALL: + case RETURN: + case COND_JMP: + case COND_EXEC_PRED_CLEAN: + case COND_EXEC_PRED_CLEAN_END: + case MARK_VS_FETCH_DONE: + ERROR_MSG("TODO"); + return -1; + } + + return 0; +} + + +struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type) +{ + struct ir2_instruction *instr = + ir2_alloc(cf->shader, sizeof(struct ir2_instruction)); + DEBUG_MSG("%d", instr_type); + instr->shader = cf->shader; + instr->pred = cf->shader->pred; + instr->instr_type = instr_type; + assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); + cf->exec.instrs[cf->exec.instrs_count++] = instr; + return instr; +} + + +/* + * FETCH instructions: + */ + +static int instr_emit_fetch(struct ir2_instruction *instr, + uint32_t *dwords, uint32_t idx, + struct ir2_shader_info *info) +{ + instr_fetch_t *fetch = (instr_fetch_t *)dwords; + int reg = 0; + struct ir2_register *dst_reg = instr->regs[reg++]; + struct ir2_register *src_reg = instr->regs[reg++]; + + memset(fetch, 0, sizeof(*fetch)); + + reg_update_stats(dst_reg, info, true); + reg_update_stats(src_reg, info, false); + + fetch->opc = instr->fetch.opc; + + if (instr->fetch.opc == VTX_FETCH) { + instr_fetch_vtx_t *vtx = &fetch->vtx; + + assert(instr->fetch.stride <= 0xff); + assert(instr->fetch.fmt <= 0x3f); + assert(instr->fetch.const_idx <= 0x1f); + assert(instr->fetch.const_idx_sel <= 0x3); + + vtx->src_reg = src_reg->num; + vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); + vtx->dst_reg = dst_reg->num; + 
vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); + vtx->must_be_one = 1; + vtx->const_index = instr->fetch.const_idx; + vtx->const_index_sel = instr->fetch.const_idx_sel; + vtx->format_comp_all = !!instr->fetch.is_signed; + vtx->num_format_all = !instr->fetch.is_normalized; + vtx->format = instr->fetch.fmt; + vtx->stride = instr->fetch.stride; + vtx->offset = instr->fetch.offset; + + if (instr->pred != IR2_PRED_NONE) { + vtx->pred_select = 1; + vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; + } + + /* XXX seems like every FETCH but the first has + * this bit set: + */ + vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; + vtx->reserved0 = (idx > 0) ? 0x2 : 0x3; + } else if (instr->fetch.opc == TEX_FETCH) { + instr_fetch_tex_t *tex = &fetch->tex; + + assert(instr->fetch.const_idx <= 0x1f); + + tex->src_reg = src_reg->num; + tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); + tex->dst_reg = dst_reg->num; + tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); + tex->const_idx = instr->fetch.const_idx; + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = 1; + tex->sample_location = SAMPLE_CENTER; + + if (instr->pred != IR2_PRED_NONE) { + tex->pred_select = 1; + tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 
1 : 0; + } + + } else { + ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); + return -1; + } + + return 0; +} + +/* + * ALU instructions: + */ + +static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, + struct ir2_shader_info *info) +{ + int reg = 0; + instr_alu_t *alu = (instr_alu_t *)dwords; + struct ir2_register *dst_reg = instr->regs[reg++]; + struct ir2_register *src1_reg; + struct ir2_register *src2_reg; + struct ir2_register *src3_reg; + + memset(alu, 0, sizeof(*alu)); + + /* handle instructions w/ 3 src operands: */ + switch (instr->alu.vector_opc) { + case MULADDv: + case CNDEv: + case CNDGTEv: + case CNDGTv: + case DOT2ADDv: + /* note: disassembler lists 3rd src first, ie: + * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) + * which is the reason for this strange ordering. + */ + src3_reg = instr->regs[reg++]; + break; + default: + src3_reg = NULL; + break; + } + + src1_reg = instr->regs[reg++]; + src2_reg = instr->regs[reg++]; + + reg_update_stats(dst_reg, info, true); + reg_update_stats(src1_reg, info, false); + reg_update_stats(src2_reg, info, false); + + assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0); + assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); + assert((src1_reg->flags & IR2_REG_EXPORT) == 0); + assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); + assert((src2_reg->flags & IR2_REG_EXPORT) == 0); + assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); + + if (instr->alu.vector_opc == ~0) { + alu->vector_opc = MAXv; + alu->vector_write_mask = 0; + } else { + alu->vector_opc = instr->alu.vector_opc; + alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); + } + + alu->vector_dest = dst_reg->num; + alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT); + + // TODO predicate case/condition.. 
need to add to parser + + alu->src2_reg = src2_reg->num; + alu->src2_swiz = reg_alu_src_swiz(src2_reg); + alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); + alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); + alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); + + alu->src1_reg = src1_reg->num; + alu->src1_swiz = reg_alu_src_swiz(src1_reg); + alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); + alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); + alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); + + alu->vector_clamp = instr->alu.vector_clamp; + alu->scalar_clamp = instr->alu.scalar_clamp; + + if (instr->alu.scalar_opc != ~0) { + struct ir2_register *sdst_reg = instr->regs[reg++]; + + reg_update_stats(sdst_reg, info, true); + + assert(sdst_reg->flags == dst_reg->flags); + + if (src3_reg) { + assert(src3_reg == instr->regs[reg++]); + } else { + src3_reg = instr->regs[reg++]; + } + + alu->scalar_dest = sdst_reg->num; + alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); + alu->scalar_opc = instr->alu.scalar_opc; + } else { + /* not sure if this is required, but adreno compiler seems + * to always set scalar opc to MAXs if it is not used: + */ + alu->scalar_opc = MAXs; + } + + if (src3_reg) { + reg_update_stats(src3_reg, info, false); + + alu->src3_reg = src3_reg->num; + alu->src3_swiz = reg_alu_src_swiz(src3_reg); + alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); + alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); + alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); + } else { + /* not sure if this is required, but adreno compiler seems + * to always set register bank for 3rd src if unused: + */ + alu->src3_sel = 1; + } + + if (instr->pred != IR2_PRED_NONE) { + alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 
3 : 2; + } + + return 0; +} + +static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, + uint32_t idx, struct ir2_shader_info *info) +{ + switch (instr->instr_type) { + case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); + case IR2_ALU: return instr_emit_alu(instr, dwords, info); + } + return -1; +} + + +struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, + int num, const char *swizzle, int flags) +{ + struct ir2_register *reg = + ir2_alloc(instr->shader, sizeof(struct ir2_register)); + DEBUG_MSG("%x, %d, %s", flags, num, swizzle); + assert(num <= REG_MASK); + reg->flags = flags; + reg->num = num; + reg->swizzle = ir2_strdup(instr->shader, swizzle); + assert(instr->regs_count < ARRAY_SIZE(instr->regs)); + instr->regs[instr->regs_count++] = reg; + return reg; +} + +static void reg_update_stats(struct ir2_register *reg, + struct ir2_shader_info *info, bool dest) +{ + if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) { + info->max_reg = max(info->max_reg, reg->num); + + if (dest) { + info->regs_written |= (1 << reg->num); + } else if (!(info->regs_written & (1 << reg->num))) { + /* for registers that haven't been written, they must be an + * input register that the thread scheduler (presumably?) 
+ * needs to know about: + */ + info->max_input_reg = max(info->max_input_reg, reg->num); + } + } +} + +static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n) +{ + uint32_t swiz = 0; + int i; + + assert(reg->flags == 0); + assert(reg->swizzle); + + DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); + + for (i = n-1; i >= 0; i--) { + swiz <<= 2; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); + case 'x': swiz |= 0x0; break; + case 'y': swiz |= 0x1; break; + case 'z': swiz |= 0x2; break; + case 'w': swiz |= 0x3; break; + } + } + + return swiz; +} + +static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert(reg->flags == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 3; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); + case 'x': swiz |= 0x0; break; + case 'y': swiz |= 0x1; break; + case 'z': swiz |= 0x2; break; + case 'w': swiz |= 0x3; break; + case '0': swiz |= 0x4; break; + case '1': swiz |= 0x5; break; + case '_': swiz |= 0x7; break; + } + } + } else { + swiz = 0x688; + } + + return swiz; +} + +/* actually, a write-mask */ +static uint32_t reg_alu_dst_swiz(struct ir2_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert((reg->flags & ~IR2_REG_EXPORT) == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 1; + if (reg->swizzle[i] == "xyzw"[i]) { + swiz |= 0x1; + } else if (reg->swizzle[i] != '_') { + ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); + break; + } + } + } else { + swiz = 0xf; + } + + return swiz; +} + +static uint32_t reg_alu_src_swiz(struct ir2_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert((reg->flags & 
IR2_REG_EXPORT) == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 2; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); + case 'x': swiz |= (0x0 - i) & 0x3; break; + case 'y': swiz |= (0x1 - i) & 0x3; break; + case 'z': swiz |= (0x2 - i) & 0x3; break; + case 'w': swiz |= (0x3 - i) & 0x3; break; + } + } + } else { + swiz = 0x0; + } + + return swiz; +} diff --git a/src/gallium/drivers/freedreno/ir-a2xx.h b/src/gallium/drivers/freedreno/ir-a2xx.h new file mode 100644 index 00000000000..e2c7eff504e --- /dev/null +++ b/src/gallium/drivers/freedreno/ir-a2xx.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IR2_H_ +#define IR2_H_ + +#include +#include + +#include "instr-a2xx.h" + +/* low level intermediate representation of an adreno a2xx shader program */ + +struct ir2_shader; + +struct ir2_shader_info { + uint16_t sizedwords; + int8_t max_reg; /* highest GPR # used by shader */ + uint8_t max_input_reg; + uint64_t regs_written; +}; + +struct ir2_register { + enum { + IR2_REG_CONST = 0x1, + IR2_REG_EXPORT = 0x2, + IR2_REG_NEGATE = 0x4, + IR2_REG_ABS = 0x8, + } flags; + int num; + char *swizzle; +}; + +enum ir2_pred { + IR2_PRED_NONE = 0, + IR2_PRED_EQ = 1, + IR2_PRED_NE = 2, +}; + +struct ir2_instruction { + struct ir2_shader *shader; + enum { + IR2_FETCH, + IR2_ALU, + } instr_type; + enum ir2_pred pred; + int sync; + unsigned regs_count; + struct ir2_register *regs[5]; + union { + /* FETCH specific: */ + struct { + instr_fetch_opc_t opc; + unsigned const_idx; + /* maybe vertex fetch specific: */ + unsigned const_idx_sel; + enum a2xx_sq_surfaceformat fmt; + bool is_signed : 1; + bool is_normalized : 1; + uint32_t stride; + uint32_t offset; + } fetch; + /* ALU specific: */ + struct { + instr_vector_opc_t vector_opc; + instr_scalar_opc_t scalar_opc; + bool vector_clamp : 1; + bool scalar_clamp : 1; + } alu; + }; +}; + +struct ir2_cf { + struct ir2_shader *shader; + instr_cf_opc_t cf_type; + + union { + /* EXEC/EXEC_END specific: */ + struct { + unsigned instrs_count; + struct ir2_instruction *instrs[6]; + uint32_t addr, cnt, sequence; + } exec; + /* ALLOC specific: */ + struct { + instr_alloc_type_t type; /* SQ_POSITION or SQ_PARAMETER_PIXEL */ + int size; + } alloc; + }; +}; + +struct ir2_shader { + unsigned cfs_count; + struct ir2_cf *cfs[0x56]; + uint32_t heap[100 * 4096]; + unsigned heap_idx; + + enum ir2_pred pred; /* pred inherited by newly created instrs */ +}; + +struct ir2_shader * ir2_shader_create(void); +void ir2_shader_destroy(struct ir2_shader *shader); +void * ir2_shader_assemble(struct ir2_shader *shader, + struct ir2_shader_info *info); 
+ +struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type); + +struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type); + +struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, + int num, const char *swizzle, int flags); + +/* some helper fxns: */ + +static inline struct ir2_cf * +ir2_cf_create_alloc(struct ir2_shader *shader, instr_alloc_type_t type, int size) +{ + struct ir2_cf *cf = ir2_cf_create(shader, ALLOC); + if (!cf) + return cf; + cf->alloc.type = type; + cf->alloc.size = size; + return cf; +} +static inline struct ir2_instruction * +ir2_instr_create_alu(struct ir2_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop) +{ + struct ir2_instruction *instr = ir2_instr_create(cf, IR2_ALU); + if (!instr) + return instr; + instr->alu.vector_opc = vop; + instr->alu.scalar_opc = sop; + return instr; +} +static inline struct ir2_instruction * +ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int cis, + enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride) +{ + struct ir2_instruction *instr = instr = ir2_instr_create(cf, IR2_FETCH); + instr->fetch.opc = VTX_FETCH; + instr->fetch.const_idx = ci; + instr->fetch.const_idx_sel = cis; + instr->fetch.fmt = fmt; + instr->fetch.is_signed = is_signed; + instr->fetch.stride = stride; + return instr; +} +static inline struct ir2_instruction * +ir2_instr_create_tex_fetch(struct ir2_cf *cf, int ci) +{ + struct ir2_instruction *instr = instr = ir2_instr_create(cf, IR2_FETCH); + instr->fetch.opc = TEX_FETCH; + instr->fetch.const_idx = ci; + return instr; +} + + +#endif /* IR2_H_ */ diff --git a/src/gallium/drivers/freedreno/ir.c b/src/gallium/drivers/freedreno/ir.c deleted file mode 100644 index 9aa931bc9da..00000000000 --- a/src/gallium/drivers/freedreno/ir.c +++ /dev/null @@ -1,634 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated 
documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir.h" - -#include -#include -#include -#include - -#include "freedreno_util.h" -#include "instr-a2xx.h" - -#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) -#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) -#define ERROR_MSG(f, ...) 
DBG("ERROR: "f, ##__VA_ARGS__) - -#define REG_MASK 0x3f - -static int cf_emit(struct ir_cf *cf, instr_cf_t *instr); - -static int instr_emit(struct ir_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir_shader_info *info); - -static void reg_update_stats(struct ir_register *reg, - struct ir_shader_info *info, bool dest); -static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n); -static uint32_t reg_fetch_dst_swiz(struct ir_register *reg); -static uint32_t reg_alu_dst_swiz(struct ir_register *reg); -static uint32_t reg_alu_src_swiz(struct ir_register *reg); - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -static void * ir_alloc(struct ir_shader *shader, int sz) -{ - void *ptr = &shader->heap[shader->heap_idx]; - shader->heap_idx += ALIGN(sz, 4); - return ptr; -} - -static char * ir_strdup(struct ir_shader *shader, const char *str) -{ - char *ptr = NULL; - if (str) { - int len = strlen(str); - ptr = ir_alloc(shader, len+1); - memcpy(ptr, str, len); - ptr[len] = '\0'; - } - return ptr; -} - -struct ir_shader * ir_shader_create(void) -{ - DEBUG_MSG(""); - return calloc(1, sizeof(struct ir_shader)); -} - -void ir_shader_destroy(struct ir_shader *shader) -{ - DEBUG_MSG(""); - free(shader); -} - -/* resolve addr/cnt/sequence fields in the individual CF's */ -static int shader_resolve(struct ir_shader *shader, struct ir_shader_info *info) -{ - uint32_t addr; - unsigned i; - int j; - - addr = shader->cfs_count / 2; - for (i = 0; i < shader->cfs_count; i++) { - struct ir_cf *cf = shader->cfs[i]; - if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { - uint32_t sequence = 0; - - if (cf->exec.addr && (cf->exec.addr != addr)) - WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); - if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) - WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); - - for (j = cf->exec.instrs_count - 1; j >= 0; j--) 
{ - struct ir_instruction *instr = cf->exec.instrs[j]; - sequence <<= 2; - if (instr->instr_type == IR_FETCH) - sequence |= 0x1; - if (instr->sync) - sequence |= 0x2; - } - - cf->exec.addr = addr; - cf->exec.cnt = cf->exec.instrs_count; - cf->exec.sequence = sequence; - - addr += cf->exec.instrs_count; - } - } - - info->sizedwords = 3 * addr; - - return 0; -} - -void * ir_shader_assemble(struct ir_shader *shader, struct ir_shader_info *info) -{ - uint32_t i, j; - uint32_t *ptr, *dwords = NULL; - uint32_t idx = 0; - int ret; - - info->sizedwords = 0; - info->max_reg = -1; - info->max_input_reg = 0; - info->regs_written = 0; - - /* we need an even # of CF's.. insert a NOP if needed */ - if (shader->cfs_count != ALIGN(shader->cfs_count, 2)) - ir_cf_create(shader, NOP); - - /* first pass, resolve sizes and addresses: */ - ret = shader_resolve(shader, info); - if (ret) { - ERROR_MSG("resolve failed: %d", ret); - goto fail; - } - - ptr = dwords = calloc(1, 4 * info->sizedwords); - - /* second pass, emit CF program in pairs: */ - for (i = 0; i < shader->cfs_count; i += 2) { - instr_cf_t *cfs = (instr_cf_t *)ptr; - ret = cf_emit(shader->cfs[i], &cfs[0]); - if (ret) { - ERROR_MSG("CF emit failed: %d\n", ret); - goto fail; - } - ret = cf_emit(shader->cfs[i+1], &cfs[1]); - if (ret) { - ERROR_MSG("CF emit failed: %d\n", ret); - goto fail; - } - ptr += 3; - assert((ptr - dwords) <= info->sizedwords); - } - - /* third pass, emit ALU/FETCH: */ - for (i = 0; i < shader->cfs_count; i++) { - struct ir_cf *cf = shader->cfs[i]; - if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { - for (j = 0; j < cf->exec.instrs_count; j++) { - ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); - if (ret) { - ERROR_MSG("instruction emit failed: %d", ret); - goto fail; - } - ptr += 3; - assert((ptr - dwords) <= info->sizedwords); - } - } - } - - return dwords; - -fail: - free(dwords); - return NULL; -} - - -struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type) -{ - 
struct ir_cf *cf = ir_alloc(shader, sizeof(struct ir_cf)); - DEBUG_MSG("%d", cf_type); - cf->shader = shader; - cf->cf_type = cf_type; - assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); - shader->cfs[shader->cfs_count++] = cf; - return cf; -} - - -/* - * CF instructions: - */ - -static int cf_emit(struct ir_cf *cf, instr_cf_t *instr) -{ - memset(instr, 0, sizeof(*instr)); - - instr->opc = cf->cf_type; - - switch (cf->cf_type) { - case NOP: - break; - case EXEC: - case EXEC_END: - assert(cf->exec.addr <= 0x1ff); - assert(cf->exec.cnt <= 0x6); - assert(cf->exec.sequence <= 0xfff); - instr->exec.address = cf->exec.addr; - instr->exec.count = cf->exec.cnt; - instr->exec.serialize = cf->exec.sequence; - break; - case ALLOC: - assert(cf->alloc.size <= 0xf); - instr->alloc.size = cf->alloc.size; - switch (cf->alloc.type) { - case SQ_POSITION: - case SQ_PARAMETER_PIXEL: - instr->alloc.buffer_select = cf->alloc.type; - break; - default: - ERROR_MSG("invalid alloc type: %d", cf->alloc.type); - return -1; - } - break; - case COND_EXEC: - case COND_EXEC_END: - case COND_PRED_EXEC: - case COND_PRED_EXEC_END: - case LOOP_START: - case LOOP_END: - case COND_CALL: - case RETURN: - case COND_JMP: - case COND_EXEC_PRED_CLEAN: - case COND_EXEC_PRED_CLEAN_END: - case MARK_VS_FETCH_DONE: - ERROR_MSG("TODO"); - return -1; - } - - return 0; -} - - -struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type) -{ - struct ir_instruction *instr = - ir_alloc(cf->shader, sizeof(struct ir_instruction)); - DEBUG_MSG("%d", instr_type); - instr->shader = cf->shader; - instr->pred = cf->shader->pred; - instr->instr_type = instr_type; - assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); - cf->exec.instrs[cf->exec.instrs_count++] = instr; - return instr; -} - - -/* - * FETCH instructions: - */ - -static int instr_emit_fetch(struct ir_instruction *instr, - uint32_t *dwords, uint32_t idx, - struct ir_shader_info *info) -{ - instr_fetch_t *fetch = (instr_fetch_t *)dwords; - 
int reg = 0; - struct ir_register *dst_reg = instr->regs[reg++]; - struct ir_register *src_reg = instr->regs[reg++]; - - memset(fetch, 0, sizeof(*fetch)); - - reg_update_stats(dst_reg, info, true); - reg_update_stats(src_reg, info, false); - - fetch->opc = instr->fetch.opc; - - if (instr->fetch.opc == VTX_FETCH) { - instr_fetch_vtx_t *vtx = &fetch->vtx; - - assert(instr->fetch.stride <= 0xff); - assert(instr->fetch.fmt <= 0x3f); - assert(instr->fetch.const_idx <= 0x1f); - assert(instr->fetch.const_idx_sel <= 0x3); - - vtx->src_reg = src_reg->num; - vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); - vtx->dst_reg = dst_reg->num; - vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); - vtx->must_be_one = 1; - vtx->const_index = instr->fetch.const_idx; - vtx->const_index_sel = instr->fetch.const_idx_sel; - vtx->format_comp_all = !!instr->fetch.is_signed; - vtx->num_format_all = !instr->fetch.is_normalized; - vtx->format = instr->fetch.fmt; - vtx->stride = instr->fetch.stride; - vtx->offset = instr->fetch.offset; - - if (instr->pred != IR_PRED_NONE) { - vtx->pred_select = 1; - vtx->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0; - } - - /* XXX seems like every FETCH but the first has - * this bit set: - */ - vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; - vtx->reserved0 = (idx > 0) ? 
0x2 : 0x3; - } else if (instr->fetch.opc == TEX_FETCH) { - instr_fetch_tex_t *tex = &fetch->tex; - - assert(instr->fetch.const_idx <= 0x1f); - - tex->src_reg = src_reg->num; - tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); - tex->dst_reg = dst_reg->num; - tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); - tex->const_idx = instr->fetch.const_idx; - tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; - tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; - tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; - tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->use_comp_lod = 1; - tex->sample_location = SAMPLE_CENTER; - - if (instr->pred != IR_PRED_NONE) { - tex->pred_select = 1; - tex->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0; - } - - } else { - ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); - return -1; - } - - return 0; -} - -/* - * ALU instructions: - */ - -static int instr_emit_alu(struct ir_instruction *instr, uint32_t *dwords, - struct ir_shader_info *info) -{ - int reg = 0; - instr_alu_t *alu = (instr_alu_t *)dwords; - struct ir_register *dst_reg = instr->regs[reg++]; - struct ir_register *src1_reg; - struct ir_register *src2_reg; - struct ir_register *src3_reg; - - memset(alu, 0, sizeof(*alu)); - - /* handle instructions w/ 3 src operands: */ - switch (instr->alu.vector_opc) { - case MULADDv: - case CNDEv: - case CNDGTEv: - case CNDGTv: - case DOT2ADDv: - /* note: disassembler lists 3rd src first, ie: - * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) - * which is the reason for this strange ordering. 
- */ - src3_reg = instr->regs[reg++]; - break; - default: - src3_reg = NULL; - break; - } - - src1_reg = instr->regs[reg++]; - src2_reg = instr->regs[reg++]; - - reg_update_stats(dst_reg, info, true); - reg_update_stats(src1_reg, info, false); - reg_update_stats(src2_reg, info, false); - - assert((dst_reg->flags & ~IR_REG_EXPORT) == 0); - assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); - assert((src1_reg->flags & IR_REG_EXPORT) == 0); - assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); - assert((src2_reg->flags & IR_REG_EXPORT) == 0); - assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); - - if (instr->alu.vector_opc == ~0) { - alu->vector_opc = MAXv; - alu->vector_write_mask = 0; - } else { - alu->vector_opc = instr->alu.vector_opc; - alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); - } - - alu->vector_dest = dst_reg->num; - alu->export_data = !!(dst_reg->flags & IR_REG_EXPORT); - - // TODO predicate case/condition.. need to add to parser - - alu->src2_reg = src2_reg->num; - alu->src2_swiz = reg_alu_src_swiz(src2_reg); - alu->src2_reg_negate = !!(src2_reg->flags & IR_REG_NEGATE); - alu->src2_reg_abs = !!(src2_reg->flags & IR_REG_ABS); - alu->src2_sel = !(src2_reg->flags & IR_REG_CONST); - - alu->src1_reg = src1_reg->num; - alu->src1_swiz = reg_alu_src_swiz(src1_reg); - alu->src1_reg_negate = !!(src1_reg->flags & IR_REG_NEGATE); - alu->src1_reg_abs = !!(src1_reg->flags & IR_REG_ABS); - alu->src1_sel = !(src1_reg->flags & IR_REG_CONST); - - alu->vector_clamp = instr->alu.vector_clamp; - alu->scalar_clamp = instr->alu.scalar_clamp; - - if (instr->alu.scalar_opc != ~0) { - struct ir_register *sdst_reg = instr->regs[reg++]; - - reg_update_stats(sdst_reg, info, true); - - assert(sdst_reg->flags == dst_reg->flags); - - if (src3_reg) { - assert(src3_reg == instr->regs[reg++]); - } else { - src3_reg = instr->regs[reg++]; - } - - alu->scalar_dest = sdst_reg->num; - alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); - 
alu->scalar_opc = instr->alu.scalar_opc; - } else { - /* not sure if this is required, but adreno compiler seems - * to always set scalar opc to MAXs if it is not used: - */ - alu->scalar_opc = MAXs; - } - - if (src3_reg) { - reg_update_stats(src3_reg, info, false); - - alu->src3_reg = src3_reg->num; - alu->src3_swiz = reg_alu_src_swiz(src3_reg); - alu->src3_reg_negate = !!(src3_reg->flags & IR_REG_NEGATE); - alu->src3_reg_abs = !!(src3_reg->flags & IR_REG_ABS); - alu->src3_sel = !(src3_reg->flags & IR_REG_CONST); - } else { - /* not sure if this is required, but adreno compiler seems - * to always set register bank for 3rd src if unused: - */ - alu->src3_sel = 1; - } - - if (instr->pred != IR_PRED_NONE) { - alu->pred_select = (instr->pred == IR_PRED_EQ) ? 3 : 2; - } - - return 0; -} - -static int instr_emit(struct ir_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir_shader_info *info) -{ - switch (instr->instr_type) { - case IR_FETCH: return instr_emit_fetch(instr, dwords, idx, info); - case IR_ALU: return instr_emit_alu(instr, dwords, info); - } - return -1; -} - - -struct ir_register * ir_reg_create(struct ir_instruction *instr, - int num, const char *swizzle, int flags) -{ - struct ir_register *reg = - ir_alloc(instr->shader, sizeof(struct ir_register)); - DEBUG_MSG("%x, %d, %s", flags, num, swizzle); - assert(num <= REG_MASK); - reg->flags = flags; - reg->num = num; - reg->swizzle = ir_strdup(instr->shader, swizzle); - assert(instr->regs_count < ARRAY_SIZE(instr->regs)); - instr->regs[instr->regs_count++] = reg; - return reg; -} - -static void reg_update_stats(struct ir_register *reg, - struct ir_shader_info *info, bool dest) -{ - if (!(reg->flags & (IR_REG_CONST|IR_REG_EXPORT))) { - info->max_reg = max(info->max_reg, reg->num); - - if (dest) { - info->regs_written |= (1 << reg->num); - } else if (!(info->regs_written & (1 << reg->num))) { - /* for registers that haven't been written, they must be an - * input register that the thread scheduler 
(presumably?) - * needs to know about: - */ - info->max_input_reg = max(info->max_input_reg, reg->num); - } - } -} - -static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n) -{ - uint32_t swiz = 0; - int i; - - assert(reg->flags == 0); - assert(reg->swizzle); - - DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); - - for (i = n-1; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - } - } - - return swiz; -} - -static uint32_t reg_fetch_dst_swiz(struct ir_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert(reg->flags == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 3; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - case '0': swiz |= 0x4; break; - case '1': swiz |= 0x5; break; - case '_': swiz |= 0x7; break; - } - } - } else { - swiz = 0x688; - } - - return swiz; -} - -/* actually, a write-mask */ -static uint32_t reg_alu_dst_swiz(struct ir_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert((reg->flags & ~IR_REG_EXPORT) == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 1; - if (reg->swizzle[i] == "xyzw"[i]) { - swiz |= 0x1; - } else if (reg->swizzle[i] != '_') { - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - break; - } - } - } else { - swiz = 0xf; - } - - return swiz; -} - -static uint32_t reg_alu_src_swiz(struct ir_register *reg) -{ - uint32_t swiz = 0; - int i; - - 
assert((reg->flags & IR_REG_EXPORT) == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); - case 'x': swiz |= (0x0 - i) & 0x3; break; - case 'y': swiz |= (0x1 - i) & 0x3; break; - case 'z': swiz |= (0x2 - i) & 0x3; break; - case 'w': swiz |= (0x3 - i) & 0x3; break; - } - } - } else { - swiz = 0x0; - } - - return swiz; -} diff --git a/src/gallium/drivers/freedreno/ir.h b/src/gallium/drivers/freedreno/ir.h deleted file mode 100644 index a6d12193b29..00000000000 --- a/src/gallium/drivers/freedreno/ir.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef IR_H_ -#define IR_H_ - -#include -#include - -#include "instr-a2xx.h" - -/* low level intermediate representation of an adreno a2xx shader program */ - -struct ir_shader; - -struct ir_shader_info { - uint16_t sizedwords; - int8_t max_reg; /* highest GPR # used by shader */ - uint8_t max_input_reg; - uint64_t regs_written; -}; - -struct ir_register { - enum { - IR_REG_CONST = 0x1, - IR_REG_EXPORT = 0x2, - IR_REG_NEGATE = 0x4, - IR_REG_ABS = 0x8, - } flags; - int num; - char *swizzle; -}; - -enum ir_pred { - IR_PRED_NONE = 0, - IR_PRED_EQ = 1, - IR_PRED_NE = 2, -}; - -struct ir_instruction { - struct ir_shader *shader; - enum { - IR_FETCH, - IR_ALU, - } instr_type; - enum ir_pred pred; - int sync; - unsigned regs_count; - struct ir_register *regs[5]; - union { - /* FETCH specific: */ - struct { - instr_fetch_opc_t opc; - unsigned const_idx; - /* maybe vertex fetch specific: */ - unsigned const_idx_sel; - enum a2xx_sq_surfaceformat fmt; - bool is_signed : 1; - bool is_normalized : 1; - uint32_t stride; - uint32_t offset; - } fetch; - /* ALU specific: */ - struct { - instr_vector_opc_t vector_opc; - instr_scalar_opc_t scalar_opc; - bool vector_clamp : 1; - bool scalar_clamp : 1; - } alu; - }; -}; - -struct ir_cf { - struct ir_shader *shader; - instr_cf_opc_t cf_type; - - union { - /* EXEC/EXEC_END specific: */ - struct { - unsigned instrs_count; - struct ir_instruction *instrs[6]; - uint32_t addr, cnt, sequence; - } exec; - /* ALLOC specific: */ - struct { - instr_alloc_type_t type; /* SQ_POSITION or SQ_PARAMETER_PIXEL */ - int size; - } alloc; - }; -}; - -struct ir_shader { - unsigned cfs_count; - struct ir_cf *cfs[0x56]; - uint32_t heap[100 * 4096]; - unsigned heap_idx; - - enum ir_pred pred; /* pred inherited by newly created instrs */ -}; - -struct ir_shader * ir_shader_create(void); -void ir_shader_destroy(struct ir_shader *shader); -void * ir_shader_assemble(struct ir_shader *shader, - struct ir_shader_info *info); - -struct ir_cf * 
ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type); - -struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type); - -struct ir_register * ir_reg_create(struct ir_instruction *instr, - int num, const char *swizzle, int flags); - -/* some helper fxns: */ - -static inline struct ir_cf * -ir_cf_create_alloc(struct ir_shader *shader, instr_alloc_type_t type, int size) -{ - struct ir_cf *cf = ir_cf_create(shader, ALLOC); - if (!cf) - return cf; - cf->alloc.type = type; - cf->alloc.size = size; - return cf; -} -static inline struct ir_instruction * -ir_instr_create_alu(struct ir_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop) -{ - struct ir_instruction *instr = ir_instr_create(cf, IR_ALU); - if (!instr) - return instr; - instr->alu.vector_opc = vop; - instr->alu.scalar_opc = sop; - return instr; -} -static inline struct ir_instruction * -ir_instr_create_vtx_fetch(struct ir_cf *cf, int ci, int cis, - enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride) -{ - struct ir_instruction *instr = instr = ir_instr_create(cf, IR_FETCH); - instr->fetch.opc = VTX_FETCH; - instr->fetch.const_idx = ci; - instr->fetch.const_idx_sel = cis; - instr->fetch.fmt = fmt; - instr->fetch.is_signed = is_signed; - instr->fetch.stride = stride; - return instr; -} -static inline struct ir_instruction * -ir_instr_create_tex_fetch(struct ir_cf *cf, int ci) -{ - struct ir_instruction *instr = instr = ir_instr_create(cf, IR_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.const_idx = ci; - return instr; -} - - -#endif /* IR_H_ */