}
static void
-nv_do_print_program(void *priv, struct nv_basic_block *b)
+nv_do_print_function(void *priv, struct nv_basic_block *b)
{
struct nv_instruction *i = b->phi;
}
void
-nv_print_program(struct nv_basic_block *root)
+nv_print_function(struct nv_basic_block *root)
{
- nv_pc_pass_in_order(root, nv_do_print_program, root);
+ if (root->subroutine)
+ debug_printf("SUBROUTINE %i\n", root->subroutine);
+ else
+ debug_printf("MAIN\n");
- debug_printf("END\n\n");
+ nv_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nv_print_program(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i])
+ nv_print_function(pc->root[i]);
}
static INLINE void
if (!pc)
return 1;
+ pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
+ if (!pc->root) {
+ FREE(pc);
+ return 1;
+ }
+ pc->num_subroutines = ti->subr_nr;
+
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* optimization */
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* register allocation */
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* prepare for emission */
out:
nv_pc_free_refs(pc);
- if (ret) {
+
+ if (pc->bb_list)
+ FREE(pc->bb_list);
+
+ if (ret) { /* on success, these will be referenced by nv50_program */
if (pc->emit)
- free(pc->emit);
+ FREE(pc->emit);
if (pc->immd_buf)
- free(pc->immd_buf);
+ FREE(pc->immd_buf);
if (pc->fixups)
- free(pc->fixups);
+ FREE(pc->fixups);
}
- free(pc);
-
+ FREE(pc);
return ret;
}
ubyte in_kind[8];
int id;
- struct nv_basic_block *last_visitor;
+ int subroutine;
uint priv;
uint pass_seq;
bin[fixup->offset / 4] = val;
}
-struct nv_pc {
- struct nv50_translation_info *ti;
+struct nv50_translation_info;
- struct nv_basic_block *root;
+struct nv_pc {
+ struct nv_basic_block **root;
struct nv_basic_block *current_block;
struct nv_basic_block *parent_block;
int num_instructions;
int num_refs;
int num_blocks;
+ int num_subroutines;
int max_reg[4];
/* nv50_pc.c */
-void nv_print_program(struct nv_basic_block *b);
+void nv_print_function(struct nv_basic_block *root);
+void nv_print_program(struct nv_pc *);
boolean nv_op_commutative(uint opcode);
int nv50_indirect_opnd(struct nv_instruction *);
pc->bin_size += b->bin_size *= 4;
}
-int
-nv_pc_exec_pass2(struct nv_pc *pc)
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass pass;
pass.pc = pc;
pc->pass_seq++;
- nv_pass_flatten(&pass, pc->root);
+
+ nv_pass_flatten(&pass, root);
+
+ nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+ return 0;
+}
+
+int
+nv_pc_exec_pass2(struct nv_pc *pc)
+{
+ int i, ret;
NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
- pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
- pc->num_blocks = 0;
+ pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0]));
- nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
+ pc->num_blocks = 0;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+ return ret;
return 0;
}
return 0;
}
-int
-nv_pc_exec_pass0(struct nv_pc *pc)
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass_reld_elim *reldelim;
struct nv_pass pass;
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, pc->root);
+ ret = nv_pass_lower_arith(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_loads(&pass, pc->root);
+ ret = nv_pass_fold_loads(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_stores(&pass, pc->root);
+ ret = nv_pass_fold_stores(&pass, root);
if (ret)
return ret;
reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
reldelim->pc = pc;
pc->pass_seq++;
- ret = nv_pass_reload_elim(reldelim, pc->root);
+ ret = nv_pass_reload_elim(reldelim, root);
FREE(reldelim);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_cse(&pass, pc->root);
+ ret = nv_pass_cse(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_lower_mods(&pass, pc->root);
+ ret = nv_pass_lower_mods(&pass, root);
if (ret)
return ret;
do {
dce.removed = 0;
pc->pass_seq++;
- ret = nv_pass_dce(&dce, pc->root);
+ ret = nv_pass_dce(&dce, root);
if (ret)
return ret;
} while (dce.removed);
- ret = nv_pass_tex_mask(&pass, pc->root);
+ ret = nv_pass_tex_mask(&pass, root);
if (ret)
return ret;
return ret;
}
+
+int
+nv_pc_exec_pass0(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
return 0;
}
-int
-nv_pc_exec_pass1(struct nv_pc *pc)
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pc_pass *ctx;
int i, ret;
ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
pc->pass_seq++;
- ret = pass_generate_phi_movs(ctx, pc->root);
+ ret = pass_generate_phi_movs(ctx, root);
assert(!ret);
for (i = 0; i < pc->loop_nesting_bound; ++i) {
pc->pass_seq++;
- ret = pass_build_live_sets(ctx, pc->root);
+ ret = pass_build_live_sets(ctx, root);
assert(!ret && "live sets");
if (ret) {
NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
}
pc->pass_seq++;
- nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
+ nv_pc_pass_in_order(root, pass_order_instructions, ctx);
pc->pass_seq++;
- ret = pass_build_intervals(ctx, pc->root);
+ ret = pass_build_intervals(ctx, root);
assert(!ret && "build intervals");
if (ret) {
NOUVEAU_ERR("failed to build live intervals\n");
FREE(ctx);
return ret;
}
+
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
int s, c, k;
unsigned mask;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+ ti->subr[ti->subr_nr].pos = id - 1;
+ ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
+ ++ti->subr_nr;
+ }
+
if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+ dst = &inst->Dst[0].Register;
+
for (c = 0; c < 4; ++c) {
- dst = &inst->Dst[0].Register;
- if (inst->Dst[0].Register.Indirect)
+ if (dst->Indirect)
nv50_indirect_outputs(ti, id);
if (!(dst->WriteMask & (1 << c)))
continue;
}
}
+/* Probably should introduce something like struct tgsi_function_declaration
+ * instead of trying to guess inputs/outputs.
+ */
+static void
+prog_subroutine_inst(struct nv50_subroutine *subr,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File != TGSI_FILE_TEMPORARY)
+ continue;
+ mask = nv50_tgsi_src_mask(inst, s);
+
+ assert(!inst->Src[s].Register.Indirect);
+
+ for (c = 0; c < 4; ++c) {
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+ if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
+ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
+ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
+ }
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c)
+ if (dst->WriteMask & (1 << c))
+ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
+ }
+}
+
static void
prog_immediate(struct nv50_translation_info *ti,
const struct tgsi_full_immediate *imm)
{
struct nv50_program *p = ti->p;
struct tgsi_parse_context parse;
- int ret;
+ int ret, i;
p->vp.edgeflag = 0x40;
p->vp.psiz = 0x40;
tgsi_dump(p->pipe.tokens, 0);
#endif
+ ti->subr =
+ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
}
}
+ /* Scan to determine which registers are inputs/outputs of a subroutine. */
+ for (i = 0; i < ti->subr_nr; ++i) {
+ int pc = ti->subr[i].id;
+ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+ }
+
p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
FREE(ti->immd32_ty);
if (ti->insns)
FREE(ti->insns);
+ if (ti->subr)
+ FREE(ti->subr);
FREE(ti);
return ret ? FALSE : TRUE;
}
#include "tgsi/tgsi_scan.h"
#include "nouveau/nouveau_class.h"
+#define NV50_CAP_MAX_PROGRAM_TEMPS (128 / 4)
+
struct nv50_varying {
uint8_t id; /* tgsi index */
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
#define NV50_INTERP_FLAT (1 << 1)
#define NV50_INTERP_CENTROID (1 << 2)
-#define NV50_PROG_MAX_SUBROUTINES 8
-
/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
struct nv50_subroutine {
- int id;
- uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */
- uint32_t retv[4][1];
+ unsigned id;
+ unsigned pos;
+ /* function inputs and outputs */
+ uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
+ uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
};
struct nv50_translation_info {
unsigned immd32_nr;
ubyte *immd32_ty;
ubyte edgeflag_out;
- struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES];
- int subr_nr;
+ struct nv50_subroutine *subr;
+ unsigned subr_nr;
};
int nv50_generate_code(struct nv50_translation_info *ti);
#include "nv50_context.h"
#include "nv50_screen.h"
#include "nv50_resource.h"
+#include "nv50_program.h"
#include "nouveau/nouveau_stateobj.h"
return 0;
case PIPE_CAP_MAX_VS_TEMPS:
case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */
- return 128 / 4;
+ return NV50_CAP_MAX_PROGRAM_TEMPS;
case PIPE_CAP_DEPTH_CLAMP:
return 1;
default:
struct bld_context *bld = CALLOC_STRUCT(bld_context);
int c;
- pc->root = pc->current_block = new_basic_block(pc);
+ pc->root[0] = pc->current_block = new_basic_block(pc);
bld->pc = pc;
bld->ti = ti;