From 7a4a537be1460b09b192fdf4d92680aad6c9e951 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 12 Sep 2010 00:46:38 +0200 Subject: [PATCH] nv50: reduce bb_reachable_by runtime from pot to linear As a by-product, remove the memory leak of nv_basic_blocks. --- src/gallium/drivers/nv50/nv50_pc.c | 105 +++++++++++++++++--- src/gallium/drivers/nv50/nv50_pc.h | 16 ++- src/gallium/drivers/nv50/nv50_pc_optimize.c | 4 +- 3 files changed, 104 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index e4df742a800..e063888eb5f 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -340,6 +340,66 @@ nv_print_program(struct nv_pc *pc) nv_print_function(pc->root[i]); } +#ifdef NV50_PC_DEBUG +static void +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) +{ + int i; + + b->pass_seq = pc->pass_seq; + + fprintf(f, "\t%i [shape=box]\n", b->id); + + for (i = 0; i < 2; ++i) { + if (!b->out[i]) + continue; + switch (b->out_kind[i]) { + case CFG_EDGE_FORWARD: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_ENTER: + fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_LEAVE: + fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_BACK: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + continue; + case CFG_EDGE_FAKE: + fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); + break; + default: + assert(0); + break; + } + if (b->out[i]->pass_seq < pc->pass_seq) + nv_do_print_cfgraph(pc, f, b->out[i]); + } +} + +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */ +static void +nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) +{ + FILE *f; + + f = fopen(filepath, "a"); + if (!f) + return; + + fprintf(f, "digraph G {\n"); + + ++pc->pass_seq; + + nv_do_print_cfgraph(pc, f, pc->root[subr]); + + fprintf(f, "}\n"); + + fclose(f); +} +#endif + static INLINE void nvcg_show_bincode(struct nv_pc *pc) { @@ -393,6 +453,7 @@ nv50_generate_code(struct nv50_translation_info *ti) { struct nv_pc *pc; int ret; + int i; pc = CALLOC_STRUCT(nv_pc); if (!pc) @@ -428,6 +489,7 @@ nv50_generate_code(struct nv50_translation_info *ti) goto out; #ifdef NV50PC_DEBUG nv_print_program(pc); + nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0); #endif /* prepare for emission */ @@ -461,8 +523,8 @@ nv50_generate_code(struct nv50_translation_info *ti) out: nv_pc_free_refs(pc); - if (pc->bb_list) - FREE(pc->bb_list); + for (i = 0; i < pc->num_blocks; ++i) + FREE(pc->bb_list[i]); if (ret) { /* on success, these will be referenced by nv50_program */ if (pc->emit) @@ -644,23 +706,38 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) return j ? TRUE : FALSE; } -/* check if bf (future) can be reached from bp (past) */ +/* check if @bf (future) can be reached from @bp (past), stop at @bt */ boolean nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, struct nv_basic_block *bt) { - if (bf == bp) - return TRUE; - if (bp == bt) - return FALSE; + struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; + int i, p, n; - if (bp->out[0] && !IS_WALL_EDGE(bp->out_kind[0]) && - nvbb_reachable_by(bf, bp->out[0], bt)) - return TRUE; - if (bp->out[1] && !IS_WALL_EDGE(bp->out_kind[1]) && - nvbb_reachable_by(bf, bp->out[1], bt)) - return TRUE; - return FALSE; + p = 0; + n = 1; + q[0] = bp; + + while (p < n) { + b = q[p++]; + + if (b == bf) + break; + if (b == bt) + continue; + assert(n <= (1024 - 2)); + + for (i = 0; i < 2; ++i) { + if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { + q[n] = b->out[i]; + q[n++]->priv = 1; + } + } + } + for (--n; n >= 0; --n) + q[n]->priv = 0; + + return (b == bf); } static struct nv_basic_block * diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index ccddae063ce..e8d99423072 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -144,6 +144,8 @@ #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) +#define NV_PC_MAX_BASIC_BLOCKS 1024 + static INLINE boolean nv_is_vector_op(uint opcode) { @@ -284,7 +286,7 @@ struct nv_basic_block { int id; int subroutine; - uint priv; + uint priv; /* reset to 0 after you're done */ uint pass_seq; uint32_t bin_pos; /* position, size in emitted code */ @@ -328,7 +330,7 @@ struct nv_pc { struct nv_value values[NV_PC_MAX_VALUES]; struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; struct nv_ref **refs; - struct nv_basic_block **bb_list; + struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; int num_values; int num_instructions; int num_refs; @@ -437,9 +439,15 @@ new_ref(struct nv_pc *pc, struct nv_value *val) static INLINE struct nv_basic_block * new_basic_block(struct nv_pc *pc) { - struct nv_basic_block *bb = CALLOC_STRUCT(nv_basic_block); + struct nv_basic_block *bb; + + if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) + return NULL; + + bb = CALLOC_STRUCT(nv_basic_block); - bb->id = pc->num_blocks++; + bb->id = pc->num_blocks; + pc->bb_list[pc->num_blocks++] = bb; return bb; } diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 09d232abda0..edda6c0691f 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -238,9 +238,7 @@ nv_pc_exec_pass2(struct nv_pc *pc) NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks); - pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0])); - - pc->num_blocks = 0; + pc->num_blocks = 0; /* will reorder bb_list */ for (i = 0; i < pc->num_subroutines + 1; ++i) if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) -- 2.30.2