From 5811c6926450c4aafd2f9c87a2c6fe73b517f2c6 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Sun, 25 Jul 2010 22:21:38 +0200
Subject: [PATCH] nv50: simple reload elimination and local CSE

---
Review notes (below the three-dash line, so not part of the commit message):

This copy of the patch was rebuilt from a version in which every line break
and all indentation had been lost.  Indentation of context and removed lines
is reconstructed (3-space steps); the removed lines of the first nv50_pc.h
hunk are assumed to have been tab-indented, since they differ from the added
lines in whitespace only.  Check both against the tree before applying.  The
commit id and the "index" lines are those of the originally posted patch.

Changes to the added code relative to the originally posted patch:
 - nv_pass_cse: skip instructions without def[0] (nvcg_replace_value would
   dereference a NULL old_val), skip instructions with a flags_src (neither
   flags_src nor cc is compared), check ir->src[s] before reading its
   modifier, and rename the locals that shadowed parameter b.
 - nv_pass_reload_elim: skip loads that use src[4], key LINTERP/PINTERP
   records on the opcode as well, check def[0] before reading refc.
 - nv_pc_exec_pass0: check the CALLOC_STRUCT() result.
 - comments added; hunk line counts and the diffstat updated accordingly.

 src/gallium/drivers/nv50/nv50_pc.c          |  25 +++
 src/gallium/drivers/nv50/nv50_pc.h          |  10 +-
 src/gallium/drivers/nv50/nv50_pc_optimize.c | 207 +++++++++++++++++---
 src/gallium/drivers/nv50/nv50_pc_print.c    |   6 +-
 4 files changed, 224 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 89dbc7aa20e..e09f94074d0 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -163,6 +163,31 @@ nv_nvi_refcount(struct nv_instruction *nvi)
    return rc;
 }
 
+/* Make every reference in pc->refs that points at old_val point at new_val
+ * instead (via nv_reference()).
+ *
+ * Returns the number of references that were changed.  If old_val and
+ * new_val are the same value, nothing is changed and old_val->refc is
+ * returned.
+ */
+int
+nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                   struct nv_value *new_val)
+{
+   int i, n;
+
+   if (old_val == new_val)
+      return old_val->refc;
+
+   for (i = 0, n = 0; i < pc->num_refs; ++i) {
+      if (pc->refs[i]->value == old_val) {
+         ++n;
+         nv_reference(pc, &pc->refs[i], new_val);
+      }
+   }
+   return n;
+}
+
 static void
 nv_pc_free_refs(struct nv_pc *pc)
 {
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 3db300dabbc..ffcdaf44af5 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -363,11 +363,11 @@ new_ref(struct nv_pc *pc, struct nv_value *val)
       const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
       const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
 
-	pc->refs = REALLOC(pc->refs, old_size, new_size);
+      pc->refs = REALLOC(pc->refs, old_size, new_size);
 
-	ref = CALLOC(64, sizeof(struct nv_ref));
-	for (i = 0; i < 64; ++i)
-	   pc->refs[pc->num_refs + i] = &ref[i];
+      ref = CALLOC(64, sizeof(struct nv_ref));
+      for (i = 0; i < 64; ++i)
+         pc->refs[pc->num_refs + i] = &ref[i];
    }
 
    ref = pc->refs[pc->num_refs++];
@@ -426,6 +426,8 @@ int nv_nvi_refcount(struct nv_instruction *);
 void nv_nvi_delete(struct nv_instruction *);
 void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
 void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
+int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                       struct nv_value *new_val);
 
 int nv_pc_exec_pass0(struct nv_pc *pc);
 int nv_pc_exec_pass1(struct nv_pc *pc);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index a514c59e6a1..0018131fb5e 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -570,31 +570,123 @@ nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
 }
 #endif
 
-/* TODO: reload elimination, redundant store elimination */
+/* TODO: redundant store elimination */
 
-struct nv_pass_reldelim {
+/* A load already seen in the current basic block: 'data' is the key that
+ * identifies the loaded location or immediate, 'value' is def[0] of the
+ * instruction that first loaded it.  Records are chained through 'next'.
+ */
+struct load_record {
+   struct load_record *next;
+   uint64_t data;
+   struct nv_value *value;
+};
+
+#define LOAD_RECORD_POOL_SIZE 1024
+
+/* State of the reload elimination pass.  Each list head collects the
+ * records of one kind of source: immediates (imm), NV_FILE_MEM_S (mem_s),
+ * LINTERP/PINTERP sources (mem_v), the 16 NV_FILE_MEM_C files (mem_c) and
+ * NV_FILE_MEM_L (mem_l).  Records are taken from the fixed-size pool;
+ * 'alloc' is the number of pool entries handed out so far.
+ */
+struct nv_pass_reld_elim {
    struct nv_pc *pc;
+
+   struct load_record *imm;
+   struct load_record *mem_s;
+   struct load_record *mem_v;
+   struct load_record *mem_c[16];
+   struct load_record *mem_l;
+
+   struct load_record pool[LOAD_RECORD_POOL_SIZE];
+   int alloc;
 };
 
+/* Reload elimination, local to one basic block at a time.
+ *
+ * For each LINTERP/PINTERP, LDA or immediate MOV in block b whose source
+ * matches an earlier recorded load, all references to its def[0] are
+ * redirected to the recorded value; otherwise the load is recorded.  The
+ * records are cleared again before DESCEND_ARBITRARY() is invoked.
+ */
 static int
-nv_pass_reload_elim(struct nv_pass_reldelim *ctx, struct nv_basic_block *b)
+nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
 {
-   int j;
+   struct load_record **rec, *it;
    struct nv_instruction *ld, *next;
+   uint64_t data;
+   struct nv_value *val;
+   int j;
 
    for (ld = b->entry; ld; ld = next) {
       next = ld->next;
+      /* Skip loads that go through an address register (src[4]): the
+       * record key does not capture what that register holds.
+       */
+      if (!ld->src[0] || ld->src[4])
+         continue;
+      val = ld->src[0]->value;
+      rec = NULL;
 
       if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
-
+         /* the opcode is part of the key so that a LINTERP and a PINTERP
+          * of the same input are never taken for the same load */
+         data = ((uint64_t)ld->opcode << 32) | (uint32_t)val->reg.id;
+         rec = &ctx->mem_v;
       } else
       if (ld->opcode == NV_OP_LDA) {
-
+         data = val->reg.id;
+         if (val->reg.file >= NV_FILE_MEM_C(0) &&
+             val->reg.file <= NV_FILE_MEM_C(15))
+            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+         else
+         if (val->reg.file == NV_FILE_MEM_S)
+            rec = &ctx->mem_s;
+         else
+         if (val->reg.file == NV_FILE_MEM_L)
+            rec = &ctx->mem_l;
       } else
-      if (ld->opcode == NV_OP_MOV) {
-
+      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+         data = val->reg.imm.u32;
+         rec = &ctx->imm;
+      }
+
+      /* untracked source, no result, or a result with refc == 0 */
+      if (!rec || !ld->def[0] || !ld->def[0]->refc)
+         continue;
+
+      for (it = *rec; it; it = it->next)
+         if (it->data == data)
+            break;
+
+      if (it) {
+#if 1
+         nvcg_replace_value(ctx->pc, ld->def[0], it->value);
+#else
+         ld->opcode = NV_OP_MOV;
+         nv_reference(ctx->pc, &ld->src[0], it->value);
+#endif
+      } else {
+         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+            continue; /* pool exhausted, leave this load unrecorded */
+         it = &ctx->pool[ctx->alloc++];
+         it->next = *rec;
+         it->data = data;
+         it->value = ld->def[0];
+         *rec = it;
       }
    }
+
+   /* drop all records before DESCEND_ARBITRARY() runs */
+   ctx->imm = NULL;
+   ctx->mem_s = NULL;
+   ctx->mem_v = NULL;
+   for (j = 0; j < 16; ++j)
+      ctx->mem_c[j] = NULL;
+   ctx->mem_l = NULL;
+   ctx->alloc = 0;
+
    DESCEND_ARBITRARY(j, nv_pass_reload_elim);
 
    return 0;
@@ -678,23 +770,87 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
    return 0;
 }
 
+/* local common subexpression elimination, stupid O(n^2) implementation
+ *
+ * An instruction ir is removed if an earlier instruction ik in the list
+ * has the same opcode and, for each of src[0..2], the same modifier and
+ * either the same value or a value with the same register file and the
+ * same non-negative id.  References to ir's def[0] are then redirected
+ * to ik's def[0].  The scan is repeated until nothing was removed.
+ */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *ir, *ik, *next;
+   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+   int s;
+   unsigned int reps;
+
+   do {
+      reps = 0;
+      for (ir = entry; ir; ir = next) {
+         next = ir->next;
+         for (ik = entry; ik != ir; ik = ik->next) {
+            if (ir->opcode != ik->opcode)
+               continue;
+
+            if (ik->opcode == NV_OP_LDA ||
+                ik->opcode == NV_OP_STA ||
+                ik->opcode == NV_OP_MOV ||
+                nv_is_vector_op(ik->opcode))
+               continue; /* ignore loads, stores & moves */
+
+            if (!ik->def[0] || !ir->def[0])
+               continue; /* no result whose users could be redirected */
+
+            if (ik->src[4] || ir->src[4])
+               continue; /* don't mess with address registers */
+
+            if (ik->flags_src || ir->flags_src)
+               continue; /* flags_src and cc are not compared below */
+
+            for (s = 0; s < 3; ++s) {
+               struct nv_value *u, *v;
+
+               if (!ik->src[s]) {
+                  if (ir->src[s])
+                     break;
+                  continue;
+               }
+               if (!ir->src[s] || ik->src[s]->mod != ir->src[s]->mod)
+                  break;
+               u = ik->src[s]->value;
+               v = ir->src[s]->value;
+               if (u == v)
+                  continue;
+               if (u->reg.file != v->reg.file ||
+                   u->reg.id < 0 ||
+                   u->reg.id != v->reg.id)
+                  break;
+            }
+            if (s == 3) {
+               nv_nvi_delete(ir);
+               ++reps;
+               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+               break;
+            }
+         }
+      }
+   } while(reps);
+
+   DESCEND_ARBITRARY(s, nv_pass_cse);
+
+   return 0;
+}
+
 int
 nv_pc_exec_pass0(struct nv_pc *pc)
 {
-   struct nv_pass_reldelim *reldelim;
+   struct nv_pass_reld_elim *reldelim;
    struct nv_pass pass;
    struct nv_pass_dce dce;
    int ret;
 
-   reldelim = CALLOC_STRUCT(nv_pass_reldelim);
-   reldelim->pc = pc;
-
-   ret = nv_pass_reload_elim(reldelim, pc->root);
-
-   FREE(reldelim);
-   if (ret)
-      return ret;
-
    pass.pc = pc;
 
    pc->pass_seq++;
@@ -720,6 +876,21 @@ nv_pc_exec_pass0(struct nv_pc *pc)
    if (ret)
       return ret;
 
+   reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+   if (!reldelim)
+      return 1; /* non-zero, like the pass failures returned below */
+   reldelim->pc = pc;
+   pc->pass_seq++;
+   ret = nv_pass_reload_elim(reldelim, pc->root);
+   FREE(reldelim);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_cse(&pass, pc->root);
+   if (ret)
+      return ret;
+
    pc->pass_seq++;
    ret = nv_pass_lower_mods(&pass, pc->root);
    if (ret)
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
index 00b50b4edce..82080779c37 100644
--- a/src/gallium/drivers/nv50/nv50_pc_print.c
+++ b/src/gallium/drivers/nv50/nv50_pc_print.c
@@ -181,7 +181,7 @@ nv_print_address(const char c, int buf, struct nv_value *a, int offset)
 static INLINE void
 nv_print_cond(struct nv_instruction *nvi)
 {
-   PRINT("%s%s%s$c%i ",
+   PRINT("%s%s %s$c%i ",
          gree, nv_cond_name(nvi->cc),
          mgta, nv_value_id(nvi->flags_src->value));
 }
@@ -198,7 +198,7 @@ nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
       PRINT(" %s%s", gree, nv_type_name(type));
 
    if (!nv_value_allocated(value))
-      reg_pfx = '%';
+      reg_pfx = nv_value_allocated(value->join) ? '&' : '%';
 
    switch (value->reg.file) {
    case NV_FILE_GPR:
@@ -268,6 +268,8 @@ nv_print_instruction(struct nv_instruction *i)
 {
    int j;
 
+   PRINT("%i: ", i->serial);
+
    if (i->flags_src)
       nv_print_cond(i);
 
-- 
2.30.2