From f013b4f8f1329982727691a55cc263e3011d02bf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sat, 29 Jan 2011 12:10:37 +0100 Subject: [PATCH] r600g: bugfixing register remapping --- src/gallium/drivers/r600/r600_asm.c | 368 ++++++++++++++++------------ 1 file changed, 206 insertions(+), 162 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 935b8454560..f4ff2fc3d43 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -263,10 +263,10 @@ static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r6 { struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, next->list.prev, list); unsigned old_id = cf->id; - unsigned new_id = prev->id + 2; + unsigned new_id = next->list.prev == &bc->cf ? 0 : prev->id + 2; struct r600_bc_cf *other; - if (prev == cf) + if (prev == cf || next == cf) return; /* position hasn't changed */ LIST_DEL(&cf->list); @@ -1189,21 +1189,19 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; } - /* replace special constants */ + /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { + if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { + bc->ngpr = nalu->src[i].sel + 1; + } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) r600_bc_special_constants( nalu->src[i].value[nalu->src[i].chan], &nalu->src[i].sel, &nalu->src[i].neg); - - if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { - bc->ngpr = nalu->src[i].sel + 1; - } } if (nalu->dst.sel >= bc->ngpr) { bc->ngpr = nalu->dst.sel + 1; } - LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); /* each alu use 2 dwords */ bc->cf_last->ndw += 2; @@ -1474,8 +1472,8 @@ enum cf_class CF_CLASS_EXPORT, CF_CLASS_OTHER }; - -static enum cf_class get_cf_class(struct r600_bc_cf *cf) + +static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): @@ -1493,6 +1491,8 @@ static enum cf_class get_cf_class(struct r600_bc_cf *cf) case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: return CF_CLASS_EXPORT; case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: @@ -1518,7 +1518,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) unsigned id = cf->id; unsigned end_of_program = bc->cf.prev == &cf->list; - switch (get_cf_class(cf)) { + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: assert(!end_of_program); bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | @@ -1574,81 +1574,103 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) struct gpr_usage_range { int replacement; - int32_t start; - int32_t end; + int rel_block; + int start; + int end; }; struct gpr_usage { unsigned channels:4; - int32_t first_write; - int32_t last_write[4]; + int first_write; + int last_write[4]; unsigned nranges; struct gpr_usage_range *ranges; }; +static struct gpr_usage_range* last_gpr_usage_range(struct gpr_usage *usage) +{ + if (usage->nranges) + return usage->ranges + usage->nranges - 1; + else + return NULL; +} + static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage) { + struct gpr_usage_range *range; + usage->nranges++; usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range)); if (!usage->ranges) return NULL; - return &usage->ranges[usage->nranges-1]; + + range = last_gpr_usage_range(usage); + range->replacement = -1; /* no prefered replacement */ + range->rel_block = -1; + range->start = -1; + range->end = -1; + + return range; } -static void notice_gpr_read(struct gpr_usage *usage, int32_t id, unsigned chan) +static void notice_gpr_read(struct gpr_usage *usage, int id, unsigned chan) { + struct gpr_usage_range* range; + usage->channels |= 1 << chan; usage->first_write = -1; if (!usage->nranges) { - struct gpr_usage_range* range = add_gpr_usage_range(usage); - range->replacement = -1; - range->start = -1; - range->end = -1; - } - if (usage->ranges[usage->nranges-1].end < id) - usage->ranges[usage->nranges-1].end = id; + range = add_gpr_usage_range(usage); + } else + range = last_gpr_usage_range(usage); + + if (range && range->end < id) + range->end = id; } -static void notice_gpr_rel_read(struct gpr_usage usage[128], int32_t id, unsigned chan) +static void notice_gpr_rel_read(struct r600_bc *bc, struct gpr_usage usage[128], + int id, unsigned gpr, unsigned chan) { unsigned i; - for (i = 0; i < 128; ++i) + for (i = gpr; i < bc->ngpr; ++i) notice_gpr_read(&usage[i], id, chan); + + last_gpr_usage_range(&usage[gpr])->rel_block = bc->ngpr - gpr; } -static void notice_gpr_last_write(struct gpr_usage *usage, int32_t id, unsigned chan) +static void notice_gpr_last_write(struct gpr_usage *usage, int id, unsigned chan) { usage->last_write[chan] = id; } -static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan, +static void notice_gpr_write(struct gpr_usage *usage, int id, unsigned chan, int predicate, int prefered_replacement) { - int32_t start = usage->first_write != -1 ? usage->first_write : id; + struct gpr_usage_range* last_range = last_gpr_usage_range(usage); + int start = usage->first_write != -1 ? usage->first_write : id; usage->channels &= ~(1 << chan); if (usage->channels) { if (usage->first_write == -1) usage->first_write = id; - } else if (!usage->nranges || (usage->ranges[usage->nranges-1].start != start && !predicate)) { + } else if (!last_range || (last_range->start != start && !predicate)) { usage->first_write = start; struct gpr_usage_range* range = add_gpr_usage_range(usage); range->replacement = prefered_replacement; range->start = start; - range->end = -1; - } else if (usage->ranges[usage->nranges-1].start == start && prefered_replacement != -1) { - usage->ranges[usage->nranges-1].replacement = prefered_replacement; + } else if (last_range->start == start && prefered_replacement != -1) { + last_range->replacement = prefered_replacement; } notice_gpr_last_write(usage, id, chan); } -static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int32_t id, unsigned chan) +static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int id, unsigned chan) { unsigned i; for (i = 0; i < 128; ++i) notice_gpr_last_write(&usage[i], id, chan); } -static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsigned chan) +static void notice_gpr_rel_write(struct gpr_usage usage[128], int id, unsigned chan) { unsigned i; for (i = 0; i < 128; ++i) @@ -1656,7 +1678,7 @@ static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsign } static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, - struct gpr_usage usage[128], int32_t id) + struct gpr_usage usage[128], int id) { unsigned src, num_src; @@ -1667,14 +1689,14 @@ static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, continue; if (alu->src[src].rel) - notice_gpr_rel_read(usage, id, alu->src[src].chan); + notice_gpr_rel_read(bc, usage, id, alu->src[src].sel, alu->src[src].chan); else notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan); } } static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128], - int32_t id, int predicate) + int id, int predicate) { struct r600_bc_alu *alu; for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { @@ -1693,18 +1715,19 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage } } -static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], - int32_t id, int predicate) +static void notice_tex_gprs(struct r600_bc *bc, struct r600_bc_tex *tex, + struct gpr_usage usage[128], + int id, int predicate) { if (tex->src_rel) { if (tex->src_sel_x < 4) - notice_gpr_rel_read(usage, id, tex->src_sel_x); + notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_x); if (tex->src_sel_y < 4) - notice_gpr_rel_read(usage, id, tex->src_sel_y); + notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_y); if (tex->src_sel_z < 4) - notice_gpr_rel_read(usage, id, tex->src_sel_z); + notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_z); if (tex->src_sel_w < 4) - notice_gpr_rel_read(usage, id, tex->src_sel_w); + notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_w); } else { if (tex->src_sel_x < 4) notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x); @@ -1737,7 +1760,7 @@ static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128] } static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], - int32_t id, int predicate) + int id, int predicate) { notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x); @@ -1752,11 +1775,14 @@ static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128] } static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], - struct r600_bc_cf *export_cf[128], int32_t export_remap[128]) + struct r600_bc_cf *export_cf[128], int export_remap[128]) { //TODO handle other memory operations struct gpr_usage *output = &usage[cf->output.gpr]; - int32_t id = (output->last_write[0] + 0x100) & ~0xFF; + int id = MAX4(output->last_write[0], output->last_write[1], + output->last_write[2], output->last_write[3]); + id += 0x100; + id &= ~0xFF; export_cf[cf->output.gpr] = cf; export_remap[cf->output.gpr] = id; @@ -1770,7 +1796,7 @@ static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128 notice_gpr_read(output, id, cf->output.swizzle_w); } -static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id) +static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int id) { unsigned i; for (i = 0; i < usage->nranges; ++i) { @@ -1782,21 +1808,20 @@ static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t i return NULL; } -static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id) +static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int id) { unsigned i; for (i = 0; i < usage->nranges; ++i) { struct gpr_usage_range* range = &usage->ranges[i]; - int32_t end = range->end; + int end = range->end; if (range->start <= id && (id < end || end == -1)) return range; } - assert(0); /* should not happen */ return NULL; } -static int is_barrier_needed(struct gpr_usage *usage, int32_t id, unsigned chan, int32_t last_barrier) +static int is_barrier_needed(struct gpr_usage *usage, int id, unsigned chan, int last_barrier) { if (usage->last_write[chan] != (id & ~0xFF)) return usage->last_write[chan] >= last_barrier; @@ -1809,33 +1834,47 @@ static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b) return a->start <= b->end && b->start < a->end; } -static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range) +static int rate_replacement(struct gpr_usage usage[128], unsigned current, unsigned gpr, + struct gpr_usage_range* range) { + int max_gpr = gpr + MAX2(range->rel_block, 1); + int best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF; unsigned i; - int32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF; - for (i = 0; i < usage->nranges; ++i) { - if (usage->ranges[i].replacement != -1) - continue; /* ignore already remapped ranges */ + for (; gpr < max_gpr; ++gpr) { + + if (gpr >= 128) /* relative gpr block won't fit into clause temporaries */ + return -1; /* forget it */ + + if (gpr == current) /* ignore ranges of to be replaced register */ + continue; + + for (i = 0; i < usage[gpr].nranges; ++i) { + if (usage[gpr].ranges[i].replacement < gpr) + continue; /* ignore already remapped ranges */ - if (is_intersection(&usage->ranges[i], range)) - return -1; /* forget it if usages overlap */ + if (is_intersection(&usage[gpr].ranges[i], range)) + return -1; /* forget it if usages overlap */ - if (range->start >= usage->ranges[i].end) - best_start = MIN2(best_start, range->start - usage->ranges[i].end); + if (range->start >= usage[gpr].ranges[i].end) + best_start = MIN2(best_start, range->start - usage[gpr].ranges[i].end); - if (range->end != -1 && range->end <= usage->ranges[i].start) - best_end = MIN2(best_end, usage->ranges[i].start - range->end); + if (range->end != -1 && range->end <= usage[gpr].ranges[i].start) + best_end = MIN2(best_end, usage[gpr].ranges[i].start - range->end); + } } return best_start + best_end; } static void find_replacement(struct gpr_usage usage[128], unsigned current, - struct gpr_usage_range *range, int is_export) + struct gpr_usage_range *range) { - unsigned i; + unsigned i, j; int best_gpr = -1, best_rate = 0x7FFFFFFF; + if (range->replacement == current) + return; /* register prefers to be not remapped */ + if (range->replacement != -1 && range->replacement <= current) { struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start); if (other && other->replacement != -1) @@ -1843,7 +1882,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, } if (range->replacement != -1 && range->replacement < current) { - int rate = rate_replacement(&usage[range->replacement], range); + int rate = rate_replacement(usage, current, range->replacement, range); /* check if prefered replacement can be used */ if (rate != -1) { @@ -1854,9 +1893,9 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) { /* register is just used inside one ALU clause */ - /* try to use clause temporaryis for it */ + /* try to use clause temporaries for it */ for (i = 127; i > 123; --i) { - int rate = rate_replacement(&usage[i], range); + int rate = rate_replacement(usage, current, i, range); if (rate == -1) /* can't be used because ranges overlap */ continue; @@ -1866,7 +1905,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, best_gpr = i; /* can't get better than this */ - if (rate == 0 || is_export) + if (rate == 0) break; } } @@ -1874,7 +1913,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, if (best_gpr == -1) { for (i = 0; i < current; ++i) { - int rate = rate_replacement(&usage[i], range); + int rate = rate_replacement(usage, current, i, range); if (rate == -1) /* can't be used because ranges overlap */ continue; @@ -1890,33 +1929,30 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, } } - range->replacement = best_gpr; if (best_gpr != -1) { struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]); - reservation->replacement = -1; + reservation->replacement = best_gpr; + reservation->rel_block = -1; reservation->start = range->start; reservation->end = range->end; - } -} - -static void find_export_replacement(struct gpr_usage usage[128], - struct gpr_usage_range *range, struct r600_bc_cf *current, - struct r600_bc_cf *next, int32_t next_id) -{ - if (!next || next_id <= range->start || next_id > range->end) - return; - - if (current->output.type != next->output.type) - return; + } else + best_gpr = current; - if ((current->output.array_base + 1) != next->output.array_base) - return; + range->replacement = best_gpr; + if (range->rel_block == -1) + return; /* no relative block to handle we are done here */ - find_src_range(&usage[next->output.gpr], next_id)->replacement = range->replacement + 1; + /* set prefered register for the whole relative register block */ + for (i = current + 1, ++best_gpr; i < current + range->rel_block; ++i, ++best_gpr) { + for (j = 0; j < usage[i].nranges; ++j) { + if (is_intersection(&usage[i].ranges[j], range)) + usage[i].ranges[j].replacement = best_gpr; + } + } } static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct gpr_usage usage[128], - int32_t id, int32_t last_barrier, unsigned *barrier) + int id, int last_barrier, unsigned *barrier) { struct gpr_usage *cur_usage; struct gpr_usage_range *range; @@ -1930,8 +1966,7 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct cur_usage = &usage[alu->src[src].sel]; range = find_src_range(cur_usage, id); - if (range->replacement != -1) - alu->src[src].sel = range->replacement; + alu->src[src].sel = range->replacement; *barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier); } @@ -1939,15 +1974,18 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct if (alu->dst.write) { cur_usage = &usage[alu->dst.sel]; range = find_dst_range(cur_usage, id); - if (range->replacement == alu->dst.sel) { + if (!range || range->replacement == -1) { if (!alu->is_op3) alu->dst.write = 0; else /*TODO: really check that register 123 is useable */ alu->dst.sel = 123; - } else if (range->replacement != -1) { + } else { alu->dst.sel = range->replacement; + *barrier |= is_barrier_needed(cur_usage, id, alu->dst.chan, last_barrier); } + } + if (alu->dst.write) { if (alu->dst.rel) notice_gpr_rel_last_write(usage, id, alu->dst.chan); else @@ -1956,7 +1994,7 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct } static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], - int32_t id, int32_t last_barrier, unsigned *barrier) + int id, int last_barrier, unsigned *barrier) { struct gpr_usage *cur_usage = &usage[tex->src_gpr]; struct gpr_usage_range *range = find_src_range(cur_usage, id); @@ -1973,64 +2011,68 @@ static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128 if (tex->src_sel_w < 4) *barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier); } - - if (range->replacement != -1) - tex->src_gpr = range->replacement; + tex->src_gpr = range->replacement; cur_usage = &usage[tex->dst_gpr]; + range = find_dst_range(cur_usage, id); - if (range->replacement != -1) + if (range) { tex->dst_gpr = range->replacement; - if (tex->dst_rel) { - if (tex->dst_sel_x != 7) - notice_gpr_rel_last_write(usage, id, tex->dst_sel_x); - if (tex->dst_sel_y != 7) - notice_gpr_rel_last_write(usage, id, tex->dst_sel_y); - if (tex->dst_sel_z != 7) - notice_gpr_rel_last_write(usage, id, tex->dst_sel_z); - if (tex->dst_sel_w != 7) - notice_gpr_rel_last_write(usage, id, tex->dst_sel_w); + if (tex->dst_rel) { + if (tex->dst_sel_x != 7) + notice_gpr_rel_last_write(usage, id, tex->dst_sel_x); + if (tex->dst_sel_y != 7) + notice_gpr_rel_last_write(usage, id, tex->dst_sel_y); + if (tex->dst_sel_z != 7) + notice_gpr_rel_last_write(usage, id, tex->dst_sel_z); + if (tex->dst_sel_w != 7) + notice_gpr_rel_last_write(usage, id, tex->dst_sel_w); + } else { + if (tex->dst_sel_x != 7) + notice_gpr_last_write(cur_usage, id, tex->dst_sel_x); + if (tex->dst_sel_y != 7) + notice_gpr_last_write(cur_usage, id, tex->dst_sel_y); + if (tex->dst_sel_z != 7) + notice_gpr_last_write(cur_usage, id, tex->dst_sel_z); + if (tex->dst_sel_w != 7) + notice_gpr_last_write(cur_usage, id, tex->dst_sel_w); + } } else { - if (tex->dst_sel_x != 7) - notice_gpr_last_write(cur_usage, id, tex->dst_sel_x); - if (tex->dst_sel_y != 7) - notice_gpr_last_write(cur_usage, id, tex->dst_sel_y); - if (tex->dst_sel_z != 7) - notice_gpr_last_write(cur_usage, id, tex->dst_sel_z); - if (tex->dst_sel_w != 7) - notice_gpr_last_write(cur_usage, id, tex->dst_sel_w); + tex->dst_gpr = 123; } } static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], - int32_t id, int32_t last_barrier, unsigned *barrier) + int id, int last_barrier, unsigned *barrier) { struct gpr_usage *cur_usage = &usage[vtx->src_gpr]; struct gpr_usage_range *range = find_src_range(cur_usage, id); *barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier); - if (range->replacement != -1) - vtx->src_gpr = range->replacement; + vtx->src_gpr = range->replacement; cur_usage = &usage[vtx->dst_gpr]; range = find_dst_range(cur_usage, id); - if (range->replacement != -1) + if (range) { vtx->dst_gpr = range->replacement; - if (vtx->dst_sel_x != 7) - notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x); - if (vtx->dst_sel_y != 7) - notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y); - if (vtx->dst_sel_z != 7) - notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z); - if (vtx->dst_sel_w != 7) - notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w); + if (vtx->dst_sel_x != 7) + notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x); + if (vtx->dst_sel_y != 7) + notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y); + if (vtx->dst_sel_z != 7) + notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z); + if (vtx->dst_sel_w != 7) + notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w); + } else { + vtx->dst_gpr = 123; + } } static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], - int32_t id, int32_t last_barrier) + int id, int last_barrier) { //TODO handle other memory operations struct gpr_usage *cur_usage = &usage[cf->output.gpr]; @@ -2046,8 +2088,7 @@ static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[12 if (cf->output.swizzle_w < 4) cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier); - if (range->replacement != -1) - cf->output.gpr = range->replacement; + cf->output.gpr = range->replacement; } static void optimize_alu_inst(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_alu *alu) @@ -2138,9 +2179,9 @@ static void r600_bc_optimize(struct r600_bc *bc) /* assume that each gpr is exported only once */ struct r600_bc_cf *export_cf[128] = { NULL }; - int32_t export_remap[128]; + int export_remap[128]; - int32_t id, barrier[bc->nstack]; + int id, cond_start, barrier[bc->nstack]; unsigned i, j, stack, predicate, old_stack; memset(&usage, 0, sizeof(usage)); @@ -2155,7 +2196,11 @@ static void r600_bc_optimize(struct r600_bc *bc) /* first gather some informations about the gpr usage */ id = 0; stack = 0; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (get_cf_class(cf)) { + old_stack = stack; + if (stack == 0) + cond_start = stack; + + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: predicate = 0; first = NULL; @@ -2180,7 +2225,7 @@ static void r600_bc_optimize(struct r600_bc *bc) break; case CF_CLASS_TEXTURE: LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - notice_tex_gprs(tex, usage, id++, stack > 0); + notice_tex_gprs(bc, tex, usage, id++, stack > 0); } break; case CF_CLASS_VERTEX: @@ -2207,6 +2252,14 @@ static void r600_bc_optimize(struct r600_bc *bc) goto out; } } + + /* extend last_write after conditional block */ + if (stack == 0 && old_stack != 0) + for (i = 0; i < 128; ++i) + for (j = 0; j < 4; ++j) + if (usage[i].last_write[j] >= cond_start) + usage[i].last_write[j] = id; + id += 0x100; id &= ~0xFF; } @@ -2216,26 +2269,19 @@ static void r600_bc_optimize(struct r600_bc *bc) for (i = 0; i < 124; ++i) { for (j = 0; j < usage[i].nranges; ++j) { struct gpr_usage_range *range = &usage[i].ranges[j]; - int is_export = export_cf[i] && export_cf[i + 1] && - range->start < export_remap[i] && - export_remap[i] <= range->end; - if (range->start == -1) - range->replacement = -1; - else if (range->end == -1) + /* can't rearange shader inputs */ range->replacement = i; + else if (range->end == -1) + /* gpr isn't used any more after this instruction */ + range->replacement = -1; else - find_replacement(usage, i, range, is_export); + find_replacement(usage, i, range); - if (range->replacement == -1) + if (range->replacement == i) bc->ngpr = i; else if (range->replacement < i && range->replacement > bc->ngpr) bc->ngpr = range->replacement; - - if (is_export && range->replacement != -1) { - find_export_replacement(usage, range, export_cf[i], - export_cf[i + 1], export_remap[i + 1]); - } } } bc->ngpr++; @@ -2251,7 +2297,7 @@ static void r600_bc_optimize(struct r600_bc *bc) id = 0; stack = 0; LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { old_stack = stack; - switch (get_cf_class(cf)) { + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: predicate = 0; first = NULL; @@ -2308,7 +2354,7 @@ static void r600_bc_optimize(struct r600_bc *bc) barrier[i] = barrier[old_stack]; id += 0x100; - if (stack != 0) /* ensue exports are placed outside of conditional blocks */ + if (stack != 0) /* ensure exports are placed outside of conditional blocks */ continue; for (i = 0; i < 128; ++i) { @@ -2350,13 +2396,13 @@ int r600_bc_build(struct r600_bc *bc) bc->nstack = 1; } - r600_bc_optimize(bc); + //r600_bc_optimize(bc); /* first path compute addr of each CF block */ /* addr start after all the CF instructions */ addr = LIST_ENTRY(struct r600_bc_cf, bc->cf.prev, list)->id + 2; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (get_cf_class(cf)) { + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: break; case CF_CLASS_TEXTURE: @@ -2365,7 +2411,6 @@ int r600_bc_build(struct r600_bc *bc) addr += 3; addr &= 0xFFFFFFFCUL; break; - break; case CF_CLASS_EXPORT: if (cf->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT)) exports[cf->output.type] = cf; @@ -2400,7 +2445,7 @@ int r600_bc_build(struct r600_bc *bc) r = r600_bc_cf_build(bc, cf); if (r) return r; - switch (get_cf_class(cf)) { + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: nliteral = 0; memset(literal, 0, sizeof(literal)); @@ -2526,7 +2571,7 @@ void r600_bc_dump(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { id = cf->id; - switch (get_cf_class(cf)) { + switch (r600_bc_cf_class(cf)) { case CF_CLASS_ALU: fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); fprintf(stderr, "ADDR:%04d ", cf->addr); @@ -2564,7 +2609,6 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); - fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); fprintf(stderr, "BARRIER:%d ", cf->barrier); fprintf(stderr, "INST:%d ", cf->inst); fprintf(stderr, "BURST_COUNT:%d\n", cf->output.burst_count); @@ -2649,21 +2693,21 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) if (count > 8) { bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(8 - 1); bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(count - 8 - 1); } else { bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(count - 1); } bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(0); + S_SQ_CF_WORD1_BARRIER(1); rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; @@ -2685,21 +2729,21 @@ void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned coun if (count > 8) { bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(8 - 1); bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT((count - 8) - 1); } else { bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(0) | + S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(count - 1); } bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(0); + S_SQ_CF_WORD1_BARRIER(1); rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; -- 2.30.2