+/*
+ * One live range of a GPR, measured in instruction ids.
+ * A read on a register with no ranges yet opens a range with start == -1;
+ * a write opens a range with end == -1 until a later read sets end.
+ */
+struct gpr_usage_range {
+ int replacement; /* GPR index to use instead for this range; -1 means no replacement */
+ int32_t start; /* id of the write that opens the range, or -1 if read before any write */
+ int32_t end; /* id of the last read seen so far, or -1 if not read yet */
+};
+
+/*
+ * Per-register bookkeeping used while scanning the bytecode.
+ * The ranges array is grown with realloc() and released by the caller with free().
+ */
+struct gpr_usage {
+ unsigned channels:4; /* bit per channel: set by a read, cleared by a write */
+ int32_t first_write; /* id of the earliest write since the last read, or -1 */
+ unsigned nranges; /* number of valid entries in ranges */
+ struct gpr_usage_range *ranges; /* heap array of nranges entries, NULL while empty */
+};
+
+/*
+ * Append one range slot to usage->ranges.
+ *
+ * Returns a pointer to the new slot, or NULL if the array could not be grown.
+ * On failure usage->ranges and usage->nranges are left untouched, so the
+ * existing entries stay valid and can still be freed by the owner.
+ * The new slot starts out as replacement = -1, start = -1, end = -1; callers
+ * overwrite the fields they care about.
+ */
+static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
+{
+	struct gpr_usage_range *grown;
+	struct gpr_usage_range *slot;
+
+	/* keep the old pointer until realloc is known to have succeeded */
+	grown = realloc(usage->ranges, (usage->nranges + 1) * sizeof(*grown));
+	if (!grown)
+		return NULL;
+
+	usage->ranges = grown;
+	slot = &grown[usage->nranges++];
+
+	/* never hand out indeterminate fields: replacement is read before
+	 * every range has been through the optimisation loop */
+	slot->replacement = -1;
+	slot->start = -1;
+	slot->end = -1;
+	return slot;
+}
+
+/*
+ * Record that channel `chan` of the register described by `usage` is read
+ * by instruction `id`.
+ *
+ * Marks the channel as read, forgets any pending first write and extends the
+ * most recent range so that it ends at `id`. If the register has no range
+ * yet, one with start == -1 is opened first. If that range cannot be
+ * allocated the read is not recorded.
+ */
+static void notice_gpr_read(struct gpr_usage *usage, uint32_t id, unsigned chan)
+{
+	usage->channels |= 1 << chan;
+	usage->first_write = -1;
+
+	if (!usage->nranges) {
+		struct gpr_usage_range *range = add_gpr_usage_range(usage);
+
+		/* allocation failed: there is no range to extend */
+		if (!range)
+			return;
+		range->start = -1;
+	}
+
+	usage->ranges[usage->nranges-1].end = id;
+}
+
+/*
+ * Record a relatively addressed read: every one of the 128 tracked registers
+ * is treated as read on channel `chan` by instruction `id`.
+ */
+static void notice_gpr_rel_read(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+{
+	struct gpr_usage *gpr = usage;
+	struct gpr_usage *const stop = usage + 128;
+
+	while (gpr != stop)
+		notice_gpr_read(gpr++, id, chan);
+}
+
+/*
+ * Record that channel `chan` of the register described by `usage` is written
+ * by instruction `id`.
+ *
+ * The channel is cleared from the read mask. While other channels are still
+ * marked as read only the first write id is remembered. Once no channel is
+ * marked any more, a new range is opened at the remembered first write (or at
+ * `id`), unless the most recent range already starts there. If the range
+ * cannot be allocated the write is not recorded as a range.
+ */
+static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan)
+{
+	/* same signedness as gpr_usage_range.start, so the comparison below is exact */
+	int32_t start = usage->first_write != -1 ? usage->first_write : (int32_t)id;
+	struct gpr_usage_range *range;
+
+	usage->channels &= ~(1 << chan);
+	if (usage->channels) {
+		if (usage->first_write == -1)
+			usage->first_write = id;
+		return;
+	}
+
+	/* a range starting at this id is already open */
+	if (usage->nranges && usage->ranges[usage->nranges-1].start == start)
+		return;
+
+	usage->first_write = start;
+	range = add_gpr_usage_range(usage);
+	if (!range)
+		return;
+	range->start = start;
+	range->end = -1;
+}
+
+/*
+ * Placeholder for relatively addressed writes. The register that is really
+ * written is not known here, so nothing is recorded for now.
+ */
+static void notice_gpr_rel_write(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+{
+	(void)usage;
+	(void)id;
+	(void)chan;
+}
+
+/*
+ * Record the GPR reads performed by the source operands of one ALU
+ * instruction. Operands that are not GPRs are skipped; relative operands are
+ * recorded as a read of every register.
+ */
+static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+{
+	const unsigned operand_count = r600_bc_get_num_operands(alu);
+	unsigned n;
+
+	for (n = 0; n < operand_count; ++n) {
+		/* only register operands are tracked */
+		if (is_gpr(alu->src[n].sel)) {
+			if (alu->src[n].rel)
+				notice_gpr_rel_read(usage, id, alu->src[n].chan);
+			else
+				notice_gpr_read(&usage[alu->src[n].sel], id, alu->src[n].chan);
+		}
+	}
+}
+
+/*
+ * Record the GPR writes of a run of ALU instructions, starting at alu_first
+ * and stopping after the first instruction flagged as `last`.
+ */
+static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128], uint32_t id)
+{
+	struct r600_bc_alu *cur = alu_first;
+
+	while (cur) {
+		if (cur->dst.write) {
+			if (cur->dst.rel)
+				notice_gpr_rel_write(usage, id, cur->dst.chan);
+			else
+				notice_gpr_write(&usage[cur->dst.sel], id, cur->dst.chan);
+		}
+
+		/* the instruction flagged `last` closes the run */
+		if (cur->last)
+			return;
+
+		cur = LIST_ENTRY(struct r600_bc_alu, cur->list.next, list);
+	}
+}
+
+/*
+ * Record the GPR reads and writes of one texture instruction.
+ * A source select below 4 names a channel that is read; a destination select
+ * other than 7 means the corresponding channel (0..3) is written.
+ * All reads are recorded before any write.
+ */
+static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+{
+	const unsigned dst_sel[4] = {
+		tex->dst_sel_x, tex->dst_sel_y, tex->dst_sel_z, tex->dst_sel_w
+	};
+	unsigned chan;
+
+	/* source channels, in x, y, z, w order */
+	if (tex->src_sel_x < 4) {
+		if (tex->src_rel)
+			notice_gpr_rel_read(usage, id, tex->src_sel_x);
+		else
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
+	}
+	if (tex->src_sel_y < 4) {
+		if (tex->src_rel)
+			notice_gpr_rel_read(usage, id, tex->src_sel_y);
+		else
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_y);
+	}
+	if (tex->src_sel_z < 4) {
+		if (tex->src_rel)
+			notice_gpr_rel_read(usage, id, tex->src_sel_z);
+		else
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_z);
+	}
+	if (tex->src_sel_w < 4) {
+		if (tex->src_rel)
+			notice_gpr_rel_read(usage, id, tex->src_sel_w);
+		else
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_w);
+	}
+
+	/* destination channels 0..3 */
+	for (chan = 0; chan < 4; ++chan) {
+		if (dst_sel[chan] == 7)
+			continue;
+
+		if (tex->dst_rel)
+			notice_gpr_rel_write(usage, id, chan);
+		else
+			notice_gpr_write(&usage[tex->dst_gpr], id, chan);
+	}
+}
+
+/*
+ * Record the GPR accesses of one vertex fetch instruction: one read of the
+ * source register on channel src_sel_x, then a write for every destination
+ * channel whose select is not 7.
+ */
+static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+{
+	const unsigned dst_sel[4] = {
+		vtx->dst_sel_x, vtx->dst_sel_y, vtx->dst_sel_z, vtx->dst_sel_w
+	};
+	unsigned chan;
+
+	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
+
+	for (chan = 0; chan < 4; ++chan) {
+		if (dst_sel[chan] != 7)
+			notice_gpr_write(&usage[vtx->dst_gpr], id, chan);
+	}
+}
+
+/*
+ * Record the GPR reads of an export: every swizzle below 4 names a channel
+ * of cf->output.gpr that is read.
+ */
+static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+{
+	//TODO handle other memory operations
+	struct gpr_usage *exported = &usage[cf->output.gpr];
+
+	if (cf->output.swizzle_x < 4)
+		notice_gpr_read(exported, id, cf->output.swizzle_x);
+
+	if (cf->output.swizzle_y < 4)
+		notice_gpr_read(exported, id, cf->output.swizzle_y);
+
+	if (cf->output.swizzle_z < 4)
+		notice_gpr_read(exported, id, cf->output.swizzle_z);
+
+	if (cf->output.swizzle_w < 4)
+		notice_gpr_read(exported, id, cf->output.swizzle_w);
+}
+
+/*
+ * Return 1 if `value` lies in the half-open interval [start, end) of `range`,
+ * 0 otherwise. A start of -1 is treated as 0.
+ */
+static int is_in_range(struct gpr_usage_range* range, int32_t value)
+{
+	const int32_t lower = (range->start != -1) ? range->start : 0;
+
+	if (value < lower)
+		return 0;
+
+	return value < range->end;
+}
+
+/*
+ * Rate how well `range` would fit into the register described by `usage`.
+ *
+ * Returns -1 if `range` collides with a range of that register which has not
+ * been remapped itself. Otherwise returns the sum of the smallest gap before
+ * `range` and the smallest gap after it (lower is better); each gap defaults
+ * to 0x3FFFFFFF when there is no neighbour on that side.
+ *
+ * A collision is reported when either end of `range` falls inside an existing
+ * range, and also when an existing range begins inside `range`. The latter
+ * covers an existing range that lies completely within `range`, which the
+ * end-point tests alone do not detect.
+ */
+static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
+{
+	unsigned i;
+	uint32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
+
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range *other = &usage->ranges[i];
+		/* same convention as is_in_range(): -1 means "from the beginning" */
+		int32_t other_start = other->start == -1 ? 0 : other->start;
+
+		if (other->replacement != -1)
+			continue; /* ignore already remapped ranges */
+
+		if (is_in_range(other, range->start) ||
+		    is_in_range(other, range->end))
+			return -1; /* forget it if usages overlap */
+
+		if (range->start <= other_start && other_start < range->end)
+			return -1; /* existing range begins inside the new one */
+
+		if (range->start >= other->end)
+			best_start = MIN2(best_start, range->start - other->end);
+
+		if (range->end != -1 && range->end <= other->start)
+			best_end = MIN2(best_end, other->start - range->end);
+	}
+	return best_start + best_end;
+}
+
+/*
+ * Pick a replacement register for `range`, which belongs to register
+ * `current`, and store it in range->replacement (-1 if none was found).
+ *
+ * If start and end of the range agree in everything above the low 8 bits,
+ * registers 127 down to 124 are tried first. If none of them fits, registers
+ * 0 .. current-1 are tried. The candidate with the lowest rating from
+ * rate_replacement() wins; a rating of 0 ends the search early.
+ *
+ * The chosen register gets a reservation range so that later searches see it
+ * as occupied. If that reservation cannot be allocated, no replacement is
+ * made, because the register could otherwise be handed out twice.
+ */
+static void find_replacement(struct gpr_usage usage[128], unsigned current, struct gpr_usage_range *range)
+{
+	unsigned i;
+	int best_gpr = -1, best_rate = 0x7FFFFFFF;
+	struct gpr_usage_range *reservation;
+
+	if ((range->start & ~0xFF) == (range->end & ~0xFF)) {
+		/* register is just used inside one ALU clause */
+		/* try to use clause temporaries for it */
+		for (i = 127; i > 123; --i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0)
+					break;
+			}
+		}
+	}
+
+	if (best_gpr == -1) {
+		for (i = 0; i < current; ++i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0)
+					break;
+			}
+		}
+	}
+
+	range->replacement = best_gpr;
+	if (best_gpr == -1)
+		return;
+
+	reservation = add_gpr_usage_range(&usage[best_gpr]);
+	if (!reservation) {
+		/* without a reservation the target is not protected: keep the original register */
+		range->replacement = -1;
+		return;
+	}
+	reservation->replacement = -1;
+	reservation->start = range->start;
+	reservation->end = range->end;
+}
+
+/*
+ * Look up the range a read at instruction `id` belongs to: the first range
+ * with start < id <= end. Returns NULL if there is none.
+ */
+static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
+{
+	struct gpr_usage_range *cur = usage->ranges;
+	unsigned left;
+
+	for (left = usage->nranges; left; --left, ++cur) {
+		if (id > cur->start && id <= cur->end)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the range a write at instruction `id` belongs to: the first range
+ * with start <= id that either ends after `id` or has no end yet (end == -1).
+ * Returns NULL if there is none.
+ */
+static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
+{
+	struct gpr_usage_range *cur = usage->ranges;
+	unsigned left;
+
+	for (left = usage->nranges; left; --left, ++cur) {
+		if (cur->start > id)
+			continue;
+
+		if (cur->end == -1 || id < cur->end)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Apply the chosen replacements to one ALU instruction.
+ *
+ * Every GPR source operand is redirected to the replacement of the range
+ * covering the read at `id`. For a written destination, a range whose
+ * replacement equals the destination register itself has its write switched
+ * off, or for op3 instructions redirected to register 123; any other
+ * replacement is applied directly.
+ *
+ * Operands for which no range was recorded are left untouched. This can
+ * happen for relative destination writes, which the gathering pass ignores.
+ */
+static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+	unsigned src, num_src;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; ++src) {
+		// constants doesn't matter
+		if (!is_gpr(alu->src[src].sel))
+			continue;
+
+		range = find_src_range(&usage[alu->src[src].sel], id);
+		if (range && range->replacement != -1)
+			alu->src[src].sel = range->replacement;
+	}
+
+	if (!alu->dst.write)
+		return;
+
+	range = find_dst_range(&usage[alu->dst.sel], id);
+	if (!range)
+		return; /* nothing recorded for this write, keep the register */
+
+	if (range->replacement == alu->dst.sel) {
+		if (!alu->is_op3)
+			alu->dst.write = 0;
+		else
+			/*TODO: really check that register 123 is useable */
+			alu->dst.sel = 123;
+	} else if (range->replacement != -1) {
+		alu->dst.sel = range->replacement;
+	}
+}
+
+/*
+ * Apply the chosen replacements to the source and destination register of
+ * one texture instruction. A register for which no matching range was
+ * recorded (for example because no channel of it is read or written) is
+ * left unchanged.
+ */
+static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+
+	range = find_src_range(&usage[tex->src_gpr], id);
+	if (range && range->replacement != -1)
+		tex->src_gpr = range->replacement;
+
+	range = find_dst_range(&usage[tex->dst_gpr], id);
+	if (range && range->replacement != -1)
+		tex->dst_gpr = range->replacement;
+}
+
+/*
+ * Apply the chosen replacements to the source and destination register of
+ * one vertex fetch instruction. A register for which no matching range was
+ * recorded is left unchanged.
+ */
+static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+
+	range = find_src_range(&usage[vtx->src_gpr], id);
+	if (range && range->replacement != -1)
+		vtx->src_gpr = range->replacement;
+
+	range = find_dst_range(&usage[vtx->dst_gpr], id);
+	if (range && range->replacement != -1)
+		vtx->dst_gpr = range->replacement;
+}
+
+/*
+ * Apply the chosen replacement to the register read by an export.
+ * If no read range was recorded for it (every swizzle is 4 or above) the
+ * register is left unchanged.
+ */
+static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+{
+	//TODO handle other memory operations
+	struct gpr_usage_range *range;
+
+	range = find_src_range(&usage[cf->output.gpr], id);
+	if (range && range->replacement != -1)
+		cf->output.gpr = range->replacement;
+}
+
+/*
+ * Try to reduce the number of GPRs used by the bytecode in three passes:
+ * gather the live ranges of registers, choose a replacement register per
+ * range, then rewrite the instructions accordingly.
+ * Instruction ids are built as (cf->id << 8) plus a counter that advances
+ * per ALU group (on alu->last) or per texture/vertex instruction.
+ * If a CF_CLASS_OTHER instruction other than CALL_FS is found, nothing is
+ * changed.
+ */
+static void r600_bc_optimize_gprs(struct r600_bc *bc)
+{
+ struct r600_bc_cf *cf;
+ struct r600_bc_alu *first;
+ struct r600_bc_alu *alu;
+ struct r600_bc_vtx *vtx;
+ struct r600_bc_tex *tex;
+ struct gpr_usage usage[128];
+ uint32_t id;
+ unsigned i, j;
+
+ memset(&usage, 0, sizeof(usage));
+ for (i = 0; i < 128; ++i)
+ usage[i].first_write = -1;
+
+ /* first gather some informations about the gpr usage */
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id << 8;
+ switch (get_cf_class(cf)) {
+ case CF_CLASS_ALU:
+ /* sources are noticed per instruction, destinations once per group when its last instruction is reached */
+ first = NULL;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ if (!first)
+ first = alu;
+ notice_alu_src_gprs(alu, usage, id);
+ if (alu->last) {
+ notice_alu_dst_gprs(first, usage, id);
+ first = NULL;
+ ++id;
+ }
+ }
+ break;
+ case CF_CLASS_TEXTURE:
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ notice_tex_gprs(tex, usage, id++);
+ }
+ break;
+ case CF_CLASS_VERTEX:
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ notice_vtx_gprs(vtx, usage, id++);
+ }
+ break;
+ case CF_CLASS_EXPORT:
+ notice_export_gprs(cf, usage, id);
+ break;
+ case CF_CLASS_OTHER:
+ // TODO implement conditional and loop handling
+ /* anything but CALL_FS aborts the optimisation and only frees the gathered data */
+ if (cf->inst != V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS)
+ goto out;
+ break;
+ }
+ }
+
+ /* try to optimize gpr usage */
+ /* only registers 0..123 are remapped; 124..127 are used as targets by find_replacement */
+ for (i = 0; i < 124; ++i) {
+ for (j = 0; j < usage[i].nranges; ++j) {
+ struct gpr_usage_range *range = &usage[i].ranges[j];
+ /* read before any write: keep the register */
+ if (range->start == -1)
+ range->replacement = -1;
+ /* written but never read: mark with its own index, see replace_alu_gprs */
+ else if (range->end == -1)
+ range->replacement = i;
+ else
+ find_replacement(usage, i, range);
+
+ /* NOTE(review): until the first assignment below, the comparison uses the value bc->ngpr had on entry - confirm this is intended */
+ if (range->replacement == -1)
+ bc->ngpr = i;
+ else if (range->replacement < i && range->replacement > bc->ngpr)
+ bc->ngpr = range->replacement;
+ }
+ }
+ /* the loop stored a register index; the increment presumably turns it into a count - TODO confirm */
+ bc->ngpr++;
+
+ /* apply the changes */
+ /* ids are rebuilt exactly as in the gathering pass so range lookups match */
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id << 8;
+ switch (get_cf_class(cf)) {
+ case CF_CLASS_ALU:
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ replace_alu_gprs(alu, usage, id);
+ if (alu->last)
+ ++id;
+ }
+ break;
+ case CF_CLASS_TEXTURE:
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ replace_tex_gprs(tex, usage, id++);
+ }
+ break;
+ case CF_CLASS_VERTEX:
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ replace_vtx_gprs(vtx, usage, id++);
+ }
+ break;
+ case CF_CLASS_EXPORT:
+ replace_export_gprs(cf, usage, id);
+ break;
+ case CF_CLASS_OTHER:
+ break;
+ }
+ }
+
+out:
+ /* release the per-register range arrays on every path */
+ for (i = 0; i < 128; ++i) {
+ free(usage[i].ranges);
+ }
+}
+