shader->chunk = chunk->next;
free(chunk);
}
- free(shader->instrs);
+ free(shader->indirects);
free(shader->baryfs);
free(shader);
}
void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
uint32_t gpu_id)
{
+ struct ir3_block *block = shader->block;
uint32_t *ptr, *dwords;
- uint32_t i;
info->max_reg = -1;
info->max_half_reg = -1;
info->max_const = -1;
info->instrs_count = 0;
+ info->sizedwords = 0;
+
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ info->sizedwords += 2;
+ }
/* need a integer number of instruction "groups" (sets of 16
* instructions on a4xx or sets of 4 instructions on a3xx),
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
*/
if (gpu_id >= 400) {
- info->sizedwords = 2 * align(shader->instrs_count, 16);
+ info->sizedwords = align(info->sizedwords, 16 * 2);
} else {
- info->sizedwords = 2 * align(shader->instrs_count, 4);
+ info->sizedwords = align(info->sizedwords, 4 * 2);
}
ptr = dwords = calloc(4, info->sizedwords);
- for (i = 0; i < shader->instrs_count; i++) {
- struct ir3_instruction *instr = shader->instrs[i];
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
int ret = emit[instr->category](instr, dwords, info);
if (ret)
goto fail;
return reg;
}
-static void insert_instr(struct ir3 *shader,
+static void insert_instr(struct ir3_block *block,
struct ir3_instruction *instr)
{
+ struct ir3 *shader = block->shader;
#ifdef DEBUG
static uint32_t serialno = 0;
instr->serialno = ++serialno;
#endif
- array_insert(shader->instrs, instr);
+ list_addtail(&instr->node, &block->instr_list);
if (is_input(instr))
array_insert(shader->baryfs, instr);
block->shader = shader;
+ list_inithead(&block->instr_list);
+
return block;
}
instr->block = block;
instr->category = category;
instr->opc = opc;
- insert_instr(block->shader, instr);
+ insert_instr(block, instr);
return instr;
}
*new_instr = *instr;
new_instr->regs = regs;
- insert_instr(instr->block->shader, new_instr);
+ insert_instr(instr->block, new_instr);
/* clone registers: */
new_instr->regs_count = 0;
#include <stdbool.h>
#include "util/u_debug.h"
+#include "util/list.h"
#include "instr-a3xx.h"
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
*/
struct ir3_instruction *fanin;
- struct ir3_instruction *next;
+ /* Entry in ir3_block's instruction list: */
+ struct list_head node;
+
#ifdef DEBUG
uint32_t serialno;
#endif
struct ir3_heap_chunk;
struct ir3 {
- unsigned instrs_count, instrs_sz;
- struct ir3_instruction **instrs;
/* Track bary.f (and ldlv) instructions.. this is needed in
* scheduling to ensure that all varying fetches happen before
/* only a single address register: */
struct ir3_instruction *address;
struct ir3_block *parent;
- struct ir3_instruction *head;
+ struct list_head instr_list;
};
struct ir3 * ir3_create(void);
* a block, so tracking the list of instrs globally is
* unlikely to be what we want.
*/
- unsigned i;
- for (i = 0; i < shader->instrs_count; i++) {
- struct ir3_instruction *instr = shader->instrs[i];
+ list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node)
instr->flags &= ~IR3_INSTR_MARK;
- }
}
static inline int ir3_instr_regno(struct ir3_instruction *instr,
/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
+void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
void ir3_block_depth(struct ir3_block *block);
/* copy-propagate: */
{
struct ir3_register *reg;
- /* stay within the block.. don't try to operate across
- * basic block boundaries or we'll have problems when
- * dealing with multiple basic blocks:
- */
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
- return instr;
-
if (is_eligible_mov(instr, !!flags)) {
struct ir3_register *reg = instr->regs[1];
struct ir3_instruction *src_instr = ssa(reg);
return instr;
}
-static void block_cp(struct ir3_block *block)
+void ir3_block_cp(struct ir3_block *block)
{
- unsigned i;
+ ir3_clear_mark(block->shader);
- for (i = 0; i < block->noutputs; i++) {
+ for (unsigned i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
instr_cp(block->outputs[i], NULL);
}
}
}
-
-void ir3_block_cp(struct ir3_block *block)
-{
- ir3_clear_mark(block->shader);
- block_cp(block);
-}
}
}
-static void insert_by_depth(struct ir3_instruction *instr)
+/* (Re)insert 'instr' into 'list' keeping the list sorted by decreasing
+ * depth (deepest first).  Previously static/per-block (insert_by_depth);
+ * now takes the target list explicitly and is exported so other passes
+ * can maintain depth-sorted lists.
+ */
+void
+ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
{
- struct ir3_block *block = instr->block;
- struct ir3_instruction *n = block->head;
- struct ir3_instruction *p = NULL;
-
- while (n && (n != instr) && (n->depth > instr->depth)) {
- p = n;
- n = n->next;
+ /* remove from existing spot in list: */
+ list_delinit(&instr->node);
+
+ /* find where to re-insert instruction: */
+ list_for_each_entry (struct ir3_instruction, pos, list, node) {
+ if (pos->depth > instr->depth) {
+ list_add(&instr->node, &pos->node);
+ return;
+ }
}
-
- instr->next = n;
- if (p)
- p->next = instr;
- else
- block->head = instr;
+ /* if we get here, we didn't find an insertion spot: */
+ list_addtail(&instr->node, list);
}
-static void ir3_instr_depth(struct ir3_instruction *instr)
+static void
+ir3_instr_depth(struct ir3_instruction *instr)
{
struct ir3_instruction *src;
instr->depth = MAX2(instr->depth, sd);
}
- /* meta-instructions don't add cycles, other than PHI.. which
- * might translate to a real instruction..
- *
- * well, not entirely true, fan-in/out, etc might need to need
- * to generate some extra mov's in edge cases, etc.. probably
- * we might want to do depth calculation considering the worst
- * case for these??
- */
if (!is_meta(instr))
instr->depth++;
- insert_by_depth(instr);
+ ir3_insert_by_depth(instr, &instr->block->instr_list);
+}
+
+/* Remove instructions that were not marked during the depth traversal
+ * (i.e. not reachable from any block output).  Depth is set to
+ * DEPTH_UNUSED before unlinking so the later "cleanup unused inputs"
+ * pass (which scans block->inputs[]) can still recognize them.
+ */
+static void
+remove_unused_by_block(struct ir3_block *block)
+{
+ list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (!ir3_instr_check_mark(instr)) {
+ /* mark it, in case it is input, so we can
+ * remove unused inputs:
+ */
+ instr->depth = DEPTH_UNUSED;
+ /* and remove from instruction list: */
+ list_delinit(&instr->node);
+ }
+ }
+}
void ir3_block_depth(struct ir3_block *block)
{
unsigned i;
- block->head = NULL;
-
ir3_clear_mark(block->shader);
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ir3_instr_depth(block->outputs[i]);
/* mark un-used instructions: */
- for (i = 0; i < block->shader->instrs_count; i++) {
- struct ir3_instruction *instr = block->shader->instrs[i];
-
- /* just consider instructions within this block: */
- if (instr->block != block)
- continue;
-
- if (!ir3_instr_check_mark(instr))
- instr->depth = DEPTH_UNUSED;
- }
+ remove_unused_by_block(block);
/* cleanup unused inputs: */
for (i = 0; i < block->ninputs; i++) {
static void legalize(struct ir3_legalize_ctx *ctx)
{
struct ir3_block *block = ctx->block;
- struct ir3_instruction *n;
- struct ir3 *shader = block->shader;
- struct ir3_instruction *end =
- ir3_instr_create(block, 0, OPC_END);
struct ir3_instruction *last_input = NULL;
struct ir3_instruction *last_rel = NULL;
+ struct list_head instr_list;
regmask_t needs_ss_war; /* write after read */
regmask_t needs_ss;
regmask_t needs_sy;
regmask_init(&needs_ss);
regmask_init(&needs_sy);
- shader->instrs_count = 0;
+ /* remove all the instructions from the list, we'll be adding
+ * them back in as we go
+ */
+ list_replace(&block->instr_list, &instr_list);
+ list_inithead(&block->instr_list);
- for (n = block->head; n; n = n->next) {
+ list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) {
struct ir3_register *reg;
unsigned i;
}
/* need to be able to set (ss) on first instruction: */
- if ((shader->instrs_count == 0) && (n->category >= 5))
+ if (list_empty(&block->instr_list) && (n->category >= 5))
ir3_NOP(block);
- if (is_nop(n) && shader->instrs_count) {
- struct ir3_instruction *last =
- shader->instrs[shader->instrs_count-1];
+ if (is_nop(n) && !list_empty(&block->instr_list)) {
+ struct ir3_instruction *last = list_last_entry(&block->instr_list,
+ struct ir3_instruction, node);
if (is_nop(last) && (last->repeat < 5)) {
last->repeat++;
last->flags |= n->flags;
}
}
- shader->instrs[shader->instrs_count++] = n;
+ list_addtail(&n->node, &block->instr_list);
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
* the (ei) flag:
*/
if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
- int i, cnt;
+ struct ir3_instruction *baryf;
- /* note that ir3_instr_create() inserts into
- * shader->instrs[] and increments the count..
- * so we need to bump up the cnt initially (to
- * avoid it clobbering the last real instr) and
- * restore it after.
- */
- cnt = ++shader->instrs_count;
-
- /* inserting instructions would be a bit nicer if list.. */
- for (i = cnt - 2; i >= 0; i--) {
- if (shader->instrs[i] == last_input) {
-
- /* (ss)bary.f (ei)r63.x, 0, r0.x */
- last_input = ir3_instr_create(block, 2, OPC_BARY_F);
- last_input->flags |= IR3_INSTR_SS;
- ir3_reg_create(last_input, regid(63, 0), 0);
- ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
- ir3_reg_create(last_input, regid(0, 0), 0);
+ /* (ss)bary.f (ei)r63.x, 0, r0.x */
+ baryf = ir3_instr_create(block, 2, OPC_BARY_F);
+ baryf->flags |= IR3_INSTR_SS;
+ ir3_reg_create(baryf, regid(63, 0), 0);
+ ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
+ ir3_reg_create(baryf, regid(0, 0), 0);
- shader->instrs[i + 1] = last_input;
-
- break;
- }
- shader->instrs[i + 1] = shader->instrs[i];
- }
+ /* insert the dummy bary.f after last_input: */
+ list_add(&baryf->node, &last_input->node);
- shader->instrs_count = cnt;
+ last_input = baryf;
}
last_input->regs[0]->flags |= IR3_REG_EI;
}
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
- shader->instrs[shader->instrs_count++] = end;
+ /* create/add 'end' instruction: */
+ ir3_instr_create(block, 0, OPC_END);
- shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+ list_first_entry(&block->instr_list, struct ir3_instruction, node)
+ ->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
void ir3_block_legalize(struct ir3_block *block,
static void
print_block(struct ir3_block *block, int lvl)
{
- struct ir3_instruction *instr;
tab(lvl); printf("block {\n");
- for (instr = block->head; instr; instr = instr->next) {
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
print_instr(instr, lvl+1);
}
tab(lvl); printf("}\n");
# define ra_debug 0
#endif
-#define ra_dump_list(msg, n) do { \
+#define ra_dump_list(msg, ir) do { \
if (ra_debug) { \
debug_printf("-- " msg); \
- ir3_print(n->block->shader); \
+ ir3_print(ir); \
} \
} while (0)
static void compute_liveregs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, regmask_t *liveregs)
{
- struct ir3_block *block = instr->block;
- struct ir3_instruction *n;
+ struct ir3_block *block = ctx->block;
regmask_t written;
unsigned i;
regmask_init(&written);
- for (n = instr->next; n; n = n->next) {
+ list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
struct ir3_register *r;
if (is_meta(n))
static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned name)
{
- struct ir3_instruction *n, *src;
-
- for (n = instr->next; n && !ctx->error; n = n->next) {
+ list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
+ struct ir3_instruction *src;
foreach_ssa_src_n(src, i, n) {
unsigned r = i + 1;
if (src == instr)
instr_assign_src(ctx, n, r, name);
}
+ if (ctx->error)
+ break;
}
}
}
-static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+/* Per-block register assignment.  Two passes over the block's
+ * instruction list: first assign array registers (META_FI instrs
+ * with an array id), then allocate/assign everything else.  Returns
+ * false as soon as ctx->error is set.
+ *
+ * NOTE(review): the (block, void *state) shape looks like a block-
+ * iterator callback signature; only the direct call from shader_ra()
+ * is visible in this patch -- confirm against any block-walk helper.
+ */
+static bool
+block_ra(struct ir3_block *block, void *state)
{
- struct ir3_instruction *n;
+ struct ir3_ra_ctx *ctx = state;
+
+ ra_dump_list("-------\n", block->shader);
+
+ /* first pass, assign arrays: */
+ list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
+ if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
+ debug_assert(!n->cp.left); /* don't think this should happen */
+ ra_dump_instr("ASSIGN ARRAY: ", n);
+ instr_assign_array(ctx, n);
+ ra_dump_list("-------\n", block->shader);
+ }
+
+ if (ctx->error)
+ return false;
+ }
+
+ /* second pass: allocate/assign everything else, starting from the
+ * first instruction in each neighbor group:
+ */
+ list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
+ ra_dump_instr("ASSIGN: ", n);
+ instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
+ ra_dump_list("-------\n", block->shader);
+
+ if (ctx->error)
+ return false;
+ }
+
+ return true;
+}
+static int
+shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
- if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
+ if (ctx->type == SHADER_FRAGMENT) {
unsigned i = 0, j;
if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
/* if we have frag_face, it gets hr0.x */
instr_assign(ctx, block->inputs[i], j);
}
- ra_dump_list("-------\n", block->head);
-
- /* first pass, assign arrays: */
- for (n = block->head; n && !ctx->error; n = n->next) {
- if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
- debug_assert(!n->cp.left); /* don't think this should happen */
- ra_dump_instr("ASSIGN ARRAY: ", n);
- instr_assign_array(ctx, n);
- ra_dump_list("-------\n", block->head);
- }
- }
-
- for (n = block->head; n && !ctx->error; n = n->next) {
- ra_dump_instr("ASSIGN: ", n);
- instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
- ra_dump_list("-------\n", block->head);
- }
+ block_ra(block, ctx);
return ctx->error ? -1 : 0;
}
+/* Mark the dst register of every instruction in the block as SSA
+ * (frontend does not set the flag consistently -- see the comment at
+ * the call site in ir3_block_ra()).  'state' is unused; the signature
+ * matches the (block, void *state) callback shape used by block_ra().
+ */
+static bool
+block_mark_dst(struct ir3_block *block, void *state)
+{
+ list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node)
+ if (n->regs_count > 0)
+ n->regs[0]->flags |= IR3_REG_SSA;
+ return true;
+}
+
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
bool frag_coord, bool frag_face)
{
- struct ir3_instruction *n;
struct ir3_ra_ctx ctx = {
.block = block,
.type = type,
* NOTE: we really should set SSA flag consistently on
* every dst register in the frontend.
*/
- for (n = block->head; n; n = n->next)
- if (n->regs_count > 0)
- n->regs[0]->flags |= IR3_REG_SSA;
+ block_mark_dst(block, &ctx);
ir3_clear_mark(block->shader);
- ret = block_ra(&ctx, block);
+ ret = shader_ra(&ctx, block);
return ret;
}
return d;
}
-static unsigned distance(struct ir3_sched_ctx *ctx,
- struct ir3_instruction *instr, unsigned maxd)
+/* Count delay-slot consuming (alu/flow) instructions between 'instr'
+ * and the end of its block's instruction list, capped at 'maxd'.
+ * Scheduled instructions are appended at the list tail, so walking in
+ * reverse visits the most recently scheduled instructions first --
+ * replacing the old walk of the singly-linked ctx->scheduled chain
+ * (and making the prev() helper below unnecessary).
+ */
+static unsigned
+distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
+ unsigned maxd)
{
- struct ir3_instruction *n = ctx->scheduled;
+ struct list_head *instr_list = &instr->block->instr_list;
unsigned d = 0;
- while (n && (n != instr) && (d < maxd)) {
+
+ list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
+ if ((n == instr) || (d >= maxd))
+ break;
if (is_alu(n) || is_flow(n))
d++;
- n = n->next;
}
- return d;
-}
-/* TODO maybe we want double linked list? */
-static struct ir3_instruction * prev(struct ir3_instruction *instr)
-{
- struct ir3_instruction *p = instr->block->head;
- while (p && (p->next != instr))
- p = p->next;
- return p;
+ return d;
}
static bool is_sfu_or_mem(struct ir3_instruction *instr)
* scheduling and depth calculation..
*/
if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
- schedule(ctx, ir3_NOP(block), false);
+ ir3_NOP(block);
/* remove from depth list:
*/
- if (remove) {
- struct ir3_instruction *p = prev(instr);
-
- /* NOTE: this can happen for inputs which are not
- * read.. in that case there is no need to schedule
- * the input, so just bail:
- */
- if (instr != (p ? p->next : block->head))
- return;
-
- if (p)
- p->next = instr->next;
- else
- block->head = instr->next;
- }
+ list_delinit(&instr->node);
if (writes_addr(instr)) {
assert(ctx->addr == NULL);
instr->flags |= IR3_INSTR_MARK;
- instr->next = ctx->scheduled;
+ list_addtail(&instr->node, &instr->block->instr_list);
ctx->scheduled = instr;
ctx->cnt++;
return SCHEDULED;
}
-static struct ir3_instruction * reverse(struct ir3_instruction *instr)
-{
- struct ir3_instruction *reversed = NULL;
- while (instr) {
- struct ir3_instruction *next = instr->next;
- instr->next = reversed;
- reversed = instr;
- instr = next;
- }
- return reversed;
-}
-
static bool uses_current_addr(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
* other instructions using the current address register:
*/
static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
- struct ir3_block *block)
+ struct list_head *unscheduled_list)
{
- struct ir3_instruction *instr = block->head;
bool addr_in_use = false;
bool pred_in_use = false;
bool all_delayed = true;
unsigned cnt = ~0, attempted = 0;
- while (instr) {
- struct ir3_instruction *next = instr->next;
+ list_for_each_entry_safe(struct ir3_instruction, instr, unscheduled_list, node) {
bool addr = uses_current_addr(ctx, instr);
bool pred = uses_current_pred(ctx, instr);
attempted++;
}
-
- instr = next;
}
if (!addr_in_use)
static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
- struct ir3_instruction *instr;
+ struct list_head unscheduled_list;
+
+ list_replace(&block->instr_list, &unscheduled_list);
+ list_inithead(&block->instr_list);
/* schedule all the shader input's (meta-instr) first so that
* the RA step sees that the input registers contain a value
}
}
- while ((instr = block->head) && !ctx->error) {
- /* NOTE: always grab next *before* trysched(), in case the
- * instruction is actually scheduled (and therefore moved
- * from depth list into scheduled list)
- */
- struct ir3_instruction *next = instr->next;
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
int cnt = trysched(ctx, instr);
if (cnt == DELAYED)
- cnt = block_sched_undelayed(ctx, block);
+ cnt = block_sched_undelayed(ctx, &unscheduled_list);
/* -1 is signal to return up stack, but to us means same as 0: */
cnt = MAX2(0, cnt);
cnt += ctx->cnt;
- instr = next;
/* if deepest remaining instruction cannot be scheduled, try
* the increasingly more shallow instructions until needed
* number of delay slots is filled:
*/
- while (instr && (cnt > ctx->cnt)) {
- next = instr->next;
+ list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node)
trysched(ctx, instr);
- instr = next;
- }
/* and if we run out of instructions that can be scheduled,
* then it is time for nop's:
while (cnt > ctx->cnt)
schedule(ctx, ir3_NOP(block), false);
}
-
- /* at this point, scheduled list is in reverse order, so fix that: */
- block->head = reverse(ctx->scheduled);
}
int ir3_block_sched(struct ir3_block *block)