freedreno/ir3: use standard list implementation
author Rob Clark <robclark@freedesktop.org>
Thu, 30 Apr 2015 15:38:43 +0000 (11:38 -0400)
committer Rob Clark <robclark@freedesktop.org>
Sun, 21 Jun 2015 11:53:09 +0000 (07:53 -0400)
Use the standard list_head doubly-linked list and its related iterators,
helpers, etc., rather than a weird combination of an instruction array
and next pointers, depending on the stage.  Each block now has an
instr_list.  In certain stages where we want to remove instructions and
re-add them to the block's list, we just use list_replace() to copy the
list to a new list_head.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_cp.c
src/gallium/drivers/freedreno/ir3/ir3_depth.c
src/gallium/drivers/freedreno/ir3/ir3_legalize.c
src/gallium/drivers/freedreno/ir3/ir3_print.c
src/gallium/drivers/freedreno/ir3/ir3_ra.c
src/gallium/drivers/freedreno/ir3/ir3_sched.c

index e015de91c33412899965f540748ea14f1bedaf3f..84564a9eef7b9df8894d1ba88489de3379083127 100644 (file)
@@ -81,7 +81,7 @@ void ir3_destroy(struct ir3 *shader)
                shader->chunk = chunk->next;
                free(chunk);
        }
-       free(shader->instrs);
+       free(shader->indirects);
        free(shader->baryfs);
        free(shader);
 }
@@ -534,28 +534,32 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr,
 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
                uint32_t gpu_id)
 {
+       struct ir3_block *block = shader->block;
        uint32_t *ptr, *dwords;
-       uint32_t i;
 
        info->max_reg       = -1;
        info->max_half_reg  = -1;
        info->max_const     = -1;
        info->instrs_count  = 0;
+       info->sizedwords    = 0;
+
+       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+               info->sizedwords += 2;
+       }
 
        /* need a integer number of instruction "groups" (sets of 16
         * instructions on a4xx or sets of 4 instructions on a3xx),
         * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
         */
        if (gpu_id >= 400) {
-               info->sizedwords = 2 * align(shader->instrs_count, 16);
+               info->sizedwords = align(info->sizedwords, 16 * 2);
        } else {
-               info->sizedwords = 2 * align(shader->instrs_count, 4);
+               info->sizedwords = align(info->sizedwords, 4 * 2);
        }
 
        ptr = dwords = calloc(4, info->sizedwords);
 
-       for (i = 0; i < shader->instrs_count; i++) {
-               struct ir3_instruction *instr = shader->instrs[i];
+       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
                int ret = emit[instr->category](instr, dwords, info);
                if (ret)
                        goto fail;
@@ -581,14 +585,15 @@ static struct ir3_register * reg_create(struct ir3 *shader,
        return reg;
 }
 
-static void insert_instr(struct ir3 *shader,
+static void insert_instr(struct ir3_block *block,
                struct ir3_instruction *instr)
 {
+       struct ir3 *shader = block->shader;
 #ifdef DEBUG
        static uint32_t serialno = 0;
        instr->serialno = ++serialno;
 #endif
-       array_insert(shader->instrs, instr);
+       list_addtail(&instr->node, &block->instr_list);
 
        if (is_input(instr))
                array_insert(shader->baryfs, instr);
@@ -625,6 +630,8 @@ struct ir3_block * ir3_block_create(struct ir3 *shader,
 
        block->shader = shader;
 
+       list_inithead(&block->instr_list);
+
        return block;
 }
 
@@ -652,7 +659,7 @@ struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
        instr->block = block;
        instr->category = category;
        instr->opc = opc;
-       insert_instr(block->shader, instr);
+       insert_instr(block, instr);
        return instr;
 }
 
@@ -677,7 +684,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
        *new_instr = *instr;
        new_instr->regs = regs;
 
-       insert_instr(instr->block->shader, new_instr);
+       insert_instr(instr->block, new_instr);
 
        /* clone registers: */
        new_instr->regs_count = 0;
index f37dfab3341c8de8ef683d7cdaa810f52e5024d9..edb5b49e23cad0088d38e71007df89d29d6c3d6c 100644 (file)
@@ -28,6 +28,7 @@
 #include <stdbool.h>
 
 #include "util/u_debug.h"
+#include "util/list.h"
 
 #include "instr-a3xx.h"
 #include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
@@ -290,7 +291,9 @@ struct ir3_instruction {
         */
        struct ir3_instruction *fanin;
 
-       struct ir3_instruction *next;
+       /* Entry in ir3_block's instruction list: */
+       struct list_head node;
+
 #ifdef DEBUG
        uint32_t serialno;
 #endif
@@ -321,8 +324,6 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr)
 struct ir3_heap_chunk;
 
 struct ir3 {
-       unsigned instrs_count, instrs_sz;
-       struct ir3_instruction **instrs;
 
        /* Track bary.f (and ldlv) instructions.. this is needed in
         * scheduling to ensure that all varying fetches happen before
@@ -361,7 +362,7 @@ struct ir3_block {
        /* only a single address register: */
        struct ir3_instruction *address;
        struct ir3_block *parent;
-       struct ir3_instruction *head;
+       struct list_head instr_list;
 };
 
 struct ir3 * ir3_create(void);
@@ -402,11 +403,8 @@ static inline void ir3_clear_mark(struct ir3 *shader)
         * a block, so tracking the list of instrs globally is
         * unlikely to be what we want.
         */
-       unsigned i;
-       for (i = 0; i < shader->instrs_count; i++) {
-               struct ir3_instruction *instr = shader->instrs[i];
+       list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node)
                instr->flags &= ~IR3_INSTR_MARK;
-       }
 }
 
 static inline int ir3_instr_regno(struct ir3_instruction *instr,
@@ -756,6 +754,7 @@ int ir3_block_flatten(struct ir3_block *block);
 /* depth calculation: */
 int ir3_delayslots(struct ir3_instruction *assigner,
                struct ir3_instruction *consumer, unsigned n);
+void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
 void ir3_block_depth(struct ir3_block *block);
 
 /* copy-propagate: */
index fa7d363be7bfc81bd20b55406c36784f99bca04c..350f7dd5e6b6e741415753145ad430bb364ec0eb 100644 (file)
@@ -354,13 +354,6 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
 {
        struct ir3_register *reg;
 
-       /* stay within the block.. don't try to operate across
-        * basic block boundaries or we'll have problems when
-        * dealing with multiple basic blocks:
-        */
-       if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
-               return instr;
-
        if (is_eligible_mov(instr, !!flags)) {
                struct ir3_register *reg = instr->regs[1];
                struct ir3_instruction *src_instr = ssa(reg);
@@ -394,11 +387,11 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
        return instr;
 }
 
-static void block_cp(struct ir3_block *block)
+void ir3_block_cp(struct ir3_block *block)
 {
-       unsigned i;
+       ir3_clear_mark(block->shader);
 
-       for (i = 0; i < block->noutputs; i++) {
+       for (unsigned i = 0; i < block->noutputs; i++) {
                if (block->outputs[i]) {
                        struct ir3_instruction *out =
                                        instr_cp(block->outputs[i], NULL);
@@ -407,9 +400,3 @@ static void block_cp(struct ir3_block *block)
                }
        }
 }
-
-void ir3_block_cp(struct ir3_block *block)
-{
-       ir3_clear_mark(block->shader);
-       block_cp(block);
-}
index b899c66b37e36fa04cca70e2b101b2d7babec158..601e14a1c85fb0e100cc7d163318a09e20947f5b 100644 (file)
@@ -84,25 +84,25 @@ int ir3_delayslots(struct ir3_instruction *assigner,
        }
 }
 
-static void insert_by_depth(struct ir3_instruction *instr)
+void
+ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
 {
-       struct ir3_block *block = instr->block;
-       struct ir3_instruction *n = block->head;
-       struct ir3_instruction *p = NULL;
-
-       while (n && (n != instr) && (n->depth > instr->depth)) {
-               p = n;
-               n = n->next;
+       /* remove from existing spot in list: */
+       list_delinit(&instr->node);
+
+       /* find where to re-insert instruction: */
+       list_for_each_entry (struct ir3_instruction, pos, list, node) {
+               if (pos->depth > instr->depth) {
+                       list_add(&instr->node, &pos->node);
+                       return;
+               }
        }
-
-       instr->next = n;
-       if (p)
-               p->next = instr;
-       else
-               block->head = instr;
+       /* if we get here, we didn't find an insertion spot: */
+       list_addtail(&instr->node, list);
 }
 
-static void ir3_instr_depth(struct ir3_instruction *instr)
+static void
+ir3_instr_depth(struct ir3_instruction *instr)
 {
        struct ir3_instruction *src;
 
@@ -123,42 +123,38 @@ static void ir3_instr_depth(struct ir3_instruction *instr)
                instr->depth = MAX2(instr->depth, sd);
        }
 
-       /* meta-instructions don't add cycles, other than PHI.. which
-        * might translate to a real instruction..
-        *
-        * well, not entirely true, fan-in/out, etc might need to need
-        * to generate some extra mov's in edge cases, etc.. probably
-        * we might want to do depth calculation considering the worst
-        * case for these??
-        */
        if (!is_meta(instr))
                instr->depth++;
 
-       insert_by_depth(instr);
+       ir3_insert_by_depth(instr, &instr->block->instr_list);
+}
+
+static void
+remove_unused_by_block(struct ir3_block *block)
+{
+       list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
+               if (!ir3_instr_check_mark(instr)) {
+                       /* mark it, in case it is input, so we can
+                        * remove unused inputs:
+                        */
+                       instr->depth = DEPTH_UNUSED;
+                       /* and remove from instruction list: */
+                       list_delinit(&instr->node);
+               }
+       }
 }
 
 void ir3_block_depth(struct ir3_block *block)
 {
        unsigned i;
 
-       block->head = NULL;
-
        ir3_clear_mark(block->shader);
        for (i = 0; i < block->noutputs; i++)
                if (block->outputs[i])
                        ir3_instr_depth(block->outputs[i]);
 
        /* mark un-used instructions: */
-       for (i = 0; i < block->shader->instrs_count; i++) {
-               struct ir3_instruction *instr = block->shader->instrs[i];
-
-               /* just consider instructions within this block: */
-               if (instr->block != block)
-                       continue;
-
-               if (!ir3_instr_check_mark(instr))
-                       instr->depth = DEPTH_UNUSED;
-       }
+       remove_unused_by_block(block);
 
        /* cleanup unused inputs: */
        for (i = 0; i < block->ninputs; i++) {
index 61713c25e72e25b0c4062beb8945401e23565209..be0b5ce442c5cb4606eae1467708c20084672718 100644 (file)
@@ -51,12 +51,9 @@ struct ir3_legalize_ctx {
 static void legalize(struct ir3_legalize_ctx *ctx)
 {
        struct ir3_block *block = ctx->block;
-       struct ir3_instruction *n;
-       struct ir3 *shader = block->shader;
-       struct ir3_instruction *end =
-                       ir3_instr_create(block, 0, OPC_END);
        struct ir3_instruction *last_input = NULL;
        struct ir3_instruction *last_rel = NULL;
+       struct list_head instr_list;
        regmask_t needs_ss_war;       /* write after read */
        regmask_t needs_ss;
        regmask_t needs_sy;
@@ -65,9 +62,13 @@ static void legalize(struct ir3_legalize_ctx *ctx)
        regmask_init(&needs_ss);
        regmask_init(&needs_sy);
 
-       shader->instrs_count = 0;
+       /* remove all the instructions from the list, we'll be adding
+        * them back in as we go
+        */
+       list_replace(&block->instr_list, &instr_list);
+       list_inithead(&block->instr_list);
 
-       for (n = block->head; n; n = n->next) {
+       list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) {
                struct ir3_register *reg;
                unsigned i;
 
@@ -140,12 +141,12 @@ static void legalize(struct ir3_legalize_ctx *ctx)
                }
 
                /* need to be able to set (ss) on first instruction: */
-               if ((shader->instrs_count == 0) && (n->category >= 5))
+               if (list_empty(&block->instr_list) && (n->category >= 5))
                        ir3_NOP(block);
 
-               if (is_nop(n) && shader->instrs_count) {
-                       struct ir3_instruction *last =
-                                       shader->instrs[shader->instrs_count-1];
+               if (is_nop(n) && !list_empty(&block->instr_list)) {
+                       struct ir3_instruction *last = list_last_entry(&block->instr_list,
+                                       struct ir3_instruction, node);
                        if (is_nop(last) && (last->repeat < 5)) {
                                last->repeat++;
                                last->flags |= n->flags;
@@ -153,7 +154,7 @@ static void legalize(struct ir3_legalize_ctx *ctx)
                        }
                }
 
-               shader->instrs[shader->instrs_count++] = n;
+               list_addtail(&n->node, &block->instr_list);
 
                if (is_sfu(n))
                        regmask_set(&needs_ss, n->regs[0]);
@@ -192,35 +193,19 @@ static void legalize(struct ir3_legalize_ctx *ctx)
                 * the (ei) flag:
                 */
                if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
-                       int i, cnt;
+                       struct ir3_instruction *baryf;
 
-                       /* note that ir3_instr_create() inserts into
-                        * shader->instrs[] and increments the count..
-                        * so we need to bump up the cnt initially (to
-                        * avoid it clobbering the last real instr) and
-                        * restore it after.
-                        */
-                       cnt = ++shader->instrs_count;
-
-                       /* inserting instructions would be a bit nicer if list.. */
-                       for (i = cnt - 2; i >= 0; i--) {
-                               if (shader->instrs[i] == last_input) {
-
-                                       /* (ss)bary.f (ei)r63.x, 0, r0.x */
-                                       last_input = ir3_instr_create(block, 2, OPC_BARY_F);
-                                       last_input->flags |= IR3_INSTR_SS;
-                                       ir3_reg_create(last_input, regid(63, 0), 0);
-                                       ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
-                                       ir3_reg_create(last_input, regid(0, 0), 0);
+                       /* (ss)bary.f (ei)r63.x, 0, r0.x */
+                       baryf = ir3_instr_create(block, 2, OPC_BARY_F);
+                       baryf->flags |= IR3_INSTR_SS;
+                       ir3_reg_create(baryf, regid(63, 0), 0);
+                       ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
+                       ir3_reg_create(baryf, regid(0, 0), 0);
 
-                                       shader->instrs[i + 1] = last_input;
-
-                                       break;
-                               }
-                               shader->instrs[i + 1] = shader->instrs[i];
-                       }
+                       /* insert the dummy bary.f after last_input: */
+                       list_add(&baryf->node, &last_input->node);
 
-                       shader->instrs_count = cnt;
+                       last_input = baryf;
                }
                last_input->regs[0]->flags |= IR3_REG_EI;
        }
@@ -228,9 +213,11 @@ static void legalize(struct ir3_legalize_ctx *ctx)
        if (last_rel)
                last_rel->flags |= IR3_INSTR_UL;
 
-       shader->instrs[shader->instrs_count++] = end;
+       /* create/add 'end' instruction: */
+       ir3_instr_create(block, 0, OPC_END);
 
-       shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+       list_first_entry(&block->instr_list, struct ir3_instruction, node)
+               ->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
 }
 
 void ir3_block_legalize(struct ir3_block *block,
index a5c5d3c8efada62f9e8191df021e64acad587228..755c0c23c369dfa681ebd0c463b1b3576546ab47 100644 (file)
@@ -186,9 +186,8 @@ void ir3_print_instr(struct ir3_instruction *instr)
 static void
 print_block(struct ir3_block *block, int lvl)
 {
-       struct ir3_instruction *instr;
        tab(lvl); printf("block {\n");
-       for (instr = block->head; instr; instr = instr->next) {
+       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
                print_instr(instr, lvl+1);
        }
        tab(lvl); printf("}\n");
index 501352515b58b44317f1dfecec9c21d7def41875..95f6a81861e3aa9fad10284edfa3c462e789d286 100644 (file)
@@ -75,10 +75,10 @@ struct ir3_ra_ctx {
 #  define ra_debug 0
 #endif
 
-#define ra_dump_list(msg, n) do { \
+#define ra_dump_list(msg, ir) do { \
                if (ra_debug) { \
                        debug_printf("-- " msg); \
-                       ir3_print(n->block->shader); \
+                       ir3_print(ir); \
                } \
        } while (0)
 
@@ -175,14 +175,13 @@ static void mark_sources(struct ir3_instruction *instr,
 static void compute_liveregs(struct ir3_ra_ctx *ctx,
                struct ir3_instruction *instr, regmask_t *liveregs)
 {
-       struct ir3_block *block = instr->block;
-       struct ir3_instruction *n;
+       struct ir3_block *block = ctx->block;
        regmask_t written;
        unsigned i;
 
        regmask_init(&written);
 
-       for (n = instr->next; n; n = n->next) {
+       list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
                struct ir3_register *r;
 
                if (is_meta(n))
@@ -411,9 +410,8 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
 static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
                struct ir3_instruction *instr, unsigned name)
 {
-       struct ir3_instruction *n, *src;
-
-       for (n = instr->next; n && !ctx->error; n = n->next) {
+       list_for_each_entry (struct ir3_instruction, n, &instr->node, node) {
+               struct ir3_instruction *src;
                foreach_ssa_src_n(src, i, n) {
                        unsigned r = i + 1;
 
@@ -424,6 +422,8 @@ static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
                        if (src == instr)
                                instr_assign_src(ctx, n, r, name);
                }
+               if (ctx->error)
+                       break;
        }
 }
 
@@ -589,14 +589,45 @@ static void instr_assign_array(struct ir3_ra_ctx *ctx,
 
 }
 
-static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+static bool
+block_ra(struct ir3_block *block, void *state)
 {
-       struct ir3_instruction *n;
+       struct ir3_ra_ctx *ctx = state;
+
+       ra_dump_list("-------\n", block->shader);
+
+       /* first pass, assign arrays: */
+       list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
+               if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
+                       debug_assert(!n->cp.left);  /* don't think this should happen */
+                       ra_dump_instr("ASSIGN ARRAY: ", n);
+                       instr_assign_array(ctx, n);
+                       ra_dump_list("-------\n", block->shader);
+               }
+
+               if (ctx->error)
+                       return false;
+       }
+
+       list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) {
+               ra_dump_instr("ASSIGN: ", n);
+               instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
+               ra_dump_list("-------\n", block->shader);
+
+               if (ctx->error)
+                       return false;
+       }
+
+       return true;
+}
 
+static int
+shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
        /* frag shader inputs get pre-assigned, since we have some
         * constraints/unknowns about setup for some of these regs:
         */
-       if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
+       if (ctx->type == SHADER_FRAGMENT) {
                unsigned i = 0, j;
                if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
                        /* if we have frag_face, it gets hr0.x */
@@ -608,31 +639,23 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                                instr_assign(ctx, block->inputs[i], j);
        }
 
-       ra_dump_list("-------\n", block->head);
-
-       /* first pass, assign arrays: */
-       for (n = block->head; n && !ctx->error; n = n->next) {
-               if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
-                       debug_assert(!n->cp.left);  /* don't think this should happen */
-                       ra_dump_instr("ASSIGN ARRAY: ", n);
-                       instr_assign_array(ctx, n);
-                       ra_dump_list("-------\n", block->head);
-               }
-       }
-
-       for (n = block->head; n && !ctx->error; n = n->next) {
-               ra_dump_instr("ASSIGN: ", n);
-               instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
-               ra_dump_list("-------\n", block->head);
-       }
+       block_ra(block, ctx);
 
        return ctx->error ? -1 : 0;
 }
 
+static bool
+block_mark_dst(struct ir3_block *block, void *state)
+{
+       list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node)
+               if (n->regs_count > 0)
+                       n->regs[0]->flags |= IR3_REG_SSA;
+       return true;
+}
+
 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
                bool frag_coord, bool frag_face)
 {
-       struct ir3_instruction *n;
        struct ir3_ra_ctx ctx = {
                        .block = block,
                        .type = type,
@@ -648,12 +671,10 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
         * NOTE: we really should set SSA flag consistently on
         * every dst register in the frontend.
         */
-       for (n = block->head; n; n = n->next)
-               if (n->regs_count > 0)
-                       n->regs[0]->flags |= IR3_REG_SSA;
+       block_mark_dst(block, &ctx);
 
        ir3_clear_mark(block->shader);
-       ret = block_ra(&ctx, block);
+       ret = shader_ra(&ctx, block);
 
        return ret;
 }
index 5ca6d7b62d5afdb31b96a9de50e9c13901475a23..fc41f93b884f1d1ad59750548452ffe7180a1a3e 100644 (file)
@@ -88,26 +88,21 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs)
        return d;
 }
 
-static unsigned distance(struct ir3_sched_ctx *ctx,
-               struct ir3_instruction *instr, unsigned maxd)
+static unsigned
+distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
+               unsigned maxd)
 {
-       struct ir3_instruction *n = ctx->scheduled;
+       struct list_head *instr_list = &instr->block->instr_list;
        unsigned d = 0;
-       while (n && (n != instr) && (d < maxd)) {
+
+       list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
+               if ((n == instr) || (d >= maxd))
+                       break;
                if (is_alu(n) || is_flow(n))
                        d++;
-               n = n->next;
        }
-       return d;
-}
 
-/* TODO maybe we want double linked list? */
-static struct ir3_instruction * prev(struct ir3_instruction *instr)
-{
-       struct ir3_instruction *p = instr->block->head;
-       while (p && (p->next != instr))
-               p = p->next;
-       return p;
+       return d;
 }
 
 static bool is_sfu_or_mem(struct ir3_instruction *instr)
@@ -125,25 +120,11 @@ static void schedule(struct ir3_sched_ctx *ctx,
         * scheduling and depth calculation..
         */
        if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
-               schedule(ctx, ir3_NOP(block), false);
+               ir3_NOP(block);
 
        /* remove from depth list:
         */
-       if (remove) {
-               struct ir3_instruction *p = prev(instr);
-
-               /* NOTE: this can happen for inputs which are not
-                * read.. in that case there is no need to schedule
-                * the input, so just bail:
-                */
-               if (instr != (p ? p->next : block->head))
-                       return;
-
-               if (p)
-                       p->next = instr->next;
-               else
-                       block->head = instr->next;
-       }
+       list_delinit(&instr->node);
 
        if (writes_addr(instr)) {
                assert(ctx->addr == NULL);
@@ -157,7 +138,7 @@ static void schedule(struct ir3_sched_ctx *ctx,
 
        instr->flags |= IR3_INSTR_MARK;
 
-       instr->next = ctx->scheduled;
+       list_addtail(&instr->node, &instr->block->instr_list);
        ctx->scheduled = instr;
 
        ctx->cnt++;
@@ -284,18 +265,6 @@ static int trysched(struct ir3_sched_ctx *ctx,
        return SCHEDULED;
 }
 
-static struct ir3_instruction * reverse(struct ir3_instruction *instr)
-{
-       struct ir3_instruction *reversed = NULL;
-       while (instr) {
-               struct ir3_instruction *next = instr->next;
-               instr->next = reversed;
-               reversed = instr;
-               instr = next;
-       }
-       return reversed;
-}
-
 static bool uses_current_addr(struct ir3_sched_ctx *ctx,
                struct ir3_instruction *instr)
 {
@@ -317,16 +286,14 @@ static bool uses_current_pred(struct ir3_sched_ctx *ctx,
  * other instructions using the current address register:
  */
 static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
-               struct ir3_block *block)
+               struct list_head *unscheduled_list)
 {
-       struct ir3_instruction *instr = block->head;
        bool addr_in_use = false;
        bool pred_in_use = false;
        bool all_delayed = true;
        unsigned cnt = ~0, attempted = 0;
 
-       while (instr) {
-               struct ir3_instruction *next = instr->next;
+       list_for_each_entry_safe(struct ir3_instruction, instr, unscheduled_list, node) {
                bool addr = uses_current_addr(ctx, instr);
                bool pred = uses_current_pred(ctx, instr);
 
@@ -347,8 +314,6 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
 
                        attempted++;
                }
-
-               instr = next;
        }
 
        if (!addr_in_use)
@@ -408,7 +373,10 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
 
 static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 {
-       struct ir3_instruction *instr;
+       struct list_head unscheduled_list;
+
+       list_replace(&block->instr_list, &unscheduled_list);
+       list_inithead(&block->instr_list);
 
        /* schedule all the shader input's (meta-instr) first so that
         * the RA step sees that the input registers contain a value
@@ -423,31 +391,22 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
                }
        }
 
-       while ((instr = block->head) && !ctx->error) {
-               /* NOTE: always grab next *before* trysched(), in case the
-                * instruction is actually scheduled (and therefore moved
-                * from depth list into scheduled list)
-                */
-               struct ir3_instruction *next = instr->next;
+       list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
                int cnt = trysched(ctx, instr);
 
                if (cnt == DELAYED)
-                       cnt = block_sched_undelayed(ctx, block);
+                       cnt = block_sched_undelayed(ctx, &unscheduled_list);
 
                /* -1 is signal to return up stack, but to us means same as 0: */
                cnt = MAX2(0, cnt);
                cnt += ctx->cnt;
-               instr = next;
 
                /* if deepest remaining instruction cannot be scheduled, try
                 * the increasingly more shallow instructions until needed
                 * number of delay slots is filled:
                 */
-               while (instr && (cnt > ctx->cnt)) {
-                       next = instr->next;
+               list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node)
                        trysched(ctx, instr);
-                       instr = next;
-               }
 
                /* and if we run out of instructions that can be scheduled,
                 * then it is time for nop's:
@@ -455,9 +414,6 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
                while (cnt > ctx->cnt)
                        schedule(ctx, ir3_NOP(block), false);
        }
-
-       /* at this point, scheduled list is in reverse order, so fix that: */
-       block->head = reverse(ctx->scheduled);
 }
 
 int ir3_block_sched(struct ir3_block *block)