vc4: Add support for NIR loops and break/continue.
author Eric Anholt <eric@anholt.net>
Mon, 2 May 2016 20:35:21 +0000 (13:35 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 13 Jul 2016 00:42:42 +0000 (17:42 -0700)
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.h

index cb3ef6e45f3d017d29aef9c78db897e98ea05e47..465e052053e9c817dc0bead246399dfa02221f69 100644 (file)
@@ -1763,6 +1763,27 @@ ntq_emit_if(struct vc4_compile *c, nir_if *if_stmt)
 
 }
 
+/**
+ * Emits QIR for a NIR jump instruction (break or continue) inside a loop.
+ *
+ * Neither jump is emitted as a branch here.  Instead, after setting flags
+ * from c->execute, the index of the destination block
+ * (c->loop_break_block for break, c->loop_cont_block for continue) is
+ * conditionally moved into c->execute under QPU_COND_ZS.
+ * NOTE(review): presumably that retargets only the channels whose execute
+ * value is currently zero (the active ones), parking them until emission
+ * reaches the destination block -- confirm against the execute handling
+ * elsewhere in vc4_program.c.
+ *
+ * nir_jump_return is not handled; returns are expected to have been lowered
+ * before this point.
+ */
+static void
+ntq_emit_jump(struct vc4_compile *c, nir_jump_instr *jump)
+{
+        switch (jump->type) {
+        case nir_jump_break:
+                qir_SF(c, c->execute);
+                qir_MOV_cond(c, QPU_COND_ZS, c->execute,
+                             qir_uniform_ui(c, c->loop_break_block->index));
+                break;
+
+        case nir_jump_continue:
+                qir_SF(c, c->execute);
+                qir_MOV_cond(c, QPU_COND_ZS, c->execute,
+                             qir_uniform_ui(c, c->loop_cont_block->index));
+                break;
+
+        case nir_jump_return:
+                unreachable("All returns should be lowered\n");
+        }
+}
+
 static void
 ntq_emit_instr(struct vc4_compile *c, nir_instr *instr)
 {
@@ -1787,6 +1808,10 @@ ntq_emit_instr(struct vc4_compile *c, nir_instr *instr)
                 ntq_emit_tex(c, nir_instr_as_tex(instr));
                 break;
 
+        case nir_instr_type_jump:
+                ntq_emit_jump(c, nir_instr_as_jump(instr));
+                break;
+
         default:
                 fprintf(stderr, "Unknown NIR instr type: ");
                 nir_print_instr(instr, stderr);
@@ -1806,10 +1831,59 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
 static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
 
 static void
-ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+ntq_emit_loop(struct vc4_compile *c, nir_loop *loop)
 {
-        fprintf(stderr, "LOOPS not fully handled. Rendering errors likely.\n");
-        ntq_emit_cf_list(c, &nloop->body);
+        if (!c->vc4->screen->has_control_flow) { /* fallback path */
+                fprintf(stderr,
+                        "loop support requires updated kernel.\n");
+                ntq_emit_cf_list(c, &loop->body); /* body emitted once, no branch */
+                return;
+        }
+        /* Emit a NIR loop using two new QIR blocks: a "continue" block in
+         * which emission of the loop body starts and which is linked as a
+         * successor of the end of the body, and a "break" block in which
+         * emission continues after the loop.
+         *
+         * If c->execute has no value yet (file == QFILE_NULL), it is
+         * initialized to 0 here and dropped again on the way out
+         * (presumably because this loop is then the outermost construct
+         * needing an execute value -- confirm).
+         */
+        bool was_top_level = false;
+        if (c->execute.file == QFILE_NULL) {
+                c->execute = qir_MOV(c, qir_uniform_ui(c, 0));
+                was_top_level = true;
+        }
+        /* Remember the enclosing loop's jump targets (restored at the end of
+         * this function) before installing this loop's own.
+         */
+        struct qblock *save_loop_cont_block = c->loop_cont_block;
+        struct qblock *save_loop_break_block = c->loop_break_block;
+
+        c->loop_cont_block = qir_new_block(c);
+        c->loop_break_block = qir_new_block(c);
+        /* Enter the loop: the current block is linked to the continue block
+         * and emission switches to it.
+         * NOTE(review): ntq_activate_execute_for_block() is defined outside
+         * this hunk; presumably it re-enables channels whose c->execute
+         * value names the block now being emitted -- confirm.
+         */
+        qir_link_blocks(c->cur_block, c->loop_cont_block);
+        qir_set_emit_block(c, c->loop_cont_block);
+        ntq_activate_execute_for_block(c);
+
+        ntq_emit_cf_list(c, &loop->body);
+
+        /* If anything had explicitly continued, or is here at the end of the
+         * loop, then we need to loop again.  SF updates are masked by the
+         * instruction's condition, so we can do the OR of the two conditions
+         * within SF.
+         *
+         * Concretely: the first SF sets flags from c->execute itself; the
+         * SUB below is conditional (QPU_COND_ZC) and also sets flags, with
+         * its result written to c->undef, comparing c->execute against the
+         * continue block's index.
+         */
+        qir_SF(c, c->execute);
+        struct qinst *cont_check =
+                qir_SUB_dest(c,
+                             c->undef,
+                             c->execute,
+                             qir_uniform_ui(c, c->loop_cont_block->index));
+        cont_check->cond = QPU_COND_ZC;
+        cont_check->sf = true;
+        /* Emit the branch on QPU_COND_BRANCH_ANY_ZS; the block where the body
+         * ended gets both the continue block and the break block as
+         * successors.
+         */
+        qir_BRANCH(c, QPU_COND_BRANCH_ANY_ZS);
+        qir_link_blocks(c->cur_block, c->loop_cont_block);
+        qir_link_blocks(c->cur_block, c->loop_break_block);
+        /* Code after the loop is emitted into the break block.  If this loop
+         * created c->execute, reset it to c->undef; otherwise run the
+         * execute activation for the break block.
+         */
+        qir_set_emit_block(c, c->loop_break_block);
+        if (was_top_level)
+                c->execute = c->undef;
+        else
+                ntq_activate_execute_for_block(c);
+        /* Restore the enclosing loop's break/continue targets. */
+        c->loop_break_block = save_loop_break_block;
+        c->loop_cont_block = save_loop_cont_block;
 }
 
 static void
index e284ed58b65256cea5a1355777775ac2aa3ed87e..fe85b769775e0cb53a27abbee4053f1fb4584848 100644 (file)
@@ -463,6 +463,8 @@ struct vc4_compile {
         struct list_head blocks;
         int next_block_index;
         struct qblock *cur_block;
+        struct qblock *loop_cont_block;
+        struct qblock *loop_break_block;
 
         struct list_head qpu_inst_list;
         uint64_t *qpu_insts;