From 7594ef6eb0c2c6bcaa5945030d1ad40f4b1c56b1 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 28 Aug 2019 10:57:35 +0200 Subject: [PATCH] lima/gpir: Support branch instructions Because branch conditions have to be in the pass slot, there is no unconditional branch, and realistically the pass slot has to contain a move when branching (there's nothing it does that would be useful for operating on booleans, so we can't use it for anything when computing the branch condition), we put the branch instruction in the pass slot and at codegen time turn it into a move of the branch condition. This means that it doesn't have to be special-cased like store instructions are in the scheduler. Because of this decision we can remove the half-implemented BRANCH codegen slot. Finally, we (ab)use the existing schedule_first mechanism to make sure that branches are always last in the basic block. Reviewed-by: Vasily Khoruzhick --- src/gallium/drivers/lima/ir/gp/codegen.c | 31 ++++--- src/gallium/drivers/lima/ir/gp/gpir.h | 13 ++- src/gallium/drivers/lima/ir/gp/instr.c | 1 - src/gallium/drivers/lima/ir/gp/lower.c | 21 +++++ src/gallium/drivers/lima/ir/gp/nir.c | 108 ++++++++++------------- src/gallium/drivers/lima/ir/gp/node.c | 6 ++ 6 files changed, 102 insertions(+), 78 deletions(-) diff --git a/src/gallium/drivers/lima/ir/gp/codegen.c b/src/gallium/drivers/lima/ir/gp/codegen.c index 76e360b4fb1..113a8a0c0f3 100644 --- a/src/gallium/drivers/lima/ir/gp/codegen.c +++ b/src/gallium/drivers/lima/ir/gp/codegen.c @@ -45,8 +45,6 @@ static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child) gpir_codegen_src_unused, gpir_codegen_src_p1_complex, gpir_codegen_src_unused }, [GPIR_INSTR_SLOT_PASS] = { gpir_codegen_src_unused, gpir_codegen_src_p1_pass, gpir_codegen_src_p2_pass }, - [GPIR_INSTR_SLOT_BRANCH] = { - gpir_codegen_src_unused, gpir_codegen_src_unused, gpir_codegen_src_unused }, [GPIR_INSTR_SLOT_REG0_LOAD0] = { gpir_codegen_src_attrib_x, gpir_codegen_src_p1_attrib_x, gpir_codegen_src_unused }, @@ -418,6 +416,22 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr) return; } + if (node->op == gpir_op_branch_cond) { + gpir_branch_node *branch = gpir_node_to_branch(node); + + code->pass_op = gpir_codegen_pass_op_pass; + code->pass_src = gpir_get_alu_input(node, branch->cond); + + /* Fill out branch information */ + unsigned offset = branch->dest->instr_offset; + assert(offset < 0x200); + code->branch = true; + code->branch_target = offset & 0xff; + code->branch_target_lo = !(offset >> 8); + code->unknown_1 = 13; + return; + } + gpir_alu_node *alu = gpir_node_to_alu(node); code->pass_src = gpir_get_alu_input(node, alu->children[0]); @@ -434,16 +448,7 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr) default: assert(0); } -} - -static void gpir_codegen_branch_slot(gpir_codegen_instr *code, gpir_instr *instr) -{ - gpir_node *node = instr->slots[GPIR_INSTR_SLOT_BRANCH]; - - if (!node) - return; - assert(0); } static void gpir_codegen_reg0_slot(gpir_codegen_instr *code, gpir_instr *instr) @@ -483,7 +488,7 @@ static gpir_codegen_store_src gpir_get_store_input(gpir_node *node) [GPIR_INSTR_SLOT_ADD1] = gpir_codegen_store_src_acc_1, [GPIR_INSTR_SLOT_COMPLEX] = gpir_codegen_store_src_complex, [GPIR_INSTR_SLOT_PASS] = gpir_codegen_store_src_pass, - [GPIR_INSTR_SLOT_BRANCH...GPIR_INSTR_SLOT_STORE3] = gpir_codegen_store_src_none, + [GPIR_INSTR_SLOT_REG0_LOAD0...GPIR_INSTR_SLOT_STORE3] = gpir_codegen_store_src_none, }; gpir_store_node *store = gpir_node_to_store(node); @@ -546,7 +551,6 @@ static void gpir_codegen(gpir_codegen_instr *code, gpir_instr *instr) gpir_codegen_complex_slot(code, instr); gpir_codegen_pass_slot(code, instr); - gpir_codegen_branch_slot(code, instr); gpir_codegen_reg0_slot(code, instr); gpir_codegen_reg1_slot(code, instr); @@ -574,6 +578,7 @@ bool gpir_codegen_prog(gpir_compiler *comp) { int num_instr = 0; list_for_each_entry(gpir_block, block, &comp->block_list, list) { + block->instr_offset = num_instr; num_instr += list_length(&block->instr_list); } diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h index 24924c92567..6cbd406032e 100644 --- a/src/gallium/drivers/lima/ir/gp/gpir.h +++ b/src/gallium/drivers/lima/ir/gp/gpir.h @@ -246,7 +246,6 @@ enum gpir_instr_slot { GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX, - GPIR_INSTR_SLOT_BRANCH, GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1, GPIR_INSTR_SLOT_REG0_LOAD2, @@ -347,6 +346,13 @@ typedef struct gpir_block { struct list_head instr_list; struct gpir_compiler *comp; + struct gpir_block *successors[2]; + struct list_head predecessors; + struct list_head predecessors_node; + + /* For codegen, the offset in the final program. */ + unsigned instr_offset; + /* for scheduler */ union { struct { @@ -361,6 +367,7 @@ typedef struct gpir_block { typedef struct { gpir_node node; gpir_block *dest; + gpir_node *cond; } gpir_branch_node; struct lima_vs_shader_state; @@ -376,6 +383,9 @@ typedef struct gpir_compiler { /* array for searching ssa node */ gpir_node **var_nodes; + /* gpir block for NIR block. */ + gpir_block **blocks; + /* for physical reg */ struct list_head reg_list; int cur_reg; @@ -433,6 +443,7 @@ static inline bool gpir_node_is_leaf(gpir_node *node) #define gpir_node_to_const(node) ((gpir_const_node *)(node)) #define gpir_node_to_load(node) ((gpir_load_node *)(node)) #define gpir_node_to_store(node) ((gpir_store_node *)(node)) +#define gpir_node_to_branch(node) ((gpir_branch_node *)(node)) gpir_instr *gpir_instr_create(gpir_block *block); bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node); diff --git a/src/gallium/drivers/lima/ir/gp/instr.c b/src/gallium/drivers/lima/ir/gp/instr.c index 45e9d817143..5cfb7e34a02 100644 --- a/src/gallium/drivers/lima/ir/gp/instr.c +++ b/src/gallium/drivers/lima/ir/gp/instr.c @@ -535,7 +535,6 @@ void gpir_instr_print_prog(gpir_compiler *comp) [GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" }, [GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" }, [GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" }, - [GPIR_INSTR_SLOT_BRANCH] = { 4, "bnch" }, [GPIR_INSTR_SLOT_STORE3] = { 15, "store" }, [GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" }, [GPIR_INSTR_SLOT_PASS] = { 4, "pass" }, diff --git a/src/gallium/drivers/lima/ir/gp/lower.c b/src/gallium/drivers/lima/ir/gp/lower.c index 6d6aa7e80ff..eaeeeb8f1eb 100644 --- a/src/gallium/drivers/lima/ir/gp/lower.c +++ b/src/gallium/drivers/lima/ir/gp/lower.c @@ -413,6 +413,26 @@ static bool gpir_lower_not(gpir_block *block, gpir_node *node) return true; } +/* There is no unconditional branch instruction, so we have to lower it to a + * conditional branch with a condition of 1.0. + */ + +static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node) +{ + gpir_branch_node *branch = gpir_node_to_branch(node); + + gpir_node *node_const = gpir_node_create(block, gpir_op_const); + gpir_const_node *c = gpir_node_to_const(node_const); + + list_addtail(&c->node.list, &node->list); + c->value.f = 1.0f; + gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT); + + branch->node.op = gpir_op_branch_cond; + branch->cond = node_const; + + return true; +} static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { [gpir_op_not] = gpir_lower_not, @@ -424,6 +444,7 @@ static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node [gpir_op_eq] = gpir_lower_eq_ne, [gpir_op_ne] = gpir_lower_eq_ne, [gpir_op_abs] = gpir_lower_abs, + [gpir_op_branch_uncond] = gpir_lower_branch_uncond, }; bool gpir_pre_rsched_lower_prog(gpir_compiler *comp) diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c index 6dcb4d88f02..e2dc939f1a0 100644 --- a/src/gallium/drivers/lima/ir/gp/nir.c +++ b/src/gallium/drivers/lima/ir/gp/nir.c @@ -272,8 +272,8 @@ static bool gpir_emit_tex(gpir_block *block, nir_instr *ni) static bool gpir_emit_jump(gpir_block *block, nir_instr *ni) { - gpir_error("nir_jump_instr not support\n"); - return false; + /* Jumps are emitted at the end of the basic block, so do nothing. */ + return true; } static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = { @@ -285,79 +285,61 @@ static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = [nir_instr_type_jump] = gpir_emit_jump, }; -static gpir_block *gpir_block_create(gpir_compiler *comp) +static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *impl) { - gpir_block *block = ralloc(comp, gpir_block); - if (!block) - return NULL; + nir_index_blocks(impl); + comp->blocks = ralloc_array(comp, gpir_block *, impl->num_blocks); - list_inithead(&block->node_list); - list_inithead(&block->instr_list); + nir_foreach_block(block_nir, impl) { + gpir_block *block = ralloc(comp, gpir_block); + if (!block) + return false; - return block; -} + list_inithead(&block->node_list); + list_inithead(&block->instr_list); -static bool gpir_emit_block(gpir_compiler *comp, nir_block *nblock) -{ - gpir_block *block = gpir_block_create(comp); - if (!block) - return false; + list_addtail(&block->list, &comp->block_list); + block->comp = comp; + comp->blocks[block_nir->index] = block; + } - list_addtail(&block->list, &comp->block_list); - block->comp = comp; + nir_foreach_block(block_nir, impl) { + gpir_block *block = comp->blocks[block_nir->index]; + nir_foreach_instr(instr, block_nir) { + assert(instr->type < nir_instr_type_phi); + if (!gpir_emit_instr[instr->type](block, instr)) + return false; + } - nir_foreach_instr(instr, nblock) { - assert(instr->type < nir_instr_type_phi); - if (!gpir_emit_instr[instr->type](block, instr)) - return false; - } + if (block_nir->successors[0] == impl->end_block) + block->successors[0] = NULL; + else + block->successors[0] = comp->blocks[block_nir->successors[0]->index]; + block->successors[1] = NULL; - return true; -} + if (block_nir->successors[1] != NULL) { + nir_if *nif = nir_cf_node_as_if(nir_cf_node_next(&block_nir->cf_node)); + gpir_alu_node *cond = gpir_node_create(block, gpir_op_not); + list_addtail(&cond->node.list, &block->node_list); + cond->children[0] = gpir_node_find(block, &cond->node, &nif->condition, 0); + gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT); -static bool gpir_emit_if(gpir_compiler *comp, nir_if *nif) -{ - gpir_error("if nir_cf_node not support\n"); - return false; -} + gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_cond); + list_addtail(&branch->node.list, &block->node_list); -static bool gpir_emit_loop(gpir_compiler *comp, nir_loop *nloop) -{ - gpir_error("loop nir_cf_node not support\n"); - return false; -} + branch->dest = comp->blocks[block_nir->successors[1]->index]; + block->successors[1] = branch->dest; -static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *nfunc) -{ - gpir_error("function nir_cf_node not support\n"); - return false; -} + branch->cond = &cond->node; + gpir_node_add_dep(&branch->node, &cond->node, GPIR_DEP_INPUT); -static bool gpir_emit_cf_list(gpir_compiler *comp, struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, node, node, list) { - bool ret; + assert(block_nir->successors[0]->index == block_nir->index + 1); + } else if (block_nir->successors[0]->index != block_nir->index + 1) { + gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_uncond); + list_addtail(&branch->node.list, &block->node_list); - switch (node->type) { - case nir_cf_node_block: - ret = gpir_emit_block(comp, nir_cf_node_as_block(node)); - break; - case nir_cf_node_if: - ret = gpir_emit_if(comp, nir_cf_node_as_if(node)); - break; - case nir_cf_node_loop: - ret = gpir_emit_loop(comp, nir_cf_node_as_loop(node)); - break; - case nir_cf_node_function: - ret = gpir_emit_function(comp, nir_cf_node_as_function(node)); - break; - default: - gpir_error("unknown NIR node type %d\n", node->type); - return false; + branch->dest = comp->blocks[block_nir->successors[0]->index]; } - - if (!ret) - return false; } return true; @@ -430,7 +412,7 @@ bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir, comp->constant_base = nir->num_uniforms; prog->uniform_pending_offset = nir->num_uniforms * 16; - if (!gpir_emit_cf_list(comp, &func->body)) + if (!gpir_emit_function(comp, func)) goto err_out0; gpir_node_print_prog_seq(comp); diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c index 1bf9d806c30..e62512890b3 100644 --- a/src/gallium/drivers/lima/ir/gp/node.c +++ b/src/gallium/drivers/lima/ir/gp/node.c @@ -246,6 +246,8 @@ const gpir_op_info gpir_op_infos[] = { [gpir_op_branch_cond] = { .name = "branch_cond", .type = gpir_node_type_branch, + .schedule_first = true, + .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END }, }, [gpir_op_const] = { .name = "const", @@ -380,6 +382,10 @@ void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_store_node *store = gpir_node_to_store(parent); if (store->child == old_child) store->child = new_child; + } else if (parent->type == gpir_node_type_branch) { + gpir_branch_node *branch = gpir_node_to_branch(parent); + if (branch->cond == old_child) + branch->cond = new_child; } } -- 2.30.2