From: Vasily Khoruzhick Date: Sat, 11 May 2019 02:17:40 +0000 (-0700) Subject: lima/ppir: implement discard and discard_if X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=af0de6b91c0b2e26e57bd235cbdc7296992502d9;hp=7a7be6139839b57a89f5c6d24d04025fa25ba4a1;p=mesa.git lima/ppir: implement discard and discard_if This commit also adds codegen for branch since we need it for discard_if. Reviewed-by: Qiang Yu Signed-off-by: Vasily Khoruzhick --- diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c index 93fd5628669..73763218d4b 100644 --- a/src/gallium/drivers/lima/ir/pp/codegen.c +++ b/src/gallium/drivers/lima/ir/pp/codegen.c @@ -507,6 +507,42 @@ static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code) code[i] = util_float_to_half(constant->value[i].f); } +static void ppir_codegen_encode_discard(ppir_node *node, void *code) +{ + ppir_codegen_field_branch *b = code; + assert(node->op == ppir_op_discard); + + b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0; + b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1; + b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2; +} + +static void ppir_codegen_encode_branch(ppir_node *node, void *code) +{ + ppir_codegen_field_branch *b = code; + ppir_branch_node *branch; + ppir_instr *target_instr; + if (node->op == ppir_op_discard) { /* discard shares the branch slot but has its own fixed encoding */ + ppir_codegen_encode_discard(node, code); + return; + } + + assert(node->op == ppir_op_branch); + branch = ppir_node_to_branch(node); + + b->branch.unknown_0 = 0x0; + b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]); + b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]); + b->branch.cond_gt = branch->cond_gt; + b->branch.cond_eq = branch->cond_eq; + b->branch.cond_lt = branch->cond_lt; + b->branch.unknown_1 = 0x0; + b->branch.unknown_2 = 0x3; + + target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list); + b->branch.target = target_instr->offset - node->instr->offset; /* relative to this instruction's own offset */ +} + typedef void
(*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); static const ppir_codegen_instr_slot_encode_func @@ -520,6 +556,7 @@ ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = { [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, + [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch, }; static const int ppir_codegen_field_size[] = { @@ -634,7 +671,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp) printf("========ppir codegen========\n"); list_for_each_entry(ppir_block, block, &comp->block_list, list) { list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { - printf("%03d: ", instr->index); + printf("%03d (@%6ld): ", instr->index, (long)instr->offset); int n = prog[0] & 0x1f; for (int i = 0; i < n; i++) { if (i && i % 6 == 0) @@ -655,6 +692,7 @@ bool ppir_codegen_prog(ppir_compiler *comp) int size = 0; list_for_each_entry(ppir_block, block, &comp->block_list, list) { list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + instr->offset = size; /* running total of the encoded sizes of all preceding instructions */ size += get_instr_encode_size(instr); } } diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c index ae296a4bb82..19cca714fa1 100644 --- a/src/gallium/drivers/lima/ir/pp/instr.c +++ b/src/gallium/drivers/lima/ir/pp/instr.c @@ -135,6 +135,20 @@ static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src, return true; } +static void ppir_update_src_pipeline(ppir_pipeline pipeline, ppir_src *src, + ppir_dest *dest, uint8_t *swizzle) +{ + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + + if (swizzle) { + for (int k = 0; k < 4; k++) + src->swizzle[k] = swizzle[src->swizzle[k]]; + } + } +} + /* make alu node src reflact the pipeline reg */ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline, ppir_dest *dest, uint8_t
*swizzle) @@ -146,15 +160,16 @@ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipe ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]); for (int j = 0; j < alu->num_src; j++) { ppir_src *src = alu->src + j; - if (ppir_node_target_equal(src, dest)) { - src->type = ppir_target_pipeline; - src->pipeline = pipeline; + ppir_update_src_pipeline(pipeline, src, dest, swizzle); + } + } - if (swizzle) { - for (int k = 0; k < 4; k++) - src->swizzle[k] = swizzle[src->swizzle[k]]; - } - } + ppir_node *branch_node = instr->slots[PPIR_INSTR_SLOT_BRANCH]; + if (branch_node && (branch_node->type == ppir_node_type_branch)) { /* a discard node can occupy this slot too, but it has no sources */ + ppir_branch_node *branch = ppir_node_to_branch(branch_node); + for (int j = 0; j < 2; j++) { /* both entries of branch->src[2] */ + ppir_src *src = branch->src + j; + ppir_update_src_pipeline(pipeline, src, dest, swizzle); } } } @@ -234,6 +249,7 @@ static struct { [PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" }, [PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" }, [PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" }, + [PPIR_INSTR_SLOT_BRANCH] = { 4, "brch" }, }; void ppir_instr_print_list(ppir_compiler *comp) diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c index ded92b150c5..192392f6224 100644 --- a/src/gallium/drivers/lima/ir/pp/lower.c +++ b/src/gallium/drivers/lima/ir/pp/lower.c @@ -400,6 +400,40 @@ static bool ppir_lower_trunc(ppir_block *block, ppir_node *node) return true; } +static bool ppir_lower_branch(ppir_block *block, ppir_node *node) +{ + ppir_branch_node *branch = ppir_node_to_branch(node); + ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0); + + if (!zero) + return false; + + list_addtail(&zero->node.list, &node->list); + + zero->constant.value[0].f = 0; + zero->constant.num = 1; + zero->dest.type = ppir_target_ssa; + zero->dest.ssa.num_components = 1; + zero->dest.ssa.live_in = INT_MAX; + zero->dest.ssa.live_out = 0; + zero->dest.write_mask = 0x01; + + /* For now we're just comparing branch
condition with 0, + * in future we should look whether it's possible to move + * comparison node into branch itself and use current + * way as a fallback for complex conditions. + */ + branch->src[1].type = ppir_target_ssa; + branch->src[1].ssa = &zero->dest.ssa; + + branch->cond_gt = true; + branch->cond_lt = true; /* NOTE(review): gt and lt set with eq left clear -- presumably "src[0] != src[1]", i.e. condition != 0; confirm */ + + ppir_node_add_dep(&branch->node, &zero->node); + + return true; +} + static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = { [ppir_op_const] = ppir_lower_const, [ppir_op_dot2] = ppir_lower_dot, @@ -417,6 +451,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = { [ppir_op_load_texture] = ppir_lower_texture, [ppir_op_select] = ppir_lower_select, [ppir_op_trunc] = ppir_lower_trunc, + [ppir_op_branch] = ppir_lower_branch, }; bool ppir_lower_prog(ppir_compiler *comp) diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 1d390827b07..580d5c3aff1 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -204,6 +204,57 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni) return &node->node; } +static ppir_block *ppir_block_create(ppir_compiler *comp); + +static bool ppir_emit_discard_block(ppir_compiler *comp) +{ + ppir_block *block = ppir_block_create(comp); + ppir_discard_node *discard; + if (!block) + return false; + + comp->discard_block = block; + block->comp = comp; + + discard = ppir_node_create(block, ppir_op_discard, -1, 0); + if (discard) + list_addtail(&discard->node.list, &block->node_list); + else + return false; + + return true; +} + +static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni) +{ + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); + ppir_node *node; + ppir_compiler *comp = block->comp; + ppir_branch_node *branch; + + if (!comp->discard_block && !ppir_emit_discard_block(comp)) + return NULL; + + node = ppir_node_create(block, ppir_op_branch, -1, 0); + if (!node) +
return NULL; + branch = ppir_node_to_branch(node); + + /* second src and condition will be updated during lowering */ + ppir_node_add_src(block->comp, node, &branch->src[0], + &instr->src[0], u_bit_consecutive(0, instr->num_components)); + branch->target = comp->discard_block; + + return node; +} + +static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni) +{ + ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0); + + return node; +} + static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) { nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); @@ -264,6 +315,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) return &snode->node; + case nir_intrinsic_discard: + return ppir_emit_discard(block, ni); + + case nir_intrinsic_discard_if: + return ppir_emit_discard_if(block, ni); + default: ppir_error("unsupported nir_intrinsic_instr %s\n", nir_intrinsic_infos[instr->intrinsic].name); @@ -452,6 +509,46 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne return comp; } +static void ppir_add_ordering_deps(ppir_compiler *comp) +{ + /* Some intrinsics do not have explicit dependencies and thus depend + * on instruction order. Consider discard_if and store_output as an + * example. If we don't add fake dependency of discard_if to store_output + * scheduler may put store_output first and since store_output terminates + * shader on Utgard PP, rest of it will never be executed. + * Add fake dependencies for discard/branch/store to preserve + * instruction order.
+ * + * TODO: scheduler should schedule discard_if as early as possible otherwise + * we may end up with suboptimal code for cases like this: + * + * s3 = s1 < s2 + * discard_if s3 + * s4 = s1 + s2 + * store s4 + * + * In this case store depends on discard_if and s4, but since dependencies can + * be scheduled in any order it can result in code like this: + * + * instr1: s3 = s1 < s2 + * instr2: s4 = s1 + s2 + * instr3: discard_if s3 + * instr4: store s4 + */ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + ppir_node *prev_node = NULL; /* last discard/store/branch node seen in this block */ + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (node->type == ppir_node_type_discard || + node->type == ppir_node_type_store || + node->type == ppir_node_type_branch) { + if (prev_node) + ppir_node_add_dep(node, prev_node); + prev_node = node; + } + } + } +} + bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, struct ra_regs *ra) { @@ -477,6 +574,13 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, if (!ppir_emit_cf_list(comp, &func->body)) goto err_out0; + + /* If we have discard block add it to the very end */ + if (comp->discard_block) + list_addtail(&comp->discard_block->list, &comp->block_list); + + ppir_add_ordering_deps(comp); + ppir_node_print_prog(comp); if (!ppir_lower_prog(comp)) diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c index 5abf263768e..38d7e6284e4 100644 --- a/src/gallium/drivers/lima/ir/pp/node.c +++ b/src/gallium/drivers/lima/ir/pp/node.c @@ -281,6 +281,20 @@ const ppir_op_info ppir_op_infos[] = { PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END }, }, + [ppir_op_discard] = { + .name = "discard", + .type = ppir_node_type_discard, + .slots = (int []) { + PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_branch] = { + .name = "branch", + .type = ppir_node_type_branch, + .slots = (int []) { + PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END + }, + }, };
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) @@ -292,6 +306,8 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) [ppir_node_type_load] = sizeof(ppir_load_node), [ppir_node_type_store] = sizeof(ppir_store_node), [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node), + [ppir_node_type_discard] = sizeof(ppir_discard_node), + [ppir_node_type_branch] = sizeof(ppir_branch_node), }; ppir_node_type type = ppir_op_infos[op].type; diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c index b38fa3aa733..22678ae3349 100644 --- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -93,7 +93,8 @@ static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node) ppir_node_foreach_succ_safe(node, dep) { ppir_node *succ = dep->succ; - assert(succ->type == ppir_node_type_alu); + assert(succ->type == ppir_node_type_alu || + succ->type == ppir_node_type_branch); if (!ppir_instr_insert_node(succ->instr, node)) { /* create a move node to insert for failed node */ @@ -323,6 +324,15 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node) node = move; break; } + case ppir_node_type_discard: + if (!create_new_instr(block, node)) + return false; + node->instr->is_end = true; /* NOTE(review): flags the discard instruction as an end instruction -- presumably it terminates the shader like store_output; confirm */ + break; + case ppir_node_type_branch: + if (!create_new_instr(block, node)) + return false; + break; default: return false; } diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h index 71d80dc5196..233e5cdc3d8 100644 --- a/src/gallium/drivers/lima/ir/pp/ppir.h +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -108,6 +108,9 @@ typedef enum { ppir_op_const, + ppir_op_discard, + ppir_op_branch, + ppir_op_num, } ppir_op; @@ -117,6 +120,8 @@ typedef enum { ppir_node_type_load, ppir_node_type_store, ppir_node_type_load_texture, + ppir_node_type_discard, + ppir_node_type_branch, }
ppir_node_type; typedef struct { @@ -254,6 +259,10 @@ typedef struct { int sampler_dim; } ppir_load_texture_node; +typedef struct { + ppir_node node; +} ppir_discard_node; + enum ppir_instr_slot { PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_TEXLD, @@ -264,6 +273,7 @@ enum ppir_instr_slot { PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_STORE_TEMP, + PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_NUM, PPIR_INSTR_SLOT_END, PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL, @@ -287,6 +297,7 @@ typedef struct ppir_instr { int est; /* earliest start time */ int parent_index; bool scheduled; + off_t offset; /* position in the encoded program, assigned by ppir_codegen_prog() */ } ppir_instr; typedef struct ppir_block { @@ -300,6 +311,15 @@ typedef struct ppir_block { int sched_instr_base; } ppir_block; +typedef struct { + ppir_node node; + ppir_src src[2]; /* operands encoded as arg0_source / arg1_source */ + bool cond_gt; + bool cond_eq; + bool cond_lt; + ppir_block *target; /* block whose first instruction is the jump destination */ +} ppir_branch_node; + struct ra_regs; struct lima_fs_shader_state; @@ -322,6 +342,8 @@ typedef struct ppir_compiler { /* for regalloc spilling debug */ int force_spilling; + + ppir_block *discard_block; /* created by ppir_emit_discard_block() on the first discard_if */ } ppir_compiler; void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask); @@ -377,6 +399,8 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node) #define ppir_node_to_load(node) ((ppir_load_node *)(node)) #define ppir_node_to_store(node) ((ppir_store_node *)(node)) #define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node)) +#define ppir_node_to_discard(node) ((ppir_discard_node *)(node)) +#define ppir_node_to_branch(node) ((ppir_branch_node *)(node)) static inline ppir_dest *ppir_node_get_dest(ppir_node *node) {