From: Erico Nunes Date: Sun, 17 May 2020 13:56:42 +0000 (+0200) Subject: lima/ppir: use a ready list in node_to_instr X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b3023055e075386e96fe2fbf093f0db261c0d9fa;p=mesa.git lima/ppir: use a ready list in node_to_instr After the recent optimizations in ppir lowering that increase options for combining, node_to_instr now may have multiple options of nodes to insert and needs to decide which is better. For example, if an instruction uses both a varying and a texture, there are two nodes nodes that can be inserted to the load varying slot in the same instruction (ld_var and ld_coords). It is much more advantageous to pipeline the load texture coords since that enables the higher precision path for texture coordinates. However, with the current recursive expansion, this cannot be influenced. This simple ready list implementation in node_to_instr allows it to choose the next node to expand based on a priority score, rather than relying on the random order coming from the recursive expansion. Other than preferring nodes with pipeline output (which covers ld_coords vs ld_var), nodes using later slots in the pipeline are now expanded first, allowing node_to_instr to make all of the earlier (pipelineable) nodes available in the ready list so the best one can be chosen when picking nodes for the earlier slots. Fixes: 632a921bd0d lima/ppir: optimize tex loads with single successor Signed-off-by: Erico Nunes Reviewed-by: Vasily Khoruzhick Part-of: --- diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c index 9f21fa31e26..52632c88ce6 100644 --- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -214,40 +214,88 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node) return true; } -static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node) +static unsigned int ppir_node_score(ppir_node *node) { - /* first try pipeline sched, if that didn't succeed try normal scheduling */ - if (!ppir_do_node_to_instr_try_insert(block, node)) - if (!ppir_do_one_node_to_instr(block, node)) - return false; + /* preferentially expand nodes in later instruction slots first, so + * nodes for earlier slots (which are more likely pipelineable) get + * added to the ready list. */ + unsigned int late_slot = 0; + int *slots = ppir_op_infos[node->op].slots; + if (slots) + for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) + late_slot = MAX2(late_slot, slots[i]); + + /* to untie, favour nodes with pipelines for earlier expansion. + * increase that for nodes with chained pipelines */ + unsigned int pipeline = 0; + ppir_node *n = node; + ppir_dest *dest = ppir_node_get_dest(n); + while (dest && dest->type == ppir_target_pipeline) { + pipeline++; + assert(ppir_node_has_single_src_succ(n)); + n = ppir_node_first_succ(n); + dest = ppir_node_get_dest(n); + } + assert(pipeline < 4); - if (node->is_end) - node->instr->is_end = true; + return (late_slot << 2 | pipeline); +} - /* we have to make sure the dep not be destroyed (due to - * succ change) in ppir_do_node_to_instr, otherwise we can't - * do recursion like this */ - ppir_node_foreach_pred(node, dep) { - ppir_node *pred = dep->pred; - bool ready = true; - - /* pred may already be processed by the previous pred - * (this pred may be both node and previous pred's child) */ - if (pred->instr) - continue; - - /* insert pred only when all its successors have been inserted */ - ppir_node_foreach_succ(pred, dep) { - ppir_node *succ = dep->succ; - if (!succ->instr) { - ready = false; - break; - } +static ppir_node *ppir_ready_list_pick_best(ppir_block *block, + struct list_head *ready_list) +{ + unsigned int best_score = 0; + ppir_node *best = NULL; + + list_for_each_entry(ppir_node, node, ready_list, sched_list) { + unsigned int score = ppir_node_score(node); + if (!best || score > best_score) { + best = node; + best_score = score; } + } - if (ready) { - if (!ppir_do_node_to_instr(block, pred)) + assert(best); + return best; +} + +static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root) +{ + struct list_head ready_list; + list_inithead(&ready_list); + list_addtail(&root->sched_list, &ready_list); + + while (!list_is_empty(&ready_list)) { + ppir_node *node = ppir_ready_list_pick_best(block, &ready_list); + list_del(&node->sched_list); + + /* first try pipeline sched, if that didn't succeed try normal sched */ + if (!ppir_do_node_to_instr_try_insert(block, node)) + if (!ppir_do_one_node_to_instr(block, node)) return false; + + if (node->is_end) + node->instr->is_end = true; + + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + bool ready = true; + + /* pred may already have been processed by a previous node */ + if (pred->instr) + continue; + + /* insert pred only when all its successors have been inserted */ + ppir_node_foreach_succ(pred, dep) { + ppir_node *succ = dep->succ; + if (!succ->instr) { + ready = false; + break; + } + } + + if (ready) + list_addtail(&pred->sched_list, &ready_list); } } diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h index 631dcaaad51..8344a0038d6 100644 --- a/src/gallium/drivers/lima/ir/pp/ppir.h +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -152,6 +152,7 @@ typedef struct { typedef struct ppir_node { struct list_head list; + struct list_head sched_list; ppir_op op; ppir_node_type type; int index;