From: Vasily Khoruzhick Date: Wed, 28 Aug 2019 06:02:12 +0000 (-0700) Subject: lima/ppir: clone uniforms and load_coords into each successor X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1c1890fa707789aaf7e1ef265c521799b2f348c5;p=mesa.git lima/ppir: clone uniforms and load_coords into each successor Try a more aggressive approach by cloning uniform and coord loads. A uniform load can be inserted into any instruction, so let's do that. The ARM site claims that the penalty for a cache miss is one clock, so we don't lose anything if we merge the load into the instruction that uses its result. As a side effect we can also pipeline it and thus decrease register pressure. Do the same for varyings that hold texture coords, but for a different reason: there appears to be a special path for coords that increases precision when the varying that holds them is pipelined. If we don't pipeline it and instead load the coords from a register, their precision is fp16 — only 10 bits of mantissa — which is not enough to accurately sample textures of size 1024 or larger. Since an instruction can hold only one uniform load and one varying load, node_to_instr now creates a move (using the helper introduced in the previous commit) if the slot is already taken. As a side effect of this change we can also try to pipeline texture loads, creating a move if the attempt fails. 
Reviewed-by: Erico Nunes Signed-off-by: Vasily Khoruzhick --- diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c index ee63912a1b7..ee43b8978e7 100644 --- a/src/gallium/drivers/lima/ir/pp/lower.c +++ b/src/gallium/drivers/lima/ir/pp/lower.c @@ -96,12 +96,31 @@ static bool ppir_lower_load(ppir_block *block, ppir_node *node) return true; } + assert(ppir_node_has_single_succ(node) || ppir_node_is_root(node)); + ppir_node *succ = ppir_node_first_succ(node); + if (dest->type != ppir_target_register) { + switch (succ->type) { + case ppir_node_type_alu: + case ppir_node_type_branch: { + ppir_src *src = ppir_node_get_src_for_pred(succ, node); + /* Can consume uniforms directly */ + src->type = dest->type = ppir_target_pipeline; + src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform; + return true; + } + default: + /* Create mov for everyone else */ + break; + } + } + ppir_node *move = ppir_node_insert_mov(node); if (unlikely(!move)) return false; - dest->type = ppir_target_pipeline; - dest->pipeline = ppir_pipeline_reg_uniform; + ppir_src *mov_src = ppir_node_get_src(move, 0); + mov_src->type = dest->type = ppir_target_pipeline; + mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform; return true; } @@ -134,28 +153,56 @@ static bool ppir_lower_texture(ppir_block *block, ppir_node *node) return true; } - /* Create load_coords node */ - ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0); - if (!load) - return false; - list_addtail(&load->node.list, &node->list); - - ppir_debug("%s create load_coords node %d for %d\n", - __FUNCTION__, load->node.index, node->index); - - load->dest.type = ppir_target_pipeline; - load->dest.pipeline = ppir_pipeline_reg_discard; - - load->src = load_tex->src_coords; - load->num_src = 1; - - ppir_node_foreach_pred_safe(node, dep) { - ppir_node *pred = dep->pred; - ppir_node_remove_dep(dep); - ppir_node_add_dep(&load->node, pred); + ppir_node *src_coords = 
ppir_node_get_src(node, 0)->node; + ppir_load_node *load = NULL; + if (src_coords && ppir_node_has_single_succ(src_coords) && + (src_coords->op == ppir_op_load_coords)) + load = ppir_node_to_load(src_coords); + else { + /* Create load_coords node */ + load = ppir_node_create(block, ppir_op_load_coords, -1, 0); + if (!load) + return false; + list_addtail(&load->node.list, &node->list); + + load->src = load_tex->src_coords; + load->num_src = 1; + + ppir_debug("%s create load_coords node %d for %d\n", + __FUNCTION__, load->node.index, node->index); + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(&load->node, pred); + } + ppir_node_add_dep(node, &load->node); } - ppir_node_add_dep(node, &load->node); + assert(load); + load_tex->src_coords.type = load->dest.type = ppir_target_pipeline; + load_tex->src_coords.pipeline = load->dest.pipeline = ppir_pipeline_reg_discard; + + if (ppir_node_has_single_succ(node)) { + ppir_node *succ = ppir_node_first_succ(node); + switch (succ->type) { + case ppir_node_type_alu: + case ppir_node_type_branch: { + for (int i = 0; i < ppir_node_get_src_num(succ); i++) { + ppir_src *src = ppir_node_get_src(succ, i); + if (src->node == node) { + /* Can consume samplers directly */ + src->type = dest->type = ppir_target_pipeline; + src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler; + } + } + return true; + } + default: + /* Create mov for everyone else */ + break; + } + } /* Create move node */ ppir_node *move = ppir_node_insert_mov(node); diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 4852e55a8b6..9af1a8e65b0 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -106,15 +106,38 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, case ppir_op_const: child = ppir_node_clone(node->block, child); break; - /* Clone uniforms and load textures for each block */ 
case ppir_op_load_texture: - case ppir_op_load_uniform: - case ppir_op_load_varying: + /* Clone texture loads for each block */ if (child->block != node->block) { child = ppir_node_clone(node->block, child); comp->var_nodes[ns->ssa->index] = child; } break; + case ppir_op_load_varying: + if ((node->op != ppir_op_load_texture)) { + /* Clone varying loads for each block */ + if (child->block != node->block) { + child = ppir_node_clone(node->block, child); + comp->var_nodes[ns->ssa->index] = child; + } + break; + } + /* At least one successor is load_texture, promote it to load_coords + * to ensure that is has exactly one successor */ + child->op = ppir_op_load_coords; + /* Fallthrough */ + case ppir_op_load_uniform: + case ppir_op_load_coords: + /* Clone uniform and texture coord loads for each block. + * Also ensure that each load has a single successor. + * Let's do a fetch each time and hope for a cache hit instead + * of increasing reg pressure. + */ + if (child->block != node->block || !ppir_node_is_root(child)) { + child = ppir_node_clone(node->block, child); + comp->var_nodes[ns->ssa->index] = child; + } + break; default: break; } diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c index fd6e2efa035..d5a17304630 100644 --- a/src/gallium/drivers/lima/ir/pp/node.c +++ b/src/gallium/drivers/lima/ir/pp/node.c @@ -624,13 +624,22 @@ static ppir_node * ppir_node_clone_tex(ppir_block *block, ppir_node *node) { ppir_load_texture_node *tex_node = ppir_node_to_load_texture(node); - ppir_load_texture_node *new_tnode = ppir_node_create(block, ppir_op_load_texture, -1, 0); + ppir_node *tex_coords = tex_node->src_coords.node; + + ppir_node *new_tex_coords = NULL; + ppir_load_texture_node *new_tnode = ppir_node_create(block, ppir_op_load_texture, -1, 0); if (!new_tnode) return NULL; list_addtail(&new_tnode->node.list, &block->node_list); + if (tex_coords) { + new_tex_coords = ppir_node_clone(block, tex_coords); + if (!new_tex_coords) + 
return NULL; + } + ppir_dest *dest = ppir_node_get_dest(node); new_tnode->dest = *dest; @@ -644,8 +653,8 @@ ppir_node_clone_tex(ppir_block *block, ppir_node *node) ppir_src *new_src = ppir_node_get_src(&new_tnode->node, i); switch (src->type) { case ppir_target_ssa: { - ppir_node_target_assign(new_src, src->node); - ppir_node_add_dep(&new_tnode->node, src->node); + ppir_node_target_assign(new_src, new_tex_coords); + ppir_node_add_dep(&new_tnode->node, new_tex_coords); break; } case ppir_target_register: { @@ -654,6 +663,11 @@ ppir_node_clone_tex(ppir_block *block, ppir_node *node) new_src->node = NULL; break; } + case ppir_target_pipeline: { + new_src->type = src->type; + new_src->pipeline = src->pipeline; + break; + } default: /* pipeline is not expected here */ assert(0); @@ -693,6 +707,7 @@ ppir_node *ppir_node_clone(ppir_block *block, ppir_node *node) case ppir_op_load_uniform: case ppir_op_load_varying: case ppir_op_load_temp: + case ppir_op_load_coords: return ppir_node_clone_load(block, node); default: return NULL; diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c index 50a3c65ebc7..5bc8d29c469 100644 --- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -93,23 +93,52 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n break; } case ppir_node_type_load: - if (node->op == ppir_op_load_varying || - node->op == ppir_op_load_fragcoord || - node->op == ppir_op_load_pointcoord || - node->op == ppir_op_load_frontface) { - if (!create_new_instr(block, node)) - return false; - } - else { - /* not supported yet */ - assert(0); - return false; - } - break; case ppir_node_type_load_texture: + { if (!create_new_instr(block, node)) return false; + + /* load varying output can be a register, it doesn't need a mov */ + switch (node->op) { + case ppir_op_load_varying: + case ppir_op_load_coords: + case ppir_op_load_fragcoord: + 
case ppir_op_load_pointcoord: + case ppir_op_load_frontface: + return true; + default: + break; + } + + /* Load cannot be pipelined, likely slot is already taken. Create a mov */ + assert(ppir_node_has_single_succ(node)); + ppir_dest *dest = ppir_node_get_dest(node); + assert(dest->type == ppir_target_pipeline); + ppir_pipeline pipeline_reg = dest->pipeline; + + /* Turn dest back to SSA, so we can update predecessors */ + ppir_node *succ = ppir_node_first_succ(node); + ppir_src *succ_src = ppir_node_get_src_for_pred(succ, node); + dest->type = ppir_target_ssa; + dest->ssa.index = -1; + ppir_node_target_assign(succ_src, node); + + ppir_node *move = ppir_node_insert_mov(node); + if (unlikely(!move)) + return false; + + ppir_src *mov_src = ppir_node_get_src(move, 0); + mov_src->type = dest->type = ppir_target_pipeline; + mov_src->pipeline = dest->pipeline = pipeline_reg; + + ppir_debug("node_to_instr create move %d for load %d\n", + move->index, node->index); + + if (!ppir_instr_insert_node(node->instr, move)) + return false; + break; + } case ppir_node_type_const: /* Const nodes are supposed to go through do_node_to_instr_pipeline() */ assert(false);