Try a more aggressive approach by cloning uniform and coord loads.
A uniform load can be inserted into any instruction, so let's do that. The ARM
site claims that the penalty for a cache miss is one clock, so we don't lose
anything if we merge it into the instruction that uses the result. As a side
effect we can also pipeline it and thus decrease register pressure.
Do the same for varyings that hold texture coords, but for a different reason:
it looks like there's a special path for coords that increases precision if the
varying that holds them is pipelined. If we don't pipeline it and instead load
the coords from a register, their precision is fp16 and thus only 10 bits, which
is not enough to accurately sample textures of size 1024 or larger.
Since an instruction can hold only one uniform load and one varying load,
node_to_instr now creates a move, using the helper introduced in the previous
commit, if the slot is already taken. As a side effect of this change we can
also try to pipeline texture loads and create a move if the attempt fails.
Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
return true;
}
+ assert(ppir_node_has_single_succ(node) || ppir_node_is_root(node));
+ ppir_node *succ = ppir_node_first_succ(node);
+ if (dest->type != ppir_target_register) {
+ switch (succ->type) {
+ case ppir_node_type_alu:
+ case ppir_node_type_branch: {
+ ppir_src *src = ppir_node_get_src_for_pred(succ, node);
+ /* Can consume uniforms directly */
+ src->type = dest->type = ppir_target_pipeline;
+ src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;
+ return true;
+ }
+ default:
+ /* Create mov for everyone else */
+ break;
+ }
+ }
+
ppir_node *move = ppir_node_insert_mov(node);
if (unlikely(!move))
return false;
- dest->type = ppir_target_pipeline;
- dest->pipeline = ppir_pipeline_reg_uniform;
+ ppir_src *mov_src = ppir_node_get_src(move, 0);
+ mov_src->type = dest->type = ppir_target_pipeline;
+ mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;
return true;
}
return true;
}
- /* Create load_coords node */
- ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
- if (!load)
- return false;
- list_addtail(&load->node.list, &node->list);
-
- ppir_debug("%s create load_coords node %d for %d\n",
- __FUNCTION__, load->node.index, node->index);
-
- load->dest.type = ppir_target_pipeline;
- load->dest.pipeline = ppir_pipeline_reg_discard;
-
- load->src = load_tex->src_coords;
- load->num_src = 1;
-
- ppir_node_foreach_pred_safe(node, dep) {
- ppir_node *pred = dep->pred;
- ppir_node_remove_dep(dep);
- ppir_node_add_dep(&load->node, pred);
+ ppir_node *src_coords = ppir_node_get_src(node, 0)->node;
+ ppir_load_node *load = NULL;
+ if (src_coords && ppir_node_has_single_succ(src_coords) &&
+ (src_coords->op == ppir_op_load_coords))
+ load = ppir_node_to_load(src_coords);
+ else {
+ /* Create load_coords node */
+ load = ppir_node_create(block, ppir_op_load_coords, -1, 0);
+ if (!load)
+ return false;
+ list_addtail(&load->node.list, &node->list);
+
+ load->src = load_tex->src_coords;
+ load->num_src = 1;
+
+ ppir_debug("%s create load_coords node %d for %d\n",
+ __FUNCTION__, load->node.index, node->index);
+
+ ppir_node_foreach_pred_safe(node, dep) {
+ ppir_node *pred = dep->pred;
+ ppir_node_remove_dep(dep);
+ ppir_node_add_dep(&load->node, pred);
+ }
+ ppir_node_add_dep(node, &load->node);
}
- ppir_node_add_dep(node, &load->node);
+ assert(load);
+ load_tex->src_coords.type = load->dest.type = ppir_target_pipeline;
+ load_tex->src_coords.pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;
+
+ if (ppir_node_has_single_succ(node)) {
+ ppir_node *succ = ppir_node_first_succ(node);
+ switch (succ->type) {
+ case ppir_node_type_alu:
+ case ppir_node_type_branch: {
+ for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
+ ppir_src *src = ppir_node_get_src(succ, i);
+ if (src->node == node) {
+ /* Can consume samplers directly */
+ src->type = dest->type = ppir_target_pipeline;
+ src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler;
+ }
+ }
+ return true;
+ }
+ default:
+ /* Create mov for everyone else */
+ break;
+ }
+ }
/* Create move node */
ppir_node *move = ppir_node_insert_mov(node);
case ppir_op_const:
child = ppir_node_clone(node->block, child);
break;
- /* Clone uniforms and load textures for each block */
case ppir_op_load_texture:
- case ppir_op_load_uniform:
- case ppir_op_load_varying:
+ /* Clone texture loads for each block */
if (child->block != node->block) {
child = ppir_node_clone(node->block, child);
comp->var_nodes[ns->ssa->index] = child;
}
break;
+ case ppir_op_load_varying:
+ if ((node->op != ppir_op_load_texture)) {
+ /* Clone varying loads for each block */
+ if (child->block != node->block) {
+ child = ppir_node_clone(node->block, child);
+ comp->var_nodes[ns->ssa->index] = child;
+ }
+ break;
+ }
+ /* At least one successor is load_texture, promote it to load_coords
+ * to ensure that it has exactly one successor */
+ child->op = ppir_op_load_coords;
+ /* Fallthrough */
+ case ppir_op_load_uniform:
+ case ppir_op_load_coords:
+ /* Clone uniform and texture coord loads for each block.
+ * Also ensure that each load has a single successor.
+ * Let's do a fetch each time and hope for a cache hit instead
+ * of increasing reg pressure.
+ */
+ if (child->block != node->block || !ppir_node_is_root(child)) {
+ child = ppir_node_clone(node->block, child);
+ comp->var_nodes[ns->ssa->index] = child;
+ }
+ break;
default:
break;
}
ppir_node_clone_tex(ppir_block *block, ppir_node *node)
{
ppir_load_texture_node *tex_node = ppir_node_to_load_texture(node);
- ppir_load_texture_node *new_tnode = ppir_node_create(block, ppir_op_load_texture, -1, 0);
+ ppir_node *tex_coords = tex_node->src_coords.node;
+
+ ppir_node *new_tex_coords = NULL;
+ ppir_load_texture_node *new_tnode = ppir_node_create(block, ppir_op_load_texture, -1, 0);
if (!new_tnode)
return NULL;
list_addtail(&new_tnode->node.list, &block->node_list);
+ if (tex_coords) {
+ new_tex_coords = ppir_node_clone(block, tex_coords);
+ if (!new_tex_coords)
+ return NULL;
+ }
+
ppir_dest *dest = ppir_node_get_dest(node);
new_tnode->dest = *dest;
ppir_src *new_src = ppir_node_get_src(&new_tnode->node, i);
switch (src->type) {
case ppir_target_ssa: {
- ppir_node_target_assign(new_src, src->node);
- ppir_node_add_dep(&new_tnode->node, src->node);
+ ppir_node_target_assign(new_src, new_tex_coords);
+ ppir_node_add_dep(&new_tnode->node, new_tex_coords);
break;
}
case ppir_target_register: {
new_src->node = NULL;
break;
}
+ case ppir_target_pipeline: {
+ new_src->type = src->type;
+ new_src->pipeline = src->pipeline;
+ break;
+ }
default:
/* pipeline is not expected here */
assert(0);
case ppir_op_load_uniform:
case ppir_op_load_varying:
case ppir_op_load_temp:
+ case ppir_op_load_coords:
return ppir_node_clone_load(block, node);
default:
return NULL;
break;
}
case ppir_node_type_load:
- if (node->op == ppir_op_load_varying ||
- node->op == ppir_op_load_fragcoord ||
- node->op == ppir_op_load_pointcoord ||
- node->op == ppir_op_load_frontface) {
- if (!create_new_instr(block, node))
- return false;
- }
- else {
- /* not supported yet */
- assert(0);
- return false;
- }
- break;
case ppir_node_type_load_texture:
+ {
if (!create_new_instr(block, node))
return false;
+
+ /* load varying output can be a register, it doesn't need a mov */
+ switch (node->op) {
+ case ppir_op_load_varying:
+ case ppir_op_load_coords:
+ case ppir_op_load_fragcoord:
+ case ppir_op_load_pointcoord:
+ case ppir_op_load_frontface:
+ return true;
+ default:
+ break;
+ }
+
+ /* Load cannot be pipelined, likely slot is already taken. Create a mov */
+ assert(ppir_node_has_single_succ(node));
+ ppir_dest *dest = ppir_node_get_dest(node);
+ assert(dest->type == ppir_target_pipeline);
+ ppir_pipeline pipeline_reg = dest->pipeline;
+
+ /* Turn dest back to SSA, so we can update predecessors */
+ ppir_node *succ = ppir_node_first_succ(node);
+ ppir_src *succ_src = ppir_node_get_src_for_pred(succ, node);
+ dest->type = ppir_target_ssa;
+ dest->ssa.index = -1;
+ ppir_node_target_assign(succ_src, node);
+
+ ppir_node *move = ppir_node_insert_mov(node);
+ if (unlikely(!move))
+ return false;
+
+ ppir_src *mov_src = ppir_node_get_src(move, 0);
+ mov_src->type = dest->type = ppir_target_pipeline;
+ mov_src->pipeline = dest->pipeline = pipeline_reg;
+
+ ppir_debug("node_to_instr create move %d for load %d\n",
+ move->index, node->index);
+
+ if (!ppir_instr_insert_node(node->instr, move))
+ return false;
+
break;
+ }
case ppir_node_type_const:
/* Const nodes are supposed to go through do_node_to_instr_pipeline() */
assert(false);