lima/ppir: duplicate consts in nir
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
index 75a09f0d441549bd79b8c92d3eaf9fb4cb79e734..5175888d2473908d4acdf56075a089a88f6f5a2f 100644 (file)
@@ -42,8 +42,6 @@ static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ss
    ppir_dest *dest = ppir_node_get_dest(node);
    dest->type = ppir_target_ssa;
    dest->ssa.num_components = ssa->num_components;
-   dest->ssa.live_in = INT_MAX;
-   dest->ssa.live_out = 0;
    dest->write_mask = u_bit_consecutive(0, ssa->num_components);
 
    if (node->type == ppir_node_type_load ||
@@ -101,25 +99,22 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
 
    if (ns->is_ssa) {
       child = comp->var_nodes[ns->ssa->index];
-      /* Clone consts for each successor */
       switch (child->op) {
-      case ppir_op_const:
-         child = ppir_node_clone(node->block, child);
-         break;
-      /* Clone uniforms and load textures for each block */
-      case ppir_op_load_texture:
-      case ppir_op_load_uniform:
       case ppir_op_load_varying:
-         if (child->block != node->block) {
-            child = ppir_node_clone(node->block, child);
-            comp->var_nodes[ns->ssa->index] = child;
+         /* If at least one successor is load_texture, promote it to
+          * load_coords to ensure that it has exactly one successor */
+         if (node->op == ppir_op_load_texture) {
+            nir_tex_src *nts = (nir_tex_src *)ns;
+            if (nts->src_type == nir_tex_src_coord)
+               child->op = ppir_op_load_coords;
          }
          break;
       default:
          break;
       }
 
-      ppir_node_add_dep(node, child);
+      if (child->op != ppir_op_undef)
+         ppir_node_add_dep(node, child, ppir_dep_src);
    }
    else {
       nir_register *reg = ns->reg.reg;
@@ -134,7 +129,7 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
          }
          /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
          if (child && node != child && child->op != ppir_op_dummy)
-            ppir_node_add_dep(node, child);
+            ppir_node_add_dep(node, child, ppir_dep_src);
       }
    }
 
@@ -165,13 +160,9 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
    [nir_op_fceil] = ppir_op_ceil,
    [nir_op_ffract] = ppir_op_fract,
    [nir_op_sge] = ppir_op_ge,
-   [nir_op_fge] = ppir_op_ge,
    [nir_op_slt] = ppir_op_lt,
-   [nir_op_flt] = ppir_op_lt,
    [nir_op_seq] = ppir_op_eq,
-   [nir_op_feq] = ppir_op_eq,
    [nir_op_sne] = ppir_op_ne,
-   [nir_op_fne] = ppir_op_ne,
    [nir_op_fcsel] = ppir_op_select,
    [nir_op_inot] = ppir_op_not,
    [nir_op_ftrunc] = ppir_op_trunc,
@@ -299,6 +290,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
 
       lnode->num_components = instr->num_components;
       lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
+      if (nir_src_is_const(instr->src[0]))
+         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
+      else {
+         lnode->num_src = 1;
+         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
+      }
       return &lnode->node;
 
    case nir_intrinsic_load_frag_coord:
@@ -340,7 +337,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
 
       lnode->num_components = instr->num_components;
       lnode->index = nir_intrinsic_base(instr);
-      lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
+      if (nir_src_is_const(instr->src[0]))
+         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
+      else {
+         lnode->num_src = 1;
+         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
+      }
 
       return &lnode->node;
 
@@ -352,8 +354,6 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
       ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
       dest->type = ppir_target_ssa;
       dest->ssa.num_components = instr->num_components;
-      dest->ssa.live_in = INT_MAX;
-      dest->ssa.live_out = 0;
       dest->ssa.index = 0;
       dest->write_mask = u_bit_consecutive(0, instr->num_components);
 
@@ -399,8 +399,16 @@ static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
 
 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
 {
-   ppir_error("nir_ssa_undef_instr not support\n");
-   return NULL;
+   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
+   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def); /* an undef value is emitted as a ppir_op_undef SSA node */
+   if (!node)
+      return NULL; /* node creation failed */
+   ppir_alu_node *alu = ppir_node_to_alu(node);
+
+   ppir_dest *dest = &alu->dest;
+   dest->ssa.undef = true; /* mark the destination as undefined; ppir_node_add_src adds no dep on ppir_op_undef children */
+
+   return node;
 }
 
 static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
@@ -408,7 +416,12 @@ static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
    nir_tex_instr *instr = nir_instr_as_tex(ni);
    ppir_load_texture_node *node;
 
-   if (instr->op != nir_texop_tex) {
+   switch (instr->op) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+      break;
+   default:
       ppir_error("unsupported texop %d\n", instr->op);
       return NULL;
    }
@@ -425,6 +438,7 @@ static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
 
    switch (instr->sampler_dim) {
    case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_CUBE:
    case GLSL_SAMPLER_DIM_RECT:
    case GLSL_SAMPLER_DIM_EXTERNAL:
       break;
@@ -436,17 +450,24 @@ static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
    node->sampler_dim = instr->sampler_dim;
 
    for (int i = 0; i < instr->coord_components; i++)
-         node->src_coords.swizzle[i] = i;
+         node->src[0].swizzle[i] = i;
 
    for (int i = 0; i < instr->num_srcs; i++) {
       switch (instr->src[i].src_type) {
       case nir_tex_src_coord:
-         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
+         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                            u_bit_consecutive(0, instr->coord_components));
+         node->num_src++;
+         break;
+      case nir_tex_src_bias:
+      case nir_tex_src_lod:
+         node->lod_bias_en = true;
+         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
+         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
+         node->num_src++;
          break;
       default:
          ppir_error("unsupported texture source type\n");
-         assert(0);
          return NULL;
       }
    }
@@ -463,8 +484,39 @@ static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
 
 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
 {
-   ppir_error("nir_jump_instr not support\n");
-   return NULL;
+   ppir_node *node;
+   ppir_compiler *comp = block->comp;
+   ppir_branch_node *branch;
+   ppir_block *jump_block;
+   nir_jump_instr *jump = nir_instr_as_jump(ni); /* only break and continue are handled below */
+
+   switch (jump->type) {
+   case nir_jump_break: {
+      assert(comp->current_block->successors[0]); /* break needs a successor to jump to ... */
+      assert(!comp->current_block->successors[1]); /* ... and it must be the only one */
+      jump_block = comp->current_block->successors[0]; /* current_block is set by ppir_emit_block */
+   }
+   break;
+   case nir_jump_continue:
+      jump_block = comp->loop_cont_block; /* set by ppir_emit_loop to the loop's first block */
+   break;
+   default:
+      ppir_error("nir_jump_instr not support\n");
+      return NULL; /* any other jump type is rejected */
+   }
+
+   assert(jump_block != NULL); /* checked only in builds with asserts enabled */
+
+   node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return NULL;
+   branch = ppir_node_to_branch(node);
+
+   /* Unconditional: the branch has no condition source */
+   branch->num_src = 0;
+   branch->target = jump_block;
+
+   return node; /* NOTE(review): not added to block->node_list here; presumably the caller does that - confirm */
 }
 
 static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
@@ -494,6 +546,8 @@ static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
 {
    ppir_block *block = ppir_get_block(comp, nblock);
 
+   comp->current_block = block;
+
    list_addtail(&block->list, &comp->block_list);
 
    nir_foreach_instr(instr, nblock) {
@@ -508,16 +562,99 @@ static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
    return true;
 }
 
-static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
+static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
+/* Emit a nir_if as ppir branch nodes; returns false as soon as any step fails. */
+static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
 {
-   ppir_error("if nir_cf_node not support\n");
-   return false;
+   ppir_node *node;
+   ppir_branch_node *else_branch, *after_branch;
+   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
+   bool empty_else_block =
+      (nir_else_block == nir_if_last_else_block(if_stmt) &&
+      exec_list_is_empty(&nir_else_block->instr_list)); /* else list is a single block with no instructions */
+   ppir_block *block = comp->current_block; /* block that precedes the if; it receives the conditional branch */
+
+   node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return false;
+   else_branch = ppir_node_to_branch(node);
+   ppir_node_add_src(block->comp, node, &else_branch->src[0],
+                     &if_stmt->condition, 1);
+   else_branch->num_src = 1;
+   /* Negate condition to minimize branching. We're generating following:
+    * current_block: { ...; if (!statement) branch else_block; }
+    * then_block: { ...; branch after_block; }
+    * else_block: { ... }
+    * after_block: { ... }
+    *
+    * or if else list is empty:
+    * block: { if (!statement) branch else_block; }
+    * then_block: { ... }
+    * else_block: after_block: { ... }
+    */
+   else_branch->negate = true;
+   list_addtail(&else_branch->node.list, &block->node_list);
+   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
+      return false; /* a failed then list must not be reported as success */
+   if (empty_else_block) {
+      nir_block *nblock = nir_if_last_else_block(if_stmt);
+      assert(nblock->successors[0]);
+      assert(!nblock->successors[1]);
+      else_branch->target = ppir_get_block(comp, nblock->successors[0]); /* skip the empty else block entirely */
+      /* Add empty else block to the list */
+      list_addtail(&block->successors[1]->list, &comp->block_list);
+      return true;
+   }
+
+   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));
+
+   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
+   assert(last_then_block->successors[0]);
+   assert(!last_then_block->successors[1]);
+   block = ppir_get_block(comp, last_then_block); /* from here on, block is the last then block */
+   node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return false;
+   after_branch = ppir_node_to_branch(node);
+   /* Unconditional */
+   after_branch->num_src = 0;
+   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
+   /* Target should be after_block, will fixup later */
+   list_addtail(&after_branch->node.list, &block->node_list);
+
+   /* Emit the else list; a failure there must reach the caller rather
+    * than being masked by an unconditional "return true" */
+   return ppir_emit_cf_list(comp, &if_stmt->else_list);
 }
 
 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
 {
-   ppir_error("loop nir_cf_node not support\n");
-   return false;
+   ppir_block *save_loop_cont_block = comp->loop_cont_block;
+   ppir_block *block;
+   ppir_branch_node *loop_branch;
+   nir_block *loop_last_block;
+   ppir_node *node;
+
+   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
+
+   ppir_emit_cf_list(comp, &nloop->body);
+
+   loop_last_block = nir_loop_last_block(nloop);
+   block = ppir_get_block(comp, loop_last_block);
+   node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return false;
+   loop_branch = ppir_node_to_branch(node);
+   /* Unconditional */
+   loop_branch->num_src = 0;
+   loop_branch->target = comp->loop_cont_block;
+   list_addtail(&loop_branch->node.list, &block->node_list);
+
+   comp->loop_cont_block = save_loop_cont_block;
+
+   comp->num_loops++;
+
+   return true;
 }
 
 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
@@ -603,7 +740,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
       ppir_node *prev_node = NULL;
       list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
          if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
-            ppir_node_add_dep(prev_node, node);
+            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
          }
          if (node->op == ppir_op_discard ||
              node->op == ppir_op_store_color ||
@@ -646,8 +783,10 @@ static void ppir_add_write_after_read_deps(ppir_compiler *comp)
                ppir_src *src = ppir_node_get_src(node, i);
                if (src && src->type == ppir_target_register &&
                    src->reg == reg &&
-                   write)
-                  ppir_node_add_dep(write, node);
+                   write) {
+                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
+                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
+               }
             }
             ppir_dest *dest = ppir_node_get_dest(node);
             if (dest && dest->type == ppir_target_register &&
@@ -699,6 +838,19 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
       }
    }
 
+   /* Validate outputs, we support only gl_FragColor */
+   nir_foreach_variable(var, &nir->outputs) {
+      switch (var->data.location) {
+      case FRAG_RESULT_COLOR:
+      case FRAG_RESULT_DATA0:
+         break;
+      default:
+         ppir_error("unsupported output type\n");
+         goto err_out0;
+         break;
+      }
+   }
+
    foreach_list_typed(nir_register, reg, node, &func->registers) {
       ppir_reg *r = rzalloc(comp, ppir_reg);
       if (!r)
@@ -706,8 +858,6 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
 
       r->index = reg->index;
       r->num_components = reg->num_components;
-      r->live_in = INT_MAX;
-      r->live_out = 0;
       r->is_head = false;
       list_addtail(&r->list, &comp->reg_list);
    }