lima/ppir: rework store output

author Erico Nunes <nunes.erico@gmail.com>

Tue, 14 Apr 2020 00:45:38 +0000 (02:45 +0200)

committer Erico Nunes <nunes.erico@gmail.com>

Sat, 9 May 2020 12:40:34 +0000 (14:40 +0200)
author Erico Nunes <nunes.erico@gmail.com>
Tue, 14 Apr 2020 00:45:38 +0000 (02:45 +0200)
committer Erico Nunes <nunes.erico@gmail.com>
Sat, 9 May 2020 12:40:34 +0000 (14:40 +0200)
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c

index ce2c10732ce2e33d0e112fd0223d250af88735ae..4420ee30e05931774e6ee8f5a78f177ff643dc3d 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -201,7 +201,6 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
        f->op = shift_to_op(alu->shift);
        break;
     case ppir_op_mov:
-   case ppir_op_store_color:
        f->op = ppir_codegen_vec4_mul_op_mov;
        break;
     case ppir_op_max:
@@ -344,7 +343,6 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
        f->op = ppir_codegen_vec4_acc_op_add;
        break;
     case ppir_op_mov:
-   case ppir_op_store_color:
        f->op = ppir_codegen_vec4_acc_op_mov;
        break;
     case ppir_op_sum3:
diff --git a/src/gallium/drivers/lima/ir/pp/liveness.c b/src/gallium/drivers/lima/ir/pp/liveness.c

index b17564b089dbbfd0949130613996a2501889a311..20954bd08bec63c90160468f1206cec8b35f4c62 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/liveness.c
+++ b/src/gallium/drivers/lima/ir/pp/liveness.c
@@ -178,7 +178,6 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
        switch(node->op) {
           case ppir_op_const:
           case ppir_op_undef:
-         case ppir_op_store_color: /* never clear dest if its store output */
              continue;
           default:
              break;
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c

index 3c02c0a811c985198ac1877c48af27507b7c9d1c..620ecbed4d23084a918bbaf773b8c654fb56d807 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -352,7 +352,26 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
        return true;
  
     case nir_intrinsic_store_output: {
-      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
+      /* In simple cases where the store_output is ssa, that register
+       * can be directly marked as the output.
+       * If discard is used or the source is not ssa, things can get a
+       * lot more complicated, so don't try to optimize those and fall
+       * back to inserting a mov at the end.
+       * If the source node will only be able to output to pipeline
+       * registers, fall back to the mov as well. */
+      if (!block->comp->uses_discard && instr->src->is_ssa) {
+         node = block->comp->var_nodes[instr->src->ssa->index];
+         switch (node->op) {
+         case ppir_op_load_uniform:
+         case ppir_op_const:
+            break;
+         default:
+            node->is_end = 1;
+            return true;
+         }
+      }
+
+      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
        if (!alu_node)
           return false;
  
@@ -370,6 +389,8 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
        ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                          u_bit_consecutive(0, instr->num_components));
  
+      alu_node->node.is_end = 1;
+
        list_addtail(&alu_node->node.list, &block->node_list);
        return true;
     }
@@ -724,9 +745,9 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
  static void ppir_add_ordering_deps(ppir_compiler *comp)
  {
     /* Some intrinsics do not have explicit dependencies and thus depend
-    * on instructions order. Consider discard_if and store_ouput as
-    * example. If we don't add fake dependency of discard_if to store_output
-    * scheduler may put store_output first and since store_output terminates
+    * on instructions order. Consider discard_if and the is_end node as
+    * example. If we don't add fake dependency of discard_if to is_end,
+    * scheduler may put the is_end first and since is_end terminates
      * shader on Utgard PP, rest of it will never be executed.
      * Add fake dependencies for discard/branch/store to preserve
      * instruction order.
@@ -753,8 +774,8 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
           if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
              ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
           }
-         if (node->op == ppir_op_discard ||
-             node->op == ppir_op_store_color ||
+         if (node->is_end ||
+             node->op == ppir_op_discard ||
               node->op == ppir_op_store_temp ||
               node->op == ppir_op_branch) {
              prev_node = node;
@@ -818,6 +839,7 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
        return false;
  
     comp->ra = ra;
+   comp->uses_discard = nir->info.fs.uses_discard;
  
     /* 1st pass: create ppir blocks */
     nir_foreach_function(function, nir) {
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c

index fdf444af0bbf3d79e0c505d2cdf5902374f8419e..b71cbc3763e607834f6e54ba8a2e141f6def2a22 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -313,14 +313,6 @@ const ppir_op_info ppir_op_infos[] = {
        .name = "const",
        .type = ppir_node_type_const,
     },
-   [ppir_op_store_color] = {
-      .name = "st_col",
-      .type = ppir_node_type_alu,
-      .slots = (int []) {
-         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
-         PPIR_INSTR_SLOT_END
-      },
-   },
     [ppir_op_store_temp] = {
        .name = "st_temp",
        .type = ppir_node_type_store,
@@ -632,6 +624,11 @@ ppir_node *ppir_node_insert_mov(ppir_node *node)
     ppir_node_add_dep(move, node, ppir_dep_src);
     list_addtail(&move->list, &node->list);
  
+   if (node->is_end) {
+      node->is_end = false;
+      move->is_end = true;
+   }
+
     return move;
  }
  
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c

index 35cf5f10f63d7c4205c3c4d19101bede63c62860..14b8c41f302be1becd792275188594afcf085c13 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -75,7 +75,7 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node)
     return ppir_instr_insert_node(succ->instr, node);
  }
  
-static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_node **next)
+static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
  {
     switch (node->type) {
     case ppir_node_type_alu:
@@ -105,9 +105,6 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
        if (!node->instr && !create_new_instr(block, node))
           return false;
  
-      if (node->op == ppir_op_store_color)
-         node->instr->is_end = true;
-
        break;
     }
     case ppir_node_type_load:
@@ -195,15 +192,13 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
  
  static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
  {
-   ppir_node *next = node;
-
     /* first try pipeline sched, if that didn't succeed try normal scheduling */
     if (!ppir_do_node_to_instr_try_insert(block, node))
-      if (!ppir_do_one_node_to_instr(block, node, &next))
+      if (!ppir_do_one_node_to_instr(block, node))
           return false;
  
-   /* next may have been updated in ppir_do_one_node_to_instr */
-   node = next;
+   if (node->is_end)
+      node->instr->is_end = true;
  
     /* we have to make sure the dep not be destroyed (due to
      * succ change) in ppir_do_node_to_instr, otherwise we can't
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h

index cec918d50f31d7da07ce03675688c221d70fd0a8..59887a70ec96b6acf0c14b1bb9404113a3f0bc67 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -108,7 +108,6 @@ typedef enum {
     ppir_op_load_temp,
  
     ppir_op_store_temp,
-   ppir_op_store_color,
  
     ppir_op_const,
  
@@ -162,6 +161,7 @@ typedef struct ppir_node {
     struct ppir_instr *instr;
     int instr_pos;
     struct ppir_block *block;
+   bool is_end;
  
     /* for scheduler */
     struct list_head succ_list;
@@ -385,6 +385,7 @@ typedef struct ppir_compiler {
  
     struct ra_regs *ra;
     struct lima_fs_shader_state *prog;
+   bool uses_discard;
  
     /* for scheduler */
     int sched_instr_base;
diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c

index 4b5d3a8bb1d42307a4884784a974330737b8fbe3..5c5d13e5b9df9db95cfdd86f484f5e842de9ffad 100644 (file)
--- a/src/gallium/drivers/lima/ir/pp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/pp/regalloc.c
@@ -138,7 +138,7 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
  {
     list_for_each_entry(ppir_block, block, &comp->block_list, list) {
        list_for_each_entry(ppir_node, node, &block->node_list, list) {
-         if (node->op == ppir_op_store_color)
+         if (node->is_end)
              continue;
  
           if (!node->instr || node->op == ppir_op_const)
author	Erico Nunes <nunes.erico@gmail.com>
	Tue, 14 Apr 2020 00:45:38 +0000 (02:45 +0200)
committer	Erico Nunes <nunes.erico@gmail.com>
	Sat, 9 May 2020 12:40:34 +0000 (14:40 +0200)
src/gallium/drivers/lima/ir/pp/codegen.c		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/liveness.c		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/nir.c		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/node.c		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/node_to_instr.c		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/ppir.h		patch \| blob \| history
src/gallium/drivers/lima/ir/pp/regalloc.c		patch \| blob \| history