freedreno/ir3: don't DCE ij_pix if used for pre-fs-texture-fetch
author Rob Clark <robdclark@chromium.org>
Fri, 11 Oct 2019 18:50:22 +0000 (11:50 -0700)
committer Rob Clark <robdclark@gmail.com>
Fri, 18 Oct 2019 21:11:54 +0000 (21:11 +0000)
When we enable pre-dispatch texture fetch, we could have a scenario
where the barycentric i/j coord sysval is not used in the shader, but
only used for the varying fetch for the pre-dispatch texture fetch.
In this case we need to take care not to DCE this sysval.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_depth.c

index f7a87ee076a49770d55a796f2ba4fc65f9fead30..8b8788a8a97571a84fcc32177423b2e977d40e64 100644 (file)
@@ -1062,13 +1062,13 @@ void ir3_print(struct ir3 *ir);
 void ir3_print_instr(struct ir3_instruction *instr);
 
 /* depth calculation: */
+struct ir3_shader_variant;
 int ir3_delayslots(struct ir3_instruction *assigner,
                struct ir3_instruction *consumer, unsigned n);
 void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
-void ir3_depth(struct ir3 *ir);
+void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* copy-propagate: */
-struct ir3_shader_variant;
 void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* group neighbors and insert mov's to resolve conflicts: */
index 081d4cf19ec298e11819f6f93411261086f89c62..39bef63a7803d6547eaabbf1a0ef637832fa4da3 100644 (file)
@@ -3142,7 +3142,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                ir3_print(ir);
        }
 
-       ir3_depth(ir);
+       ir3_depth(ir, so);
 
        if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
                printf("AFTER DEPTH:\n");
index bc42bfdeb3baec06327e9dd1b15193918c21ae71..49feda7b19556516b3b2854c79814bb33b74a5c2 100644 (file)
@@ -27,6 +27,7 @@
 #include "util/u_math.h"
 
 #include "ir3.h"
+#include "ir3_shader.h"
 
 /*
  * Instruction Depth:
@@ -209,7 +210,7 @@ remove_unused_by_block(struct ir3_block *block)
 }
 
 static bool
-compute_depth_and_remove_unused(struct ir3 *ir)
+compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
 {
        unsigned i;
        bool progress = false;
@@ -221,6 +222,13 @@ compute_depth_and_remove_unused(struct ir3 *ir)
         */
        list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
                list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+                       /* special case, if pre-fs texture fetch used, we cannot
+                        * eliminate the barycentric i/j input
+                        */
+                       if (so->num_sampler_prefetch &&
+                                       (instr->opc == OPC_META_INPUT) &&
+                                       (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PIXEL))
+                               continue;
                        instr->flags |= IR3_INSTR_UNUSED;
                }
        }
@@ -263,10 +271,10 @@ compute_depth_and_remove_unused(struct ir3 *ir)
 }
 
 void
-ir3_depth(struct ir3 *ir)
+ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so)
 {
        bool progress;
        do {
-               progress = compute_depth_and_remove_unused(ir);
+               progress = compute_depth_and_remove_unused(ir, so);
        } while (progress);
 }