v3d: add a new flag at v3d_compiler to dirty the TMU cache
author: Iago Toral Quiroga <itoral@igalia.com>
date: Wed, 14 Aug 2019 07:27:13 +0000 (09:27 +0200)
committer: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
date: Fri, 18 Oct 2019 12:08:52 +0000 (14:08 +0200)
The new flag is set for any TMU write, whether it comes from register
spills or from general TMU accesses. It is then consumed later in
v3d_emit_gl_shader_state.

v2: add a new flag at v3d_compiler instead of dirtying the flag
    at v3dx if there is any spill (change suggested by Eric, added by
    Alejandro)

v3: set this for anything that is not a load and do it also in
    v3d40_vir_emit_image_load_store (Eric)

Reviewed-by: Eric Anholt <eric@anholt.net>
src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d40_tex.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_register_allocate.c
src/gallium/drivers/v3d/v3dx_draw.c

index 1ca7c2fc646f2ed45b8ec283c6607c3c4ba211dd..2de7f7e32b088cb3704a51f4b5020f1f88ff4fec 100644 (file)
@@ -208,6 +208,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         instr->intrinsic == nir_intrinsic_load_scratch ||
                         instr->intrinsic == nir_intrinsic_load_shared);
 
                         instr->intrinsic == nir_intrinsic_load_scratch ||
                         instr->intrinsic == nir_intrinsic_load_shared);
 
+        if (!is_load)
+                c->tmu_dirty_rcl = true;
+
         bool has_index = !is_shared_or_scratch;
 
         int offset_src;
         bool has_index = !is_shared_or_scratch;
 
         int offset_src;
index 9ee7df214212b4a8ebc3763d9d2c3b313058ba38..287116381fb8f8eb6456eab57bf9a6aa7b35b33c 100644 (file)
@@ -410,4 +410,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
 
         if (nir_intrinsic_dest_components(instr) == 0)
                 vir_TMUWT(c);
 
         if (nir_intrinsic_dest_components(instr) == 0)
                 vir_TMUWT(c);
+
+        if (instr->intrinsic != nir_intrinsic_image_deref_load)
+                c->tmu_dirty_rcl = true;
 }
 }
index b61119f5615000a9137b72fceb3077f03e9567b8..fbb4b64a3655abeb5813128129af6db027985926 100644 (file)
@@ -639,6 +639,8 @@ struct v3d_compile {
         bool lock_scoreboard_on_first_thrsw;
 
         bool failed;
         bool lock_scoreboard_on_first_thrsw;
 
         bool failed;
+
+        bool tmu_dirty_rcl;
 };
 
 struct v3d_uniform_list {
 };
 
 struct v3d_uniform_list {
@@ -658,6 +660,8 @@ struct v3d_prog_data {
          * after-final-THRSW state.
          */
         bool single_seg;
          * after-final-THRSW state.
          */
         bool single_seg;
+
+        bool tmu_dirty_rcl;
 };
 
 struct v3d_vs_prog_data {
 };
 
 struct v3d_vs_prog_data {
index b86ffc82ea39f7e769eb313f86525138cab71d37..dc5d3fe3bed6d708124adddc70c1dccec20bde06 100644 (file)
@@ -710,6 +710,7 @@ v3d_set_prog_data(struct v3d_compile *c,
         prog_data->threads = c->threads;
         prog_data->single_seg = !c->last_thrsw;
         prog_data->spill_size = c->spill_size;
         prog_data->threads = c->threads;
         prog_data->single_seg = !c->last_thrsw;
         prog_data->spill_size = c->spill_size;
+        prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;
 
         v3d_set_prog_data_uniforms(c, prog_data);
 
 
         v3d_set_prog_data_uniforms(c, prog_data);
 
index 7583acf155c18d32f02117a757ddecaf9b1882c6..623cc22cefad74390ce88a9284ae52dad50d059e 100644 (file)
@@ -270,6 +270,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                                 vir_emit_thrsw(c);
                                 vir_TMUWT(c);
                                 c->spills++;
                                 vir_emit_thrsw(c);
                                 vir_TMUWT(c);
                                 c->spills++;
+                                c->tmu_dirty_rcl = true;
                         }
                 }
 
                         }
                 }
 
index b0b52fa00b66e939fa91b97653a1cf1864e13a79..5795279b886a21d12f763545e66e7c19e68a6cb5 100644 (file)
@@ -349,6 +349,11 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                 v3d_write_uniforms(v3d, v3d->prog.cs,
                                    PIPE_SHADER_VERTEX);
 
                 v3d_write_uniforms(v3d, v3d->prog.cs,
                                    PIPE_SHADER_VERTEX);
 
+        /* Update the cache dirty flag based on the shader progs data */
+        job->tmu_dirty_rcl |= v3d->prog.cs->prog_data.vs->base.tmu_dirty_rcl;
+        job->tmu_dirty_rcl |= v3d->prog.vs->prog_data.vs->base.tmu_dirty_rcl;
+        job->tmu_dirty_rcl |= v3d->prog.fs->prog_data.fs->base.tmu_dirty_rcl;
+
         /* See GFXH-930 workaround below */
         uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1);
         uint32_t shader_rec_offset =
         /* See GFXH-930 workaround below */
         uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1);
         uint32_t shader_rec_offset =