freedreno/a6xx: fix hangs with newer sqe fw
[mesa.git] / src / gallium / drivers / v3d / v3dx_rcl.c
index 01a907b0a86042bc481fa4eeb42eebc13c82971d..8706ac0fc3ff506376d424f990f17b1a4aa99f1e 100644 (file)
@@ -207,7 +207,7 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl)
 {
         uint32_t loads_pending = job->load;
 
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                 if (!(loads_pending & bit))
                         continue;
@@ -305,7 +305,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
          * perspective.  Non-MSAA surfaces will use
          * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
          */
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                 if (!(job->store & bit))
                         continue;
@@ -374,6 +374,15 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
                 }
         }
 #else /* V3D_VERSION >= 40 */
+        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
+         * we still need to emit some sort of store.
+         */
+        if (!job->store) {
+                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+                        store.buffer_to_store = NONE;
+                }
+        }
+
         assert(!stores_pending);
 
         /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
@@ -498,7 +507,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
         v3d_job_add_bo(job, job->rcl.bo);
 
         int nr_cbufs = 0;
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 if (job->cbufs[i])
                         nr_cbufs = i + 1;
         }
@@ -761,7 +770,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
 
         v3d_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
 
-        /* XXX: Use Morton order */
+        /* XXX perf: We should expose GL_MESA_tile_raster_order to improve X11
+         * performance, but we should use Morton order otherwise to improve
+         * cache locality.
+         */
         uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
         uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
         uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
@@ -783,5 +795,20 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 }
         }
 
+        if (job->tmu_dirty_rcl) {
+           cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) {
+              flush.tmu_config_cache_clear = 0xf;
+              flush.tmu_data_cache_clear = 0xf;
+              flush.uniforms_cache_clear = 0xf;
+              flush.instruction_cache_clear = 0xf;
+           }
+
+           cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) {
+              flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN;
+              flush.l2t_flush_start = cl_address(NULL, 0);
+              flush.l2t_flush_end = cl_address(NULL, ~0);
+           }
+        }
+
         cl_emit(&job->rcl, END_OF_RENDERING, end);
 }