v3d: add new flag dirty TMU cache at v3d_compiler
[mesa.git] / src / gallium / drivers / v3d / v3dx_draw.c
index c2b3ecd8a131a543b7d25a0ec5caf2d778a652f3..5795279b886a21d12f763545e66e7c19e68a6cb5 100644 (file)
@@ -156,22 +156,34 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                     view->base.format != PIPE_FORMAT_X32_S8X24_UINT)
                         v3d_update_shadow_texture(pctx, &view->base);
 
-                v3d_flush_jobs_writing_resource(v3d, view->texture, false);
+                v3d_flush_jobs_writing_resource(v3d, view->texture,
+                                                V3D_FLUSH_DEFAULT);
         }
 
         /* Flush writes to UBOs. */
         foreach_bit(i, v3d->constbuf[s].enabled_mask) {
                 struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
-                if (cb->buffer)
-                        v3d_flush_jobs_writing_resource(v3d, cb->buffer, false);
+                if (cb->buffer) {
+                        v3d_flush_jobs_writing_resource(v3d, cb->buffer,
+                                                        V3D_FLUSH_DEFAULT);
+                }
         }
 
-        /* Flush writes to our image views */
+        /* Flush reads/writes to our SSBOs */
+        foreach_bit(i, v3d->ssbo[s].enabled_mask) {
+                struct pipe_shader_buffer *sb = &v3d->ssbo[s].sb[i];
+                if (sb->buffer) {
+                        v3d_flush_jobs_reading_resource(v3d, sb->buffer,
+                                                        V3D_FLUSH_NOT_CURRENT_JOB);
+                }
+        }
+
+        /* Flush reads/writes to our image views */
         foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
                 struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
 
-                v3d_flush_jobs_writing_resource(v3d, view->base.resource,
-                                                false);
+                v3d_flush_jobs_reading_resource(v3d, view->base.resource,
+                                                V3D_FLUSH_NOT_CURRENT_JOB);
         }
 
         /* Flush writes to our vertex buffers (i.e. from transform feedback) */
@@ -180,7 +192,7 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                         struct pipe_vertex_buffer *vb = &v3d->vertexbuf.vb[i];
 
                         v3d_flush_jobs_writing_resource(v3d, vb->buffer.resource,
-                                                        false);
+                                                        V3D_FLUSH_DEFAULT);
                 }
         }
 }
@@ -200,7 +212,8 @@ v3d_predraw_check_outputs(struct pipe_context *pctx)
 
                         const struct pipe_stream_output_target *target =
                                 so->targets[i];
-                        v3d_flush_jobs_reading_resource(v3d, target->buffer);
+                        v3d_flush_jobs_reading_resource(v3d, target->buffer,
+                                                        V3D_FLUSH_DEFAULT);
                 }
         }
 }
@@ -336,6 +349,11 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                 v3d_write_uniforms(v3d, v3d->prog.cs,
                                    PIPE_SHADER_VERTEX);
 
+        /* Update the cache dirty flag based on the shader progs data */
+        job->tmu_dirty_rcl |= v3d->prog.cs->prog_data.vs->base.tmu_dirty_rcl;
+        job->tmu_dirty_rcl |= v3d->prog.vs->prog_data.vs->base.tmu_dirty_rcl;
+        job->tmu_dirty_rcl |= v3d->prog.fs->prog_data.fs->base.tmu_dirty_rcl;
+
         /* See GFXH-930 workaround below */
         uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1);
         uint32_t shader_rec_offset =
@@ -540,34 +558,23 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
         v3d_bo_unreference(&cs_uniforms.bo);
         v3d_bo_unreference(&vs_uniforms.bo);
         v3d_bo_unreference(&fs_uniforms.bo);
-
-        job->shader_rec_count++;
 }
 
 /**
- * Computes the various transform feedback statistics, since they can't be
- * recorded by CL packets.
+ * Updates the number of primitvies generated from the number of vertices
+ * to draw. We do this here instead of using PRIMITIVE_COUNTS_FEEDBACK because
+ * using the GPU packet for this might require sync waits and this is trivial
+ * to handle in the CPU instead.
  */
 static void
-v3d_tf_statistics_record(struct v3d_context *v3d,
-                         const struct pipe_draw_info *info)
+v3d_update_primitives_generated_counter(struct v3d_context *v3d,
+                                        const struct pipe_draw_info *info)
 {
         if (!v3d->active_queries)
                 return;
 
         uint32_t prims = u_prims_for_vertices(info->mode, info->count);
         v3d->prims_generated += prims;
-
-        if (v3d->streamout.num_targets <= 0)
-                return;
-
-        /* XXX: Only count if we didn't overflow. */
-        v3d->tf_prims_generated += prims;
-        for (int i = 0; i < v3d->streamout.num_targets; i++) {
-                struct v3d_stream_output_target *target =
-                        v3d_stream_output_target(v3d->streamout.targets[i]);
-                target->recorded_vertex_count += info->count;
-        }
 }
 
 static void
@@ -660,11 +667,22 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         if (info->indirect) {
                 v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer,
-                                                false);
+                                                V3D_FLUSH_DEFAULT);
         }
 
         v3d_predraw_check_outputs(pctx);
 
+        /* If transform feedback is active and we are switching primitive type
+         * we need to submit the job before drawing and update the vertex count
+         * written to TF based on the primitive type since we will need to
+         * know the exact vertex count if the application decides to call
+         * glDrawTransformFeedback() later.
+         */
+        if (v3d->streamout.num_targets > 0 &&
+            u_base_prim_type(info->mode) != u_base_prim_type(v3d->prim_mode)) {
+                v3d_tf_update_counters(v3d);
+        }
+
         struct v3d_job *job = v3d_get_job_for_fbo(v3d);
 
         /* If vertex texturing depends on the output of rendering, we need to
@@ -681,8 +699,8 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 job->submit.in_sync_bcl = v3d->out_sync;
         }
 
-        /* Mark SSBOs as being written.  We don't actually know which ones are
-         * read vs written, so just assume the worst
+        /* Mark SSBOs and images as being written.  We don't actually know
+         * which ones are read vs written, so just assume the worst.
          */
         for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) {
                 foreach_bit(i, v3d->ssbo[s].enabled_mask) {
@@ -762,7 +780,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
 #endif
 
-        v3d_tf_statistics_record(v3d, info);
+        v3d_update_primitives_generated_counter(v3d, info);
 
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
@@ -837,8 +855,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                         }
                 }
 
-                job->draw_calls_queued++;
-
                 if (info->has_user_indices)
                         pipe_resource_reference(&prsc, NULL);
         } else {
@@ -884,6 +900,8 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT, flush);
 
         job->draw_calls_queued++;
+        if (v3d->streamout.num_targets)
+           job->tf_draw_calls_queued++;
 
         /* Increment the TF offsets by how many verts we wrote.  XXX: This
          * needs some clamping to the buffer size.