v3d: Add support for shader_image_load_store.
diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c
index ca0a1ab39c2f8f28298fe0c2f9d979565079c85d..7ec687031d661637610c630f7b7cfb23982d71f8 100644
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@@ -124,11 +124,20 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
 {
         struct v3d_context *v3d = v3d_context(pctx);
 
+        /* XXX perf: If we're reading from the output of TF in this job, we
+         * should instead be using the wait for transform feedback
+         * functionality.
+         */
+
         /* Flush writes to textures we're sampling. */
         for (int i = 0; i < v3d->tex[s].num_textures; i++) {
-                struct pipe_sampler_view *view = v3d->tex[s].textures[i];
-                if (!view)
+                struct pipe_sampler_view *pview = v3d->tex[s].textures[i];
+                if (!pview)
                         continue;
+                struct v3d_sampler_view *view = v3d_sampler_view(pview);
+
+                if (view->texture != view->base.texture)
+                        v3d_update_shadow_texture(pctx, &view->base);
 
                 v3d_flush_jobs_writing_resource(v3d, view->texture);
         }
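
A note on the pview/view split above: v3d_sampler_view() recovers the driver-private view that embeds the Gallium pipe_sampler_view as its first member, and when view->texture differs from view->base.texture the driver is sampling a shadow copy that v3d_update_shadow_texture() must refresh before the flush. A minimal standalone sketch of the embed-and-cast idiom, with hypothetical sketch_* names standing in for the real types:

struct sketch_base_view {
        void *texture;                    /* what the state tracker bound */
};

struct sketch_driver_view {
        struct sketch_base_view base;     /* must remain the first member */
        void *texture;                    /* shadow copy; may differ from base.texture */
};

static inline struct sketch_driver_view *
sketch_driver_view(struct sketch_base_view *pview)
{
        /* Valid only because base is the first member of the wrapper. */
        return (struct sketch_driver_view *)pview;
}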
@@ -139,6 +148,13 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                 if (cb->buffer)
                         v3d_flush_jobs_writing_resource(v3d, cb->buffer);
         }
+
+        /* Flush writes to our image views. */
+        foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
+                struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
+
+                v3d_flush_jobs_writing_resource(v3d, view->base.resource);
+        }
 }
 
 static void
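
The image-view flush just added, like the write-marking loops later in this change, iterates with foreach_bit(): visit the index of every set bit of the per-stage enabled_mask. A self-contained sketch of that pattern (an illustration of the idiom, not the driver's actual macro definition):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>                    /* ffs() */

int
main(void)
{
        uint32_t enabled_mask = 0x29;   /* example mask: slots 0, 3 and 5 bound */

        for (uint32_t m = enabled_mask; m; m &= m - 1) {
                int i = ffs(m) - 1;     /* index of the lowest set bit */
                printf("flush jobs writing image slot %d\n", i);
        }
        return 0;
}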
@@ -171,6 +187,10 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                                     cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
                                     32);
 
+        /* XXX perf: We should move most of the SHADER_STATE_RECORD setup to
+         * compile time, so that we mostly just have to OR the VS and FS
+         * records together at draw time.
+         */
         cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
                 shader.enable_clipping = true;
                 /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
@@ -446,6 +466,9 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         for (int s = 0; s < PIPE_SHADER_TYPES; s++)
                 v3d_predraw_check_stage_inputs(pctx, s);
 
+        if (info->indirect)
+                v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer);
+
         struct v3d_job *job = v3d_get_job_for_fbo(v3d);
 
         /* If vertex texturing depends on the output of rendering, we need to
@@ -462,6 +485,23 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 job->submit.in_sync_bcl = v3d->out_sync;
         }
 
+        /* Mark SSBOs and image views as being written.  We don't actually
+         * know which ones are read vs written, so just assume the worst.
+         */
+        for (int s = 0; s < PIPE_SHADER_TYPES; s++) {
+                foreach_bit(i, v3d->ssbo[s].enabled_mask) {
+                        v3d_job_add_write_resource(job,
+                                                   v3d->ssbo[s].sb[i].buffer);
+                        job->tmu_dirty_rcl = true;
+                }
+
+                foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
+                        v3d_job_add_write_resource(job,
+                                                   v3d->shaderimg[s].si[i].base.resource);
+                        job->tmu_dirty_rcl = true;
+                }
+        }
+
         /* Get space to emit our draw call into the BCL, using a branch to
          * jump to a new BO if necessary.
          */
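
The SSBO and image loops in this hunk are deliberately conservative: Gallium does not tell the driver which bound buffers or images the shader merely reads, so every bound slot is recorded as written by the job, and tmu_dirty_rcl is set so the render control list knows TMU writes may need flushing. A rough sketch of that bookkeeping, using hypothetical sketch_* types (the real v3d_job_add_write_resource() does more than this):

#include <stdint.h>
#include <strings.h>                    /* ffs() */

struct sketch_resource { int last_write_job; };
struct sketch_job      { int id; int tmu_dirty_rcl; };

static void
sketch_job_add_write(struct sketch_job *job, struct sketch_resource *rsc)
{
        /* Later work reading rsc must now flush or wait on this job. */
        rsc->last_write_job = job->id;
}

static void
sketch_mark_bound_slots_written(struct sketch_job *job, uint32_t enabled_mask,
                                struct sketch_resource *slot[32])
{
        for (uint32_t m = enabled_mask; m; m &= m - 1) {
                sketch_job_add_write(job, slot[ffs(m) - 1]);
                job->tmu_dirty_rcl = 1;
        }
}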
@@ -543,7 +583,23 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 }
 #endif
 
-                if (info->instance_count > 1) {
+                if (info->indirect) {
+                        cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+#if V3D_VERSION < 40
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+#endif /* V3D_VERSION < 40 */
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+
+                                prim.number_of_draw_indirect_indexed_records = info->indirect->draw_count;
+
+                                prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2;
+                                prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo,
+                                                          info->indirect->offset);
+                        }
+                } else if (info->instance_count > 1) {
                         cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
                                 prim.index_type = ffs(info->index_size) - 1;
 #if V3D_VERSION >= 40
@@ -580,7 +636,16 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 if (info->has_user_indices)
                         pipe_resource_reference(&prsc, NULL);
         } else {
-                if (info->instance_count > 1) {
+                if (info->indirect) {
+                        cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.number_of_draw_indirect_array_records = info->indirect->draw_count;
+
+                                prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2;
+                                prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo,
+                                                          info->indirect->offset);
+                        }
+                } else if (info->instance_count > 1) {
                         cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
                                 prim.mode = info->mode | prim_tf_enable;
                                 prim.index_of_first_vertex = info->start;
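
For reference, the records inside info->indirect->buffer follow the standard ARB_draw_indirect command layouts; the two indirect packets above only forward the buffer address, the record count (draw_count) and the record stride, and the hardware walks the records itself. The struct names below are illustrative, but the field order is the one the GL specification fixes:

#include <stdint.h>

struct gl_draw_arrays_indirect {        /* records for the non-indexed path */
        uint32_t count;
        uint32_t instance_count;
        uint32_t first;
        uint32_t base_instance;
};

struct gl_draw_elements_indirect {      /* records for the indexed path */
        uint32_t count;
        uint32_t instance_count;
        uint32_t first_index;
        uint32_t base_vertex;
        uint32_t base_instance;
};

/* Every field is one 32-bit word and records are 4-byte aligned, which is
 * why the packet field is stride_in_multiples_of_4_bytes and the driver
 * writes info->indirect->stride >> 2.
 */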