v3d: Block bin on render when doing vertex texturing.
[mesa.git] / src / gallium / drivers / v3d / v3dx_draw.c
index 97127a1b69b7ff1ad7bd87f71da938b7ebdfd97e..af079bd0d5a0b4c87fa394fce6f18c4ebab90302 100644 (file)
@@ -55,7 +55,7 @@ v3d_start_draw(struct v3d_context *v3d)
         job->submit.bcl_start = job->bcl.bo->offset;
         v3d_job_add_bo(job, job->bcl.bo);
 
-        job->tile_alloc = v3d_bo_alloc(v3d->screen, 1024 * 1024, "tile alloc");
+        job->tile_alloc = v3d_bo_alloc(v3d->screen, 1024 * 1024, "tile_alloc");
         uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64;
         job->tile_state = v3d_bo_alloc(v3d->screen,
                                        job->draw_tiles_y *
@@ -78,10 +78,10 @@ v3d_start_draw(struct v3d_context *v3d)
 
         cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
 #if V3D_VERSION >= 40
-                config.width_in_pixels_minus_1 = v3d->framebuffer.width - 1;
-                config.height_in_pixels_minus_1 = v3d->framebuffer.height - 1;
-                config.number_of_render_targets_minus_1 =
-                        MAX2(v3d->framebuffer.nr_cbufs, 1) - 1;
+                config.width_in_pixels = v3d->framebuffer.width;
+                config.height_in_pixels = v3d->framebuffer.height;
+                config.number_of_render_targets =
+                        MAX2(v3d->framebuffer.nr_cbufs, 1);
 #else /* V3D_VERSION < 40 */
                 config.tile_state_data_array_base_address =
                         cl_address(job->tile_state, 0);
@@ -178,7 +178,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                          v3d->prog.fs->prog_data.fs->discard);
 
                 shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
-                        v3d->prog.fs->prog_data.fs->uses_centroid_and_center_w;
+                        v3d->prog.fs->prog_data.fs->uses_center_w;
 
                 shader.number_of_varyings_in_fragment_shader =
                         v3d->prog.fs->prog_data.base->num_inputs;
@@ -214,6 +214,9 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                 shader.fragment_shader_uniforms_address = fs_uniforms;
 
 #if V3D_VERSION >= 41
+                shader.min_coord_shader_input_segments_required_in_play = 1;
+                shader.min_vertex_shader_input_segments_required_in_play = 1;
+
                 shader.coordinate_shader_4_way_threadable =
                         v3d->prog.cs->prog_data.vs->base.threads == 4;
                 shader.vertex_shader_4_way_threadable =
@@ -422,6 +425,20 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         struct v3d_job *job = v3d_get_job_for_fbo(v3d);
 
+        /* If vertex texturing depends on the output of rendering, we need to
+         * ensure that that rendering is complete before we run a coordinate
+         * shader that depends on it.
+         *
+         * Given that doing that is unusual, for now we just block the binner
+         * on the last submitted render, rather than tracking the last
+         * rendering to each texture's BO.
+         */
+        if (v3d->verttex.num_textures) {
+                perf_debug("Blocking binner on last render "
+                           "due to vertex texturing.\n");
+                job->submit.in_sync_bcl = v3d->out_sync;
+        }
+
         /* Get space to emit our draw call into the BCL, using a branch to
          * jump to a new BO if necessary.
          */
@@ -555,33 +572,57 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                         }
                 }
         }
+
+        /* A flush is required in between a TF draw and any following TF specs
+         * packet, or the GPU may hang.  Just flush each time for now.
+         */
+        if (v3d->streamout.num_targets)
+                cl_emit(&job->bcl, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT, flush);
+
         job->draw_calls_queued++;
 
-        if (v3d->zsa && job->zsbuf &&
-            (v3d->zsa->base.depth.enabled ||
-             v3d->zsa->base.stencil[0].enabled)) {
+        /* Increment the TF offsets by how many verts we wrote.  XXX: This
+         * needs some clamping to the buffer size.
+         */
+        for (int i = 0; i < v3d->streamout.num_targets; i++)
+                v3d->streamout.offsets[i] += info->count;
+
+        if (v3d->zsa && job->zsbuf && v3d->zsa->base.depth.enabled) {
                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                 v3d_job_add_bo(job, rsc->bo);
 
-                if (v3d->zsa->base.depth.enabled) {
-                        job->resolve |= PIPE_CLEAR_DEPTH;
-                        rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
-                }
+                job->load |= PIPE_CLEAR_DEPTH & ~job->clear;
+                if (v3d->zsa->base.depth.writemask)
+                        job->store |= PIPE_CLEAR_DEPTH;
+                rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
+        }
 
-                if (v3d->zsa->base.stencil[0].enabled) {
-                        job->resolve |= PIPE_CLEAR_STENCIL;
-                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
+        if (v3d->zsa && job->zsbuf && v3d->zsa->base.stencil[0].enabled) {
+                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
+                if (rsc->separate_stencil)
+                        rsc = rsc->separate_stencil;
+
+                v3d_job_add_bo(job, rsc->bo);
+
+                job->load |= PIPE_CLEAR_STENCIL & ~job->clear;
+                if (v3d->zsa->base.stencil[0].writemask ||
+                    v3d->zsa->base.stencil[1].writemask) {
+                        job->store |= PIPE_CLEAR_STENCIL;
                 }
+                rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
         }
 
         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+                int blend_rt = v3d->blend->base.independent_blend_enable ? i : 0;
 
-                if (job->resolve & bit || !job->cbufs[i])
+                if (job->store & bit || !job->cbufs[i])
                         continue;
                 struct v3d_resource *rsc = v3d_resource(job->cbufs[i]->texture);
 
-                job->resolve |= bit;
+                job->load |= bit & ~job->clear;
+                if (v3d->blend->base.rt[blend_rt].colormask)
+                        job->store |= bit;
                 v3d_job_add_bo(job, rsc->bo);
         }
 
@@ -595,20 +636,60 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 v3d_flush(pctx);
 }
 
+/**
+ * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
+ *
+ * The work is handed to util_blitter_clear(), covering the full framebuffer
+ * width and height and the layer count reported by
+ * util_framebuffer_get_num_layers().
+ *
+ * "buffers", "depth" and "stencil" are passed through to the blitter
+ * untouched.  "color" may be NULL, in which case a zero-initialized dummy
+ * color is substituted before the call.
+ */
 static void
-v3d_clear(struct pipe_context *pctx, unsigned buffers,
-          const union pipe_color_union *color, double depth, unsigned stencil)
+v3d_draw_clear(struct v3d_context *v3d,
+               unsigned buffers,
+               const union pipe_color_union *color,
+               double depth, unsigned stencil)
 {
-        struct v3d_context *v3d = v3d_context(pctx);
-        struct v3d_job *job = v3d_get_job_for_fbo(v3d);
+        static const union pipe_color_union dummy_color = {};
 
-        /* We can't flag new buffers for clearing once we've queued draws.  We
-         * could avoid this by using the 3d engine to clear.
+        /* The blitter util dereferences the color regardless, even though the
+         * gallium clear API may not pass one in when only Z/S are cleared.
          */
+        if (!color)
+                color = &dummy_color;
+        /* NOTE(review): v3d_blitter_save() presumably stashes the current
+         * context state so the blitter can restore it afterwards -- confirm
+         * against its definition, which is not part of this diff.
+         */
+        v3d_blitter_save(v3d);
+        util_blitter_clear(v3d->blitter,
+                           v3d->framebuffer.width,
+                           v3d->framebuffer.height,
+                           util_framebuffer_get_num_layers(&v3d->framebuffer),
+                           buffers, color, depth, stencil);
+}
+
+/**
+ * Attempts to perform the GL clear by using the TLB's fast clear at the start
+ * of the frame.
+ */
+static unsigned
+v3d_tlb_clear(struct v3d_job *job, unsigned buffers,
+              const union pipe_color_union *color,
+              double depth, unsigned stencil)
+{
+        struct v3d_context *v3d = job->v3d;
+
         if (job->draw_calls_queued) {
-                perf_debug("Flushing rendering to process new clear.\n");
-                v3d_job_submit(v3d, job);
-                job = v3d_get_job_for_fbo(v3d);
+                /* If anything in the CL has drawn using the buffer, then the
+                 * TLB clear we're trying to add now would happen before that
+                 * drawing.
+                 */
+                buffers &= ~(job->load | job->store);
+        }
+
+        /* GFXH-1461: If we were to emit a load of just depth or just stencil,
+         * then the clear for the other may get lost.  We need to decide now
+         * if it would be possible to need to emit a load of just one after
+         * we've set up our TLB clears.
+         */
+        if (buffers & PIPE_CLEAR_DEPTHSTENCIL &&
+            (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL &&
+            job->zsbuf &&
+            util_format_is_depth_and_stencil(job->zsbuf->texture->format)) {
+                buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
         }
 
         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
@@ -684,10 +765,25 @@ v3d_clear(struct pipe_context *pctx, unsigned buffers,
         job->draw_min_y = 0;
         job->draw_max_x = v3d->framebuffer.width;
         job->draw_max_y = v3d->framebuffer.height;
-        job->cleared |= buffers;
-        job->resolve |= buffers;
+        job->clear |= buffers;
+        job->store |= buffers;
 
         v3d_start_draw(v3d);
+
+        return buffers;
+}
+
+static void
+v3d_clear(struct pipe_context *pctx, unsigned buffers,
+          const union pipe_color_union *color, double depth, unsigned stencil)
+{
+        struct v3d_context *v3d = v3d_context(pctx);
+        struct v3d_job *job = v3d_get_job_for_fbo(v3d);
+        /* Clear the requested buffers of the current FBO job in two steps.
+         * First offer all of them to v3d_tlb_clear(); the mask it returns
+         * is dropped from "buffers", leaving only what is still pending.
+         */
+        buffers &= ~v3d_tlb_clear(job, buffers, color, depth, stencil);
+        /* Whatever remains is cleared with a draw instead. */
+        if (buffers)
+                v3d_draw_clear(v3d, buffers, color, depth, stencil);
+}
 
 static void