vc4: move the draw splitting routine to shared code
[mesa.git] / src / gallium / drivers / vc4 / vc4_draw.c
index fdf983dae7f37d4fd5ebee07c43a61d9d4bb13cf..3da60ff64a8374678f3e668068631cd209835215 100644 (file)
@@ -24,8 +24,9 @@
 
 #include "util/u_blitter.h"
 #include "util/u_prim.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_pack_color.h"
+#include "util/u_split_draw.h"
 #include "util/u_upload_mgr.h"
 #include "indices/u_primconvert.h"
 
@@ -40,7 +41,7 @@ vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
         /* The SW-5891 workaround may cause us to emit multiple shader recs
          * and draw packets.
          */
-        int num_draws = DIV_ROUND_UP(vert_count, 65535) + 1;
+        int num_draws = DIV_ROUND_UP(vert_count, 65535 - 2) + 1;
 
         /* Binner gets our packet state -- vc4_emit.c contents,
          * and the primitive itself.
@@ -116,12 +117,13 @@ vc4_predraw_check_textures(struct pipe_context *pctx,
         struct vc4_context *vc4 = vc4_context(pctx);
 
         for (int i = 0; i < stage_tex->num_textures; i++) {
-                struct pipe_sampler_view *view = stage_tex->textures[i];
+                struct vc4_sampler_view *view =
+                        vc4_sampler_view(stage_tex->textures[i]);
                 if (!view)
                         continue;
-                struct vc4_resource *rsc = vc4_resource(view->texture);
-                if (rsc->shadow_parent)
-                        vc4_update_shadow_baselevel_texture(pctx, view);
+
+                if (view->texture != view->base.texture)
+                        vc4_update_shadow_baselevel_texture(pctx, &view->base);
 
                 vc4_flush_jobs_writing_resource(vc4, view->texture);
         }
@@ -221,6 +223,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
                         attr.coordinate_shader_vpm_offset = 0;
                         attr.vertex_shader_vpm_offset = 0;
                 }
+
+                vc4_bo_unreference(&bo);
         }
 
         cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
@@ -285,6 +289,7 @@ static void
 vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
+        struct pipe_draw_info local_info;
 
        if (!info->count_from_stream_output && !info->indirect &&
            !info->primitive_restart &&
@@ -292,11 +297,19 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                return;
 
         if (info->mode >= PIPE_PRIM_QUADS) {
-                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
-                util_primconvert_draw_vbo(vc4->primconvert, info);
-                perf_debug("Fallback conversion for %d %s vertices\n",
-                           info->count, u_prim_name(info->mode));
-                return;
+                if (info->mode == PIPE_PRIM_QUADS &&
+                    info->count == 4 &&
+                    !vc4->rasterizer->base.flatshade) {
+                        local_info = *info;
+                        local_info.mode = PIPE_PRIM_TRIANGLE_FAN;
+                        info = &local_info;
+                } else {
+                        util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
+                        util_primconvert_draw_vbo(vc4->primconvert, info);
+                        perf_debug("Fallback conversion for %d %s vertices\n",
+                                   info->count, u_prim_name(info->mode));
+                        return;
+                }
         }
 
         /* Before setting up the draw, do any fixup blits necessary. */
@@ -307,6 +320,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         struct vc4_job *job = vc4_get_job_for_fbo(vc4);
 
+        /* The raster order flags can only be set at job granularity, so if
+         * they have changed, submit this job and look up the FBO's job again.
+         */
+        if (job->flags != vc4->rasterizer->tile_raster_order_flags) {
+                vc4_job_submit(vc4, job);
+                job = vc4_get_job_for_fbo(vc4);
+        }
+
         vc4_get_draw_cl_space(job, info->count);
 
         if (vc4->prim_mode != info->mode) {
@@ -322,6 +343,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         vc4_emit_state(pctx);
 
+        bool needs_drawarrays_shader_state = false;
+
         if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                            VC4_DIRTY_VTXSTATE |
                            VC4_DIRTY_PRIM_MODE |
@@ -333,7 +356,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                            vc4->prog.vs->uniform_dirty_bits |
                            vc4->prog.fs->uniform_dirty_bits)) ||
             vc4->last_index_bias != info->index_bias) {
-                vc4_emit_gl_shader_state(vc4, info, 0);
+                if (info->index_size)
+                        vc4_emit_gl_shader_state(vc4, info, 0);
+                else
+                        needs_drawarrays_shader_state = true;
         }
 
         vc4->dirty = 0;
@@ -364,7 +390,25 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 struct vc4_resource *rsc = vc4_resource(prsc);
 
                 struct vc4_cl_out *bcl = cl_start(&job->bcl);
-                cl_start_reloc(&job->bcl, &bcl, 1);
+
+                /* The original design for the VC4 kernel UABI had multiple
+                 * packets that used relocations in the BCL (some of which
+                 * needed two BOs), but later modifications eliminated all but
+                 * this one usage.  We have an arbitrary 32-bit offset value,
+                 * and need to also supply an arbitrary 32-bit index buffer
+                 * GEM handle, so we have this fake packet we emit in our BCL
+                 * to be validated, which the kernel uses at validation time
+                 * to perform the relocation in the IB packet (without
+                 * emitting to the actual HW).
+                 */
+                uint32_t hindex = vc4_gem_hindex(job, rsc->bo);
+                if (job->last_gem_handle_hindex != hindex) {
+                        cl_u8(&bcl, VC4_PACKET_GEM_HANDLES);
+                        cl_u32(&bcl, hindex);
+                        cl_u32(&bcl, 0);
+                        job->last_gem_handle_hindex = hindex;
+                }
+
                 cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                 cl_u8(&bcl,
                       info->mode |
@@ -372,8 +416,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                        VC4_INDEX_BUFFER_U16:
                        VC4_INDEX_BUFFER_U8));
                 cl_u32(&bcl, info->count);
-                cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
+                cl_u32(&bcl, offset);
                 cl_u32(&bcl, vc4->max_index);
+
                 cl_end(&job->bcl, bcl);
                 job->draw_calls_queued++;
 
@@ -383,60 +428,35 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 uint32_t count = info->count;
                 uint32_t start = info->start;
                 uint32_t extra_index_bias = 0;
+                static const uint32_t max_verts = 65535;
+
+                /* GFXH-515 / SW-5891: The binner emits 16 bit indices for
+                 * drawarrays, which means that if start + count > 64k it
+                 * would truncate the top bits.  Work around this by emitting
+                 * a limited number of primitives at a time and reemitting the
+                 * shader state pointing farther down the vertex attribute
+                 * arrays.
+                 *
+                 * To do this properly for line loops or trifans, we'd need to
+                 * make a new VB containing the first vertex plus whatever
+                 * remainder.
+                 */
+                if (start + count > max_verts) {
+                        extra_index_bias = start;
+                        start = 0;
+                        needs_drawarrays_shader_state = true;
+                }
 
                 while (count) {
                         uint32_t this_count = count;
-                        uint32_t step = count;
-                        static const uint32_t max_verts = 65535;
-
-                        /* GFXH-515 / SW-5891: The binner emits 16 bit indices
-                         * for drawarrays, which means that if start + count >
-                         * 64k it would truncate the top bits.  Work around
-                         * this by emitting a limited number of primitives at
-                         * a time and reemitting the shader state pointing
-                         * farther down the vertex attribute arrays.
-                         *
-                         * To do this properly for line loops or trifans, we'd
-                         * need to make a new VB containing the first vertex
-                         * plus whatever remainder.
-                         */
-                        if (extra_index_bias) {
+                        uint32_t step;
+
+                        if (needs_drawarrays_shader_state) {
                                 vc4_emit_gl_shader_state(vc4, info,
                                                          extra_index_bias);
                         }
 
-                        if (start + count > max_verts) {
-                                switch (info->mode) {
-                                case PIPE_PRIM_POINTS:
-                                        this_count = step = max_verts;
-                                        break;
-                                case PIPE_PRIM_LINES:
-                                        this_count = step = max_verts - (max_verts % 2);
-                                        break;
-                                case PIPE_PRIM_LINE_STRIP:
-                                        this_count = max_verts;
-                                        step = max_verts - 1;
-                                        break;
-                                case PIPE_PRIM_LINE_LOOP:
-                                        this_count = max_verts;
-                                        step = max_verts - 1;
-                                        debug_warn_once("unhandled line loop "
-                                                        "looping behavior with "
-                                                        ">65535 verts\n");
-                                        break;
-                                case PIPE_PRIM_TRIANGLES:
-                                        this_count = step = max_verts - (max_verts % 3);
-                                        break;
-                                case PIPE_PRIM_TRIANGLE_STRIP:
-                                        this_count = max_verts;
-                                        step = max_verts - 2;
-                                        break;
-                                default:
-                                        debug_warn_once("unhandled primitive "
-                                                        "max vert count, truncating\n");
-                                        this_count = step = max_verts;
-                                }
-                        }
+                        u_split_draw(info, max_verts, &this_count, &step);
 
                         cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) {
                                 array.primitive_mode = info->mode;
@@ -448,6 +468,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                         count -= step;
                         extra_index_bias += start + step;
                         start = 0;
+                        needs_drawarrays_shader_state = true;
                 }
         }
 
@@ -517,6 +538,8 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
                      zsclear == PIPE_CLEAR_STENCIL) &&
                     (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
                     util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
+                        static const union pipe_color_union dummy_color = {};
+
                         perf_debug("Partial clear of Z+stencil buffer, "
                                    "drawing a quad instead of fast clearing\n");
                         vc4_blitter_save(vc4);
@@ -525,7 +548,8 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
                                            vc4->framebuffer.height,
                                            1,
                                            zsclear,
-                                           NULL, depth, stencil);
+                                           &dummy_color, depth, stencil,
+                                           false);
                         buffers &= ~zsclear;
                         if (!buffers)
                                 return;