broadcom/vc5: Port drawing commands to V3D 4.x.
author Eric Anholt <eric@anholt.net>
Fri, 5 Jan 2018 07:19:08 +0000 (23:19 -0800)
committer Eric Anholt <eric@anholt.net>
Sat, 13 Jan 2018 05:55:04 +0000 (21:55 -0800)
This required extending the CL submit ioctl, because the tile alloc/state
buffer setup has moved from the BCL to register writes.

src/gallium/drivers/vc5/Makefile.sources
src/gallium/drivers/vc5/meson.build
src/gallium/drivers/vc5/v3dx_context.h
src/gallium/drivers/vc5/v3dx_simulator.c
src/gallium/drivers/vc5/vc5_context.c
src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_draw.c
src/gallium/drivers/vc5/vc5_drm.h
src/gallium/drivers/vc5/vc5_job.c

index 8377af77fbb7017f7063bf92f479eb2bef4f45b5..72441e0ac004e122546875cd0463cfa9058b95d0 100644 (file)
@@ -6,7 +6,6 @@ C_SOURCES := \
        vc5_cl.h \
        vc5_context.c \
        vc5_context.h \
-       vc5_draw.c \
        vc5_drm.h \
        vc5_emit.c \
        vc5_fence.c \
@@ -27,5 +26,6 @@ C_SOURCES := \
 
 VC5_PER_VERSION_SOURCES = \
        v3dx_simulator.c \
+       vc5_draw.c \
        vc5_rcl.c \
        $()
index 951c321b51a703faeb8a86145c9893ff5f6332ed..01ed2ecd778882061bbda61914ee0b61621e0c07 100644 (file)
@@ -26,7 +26,6 @@ files_libvc5 = files(
   'vc5_cl.h',
   'vc5_context.c',
   'vc5_context.h',
-  'vc5_draw.c',
   'vc5_emit.c',
   'vc5_fence.c',
   'vc5_formats.c',
@@ -47,6 +46,7 @@ files_libvc5 = files(
 
 files_per_version = files(
   'v3dx_simulator.c',
+  'vc5_draw.c',
   'vc5_rcl.c',
 )
 
index e6bbfc6b3f1bb77bad42a068455675725d463a02..58012fa5f93df1cecbfdbba5bff2ff6e50be7c58 100644 (file)
@@ -29,6 +29,7 @@
 struct v3d_hw;
 
 void v3dX(emit_rcl)(struct vc5_job *job);
+void v3dX(draw_init)(struct pipe_context *pctx);
 
 void v3dX(simulator_init_regs)(struct v3d_hw *v3d);
 int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
index 2180787891d27f3f944006a9fac6403f0309aaf1..90fafaee1e2337824528670d8ada3f6e5ce9ffe9 100644 (file)
@@ -154,6 +154,17 @@ v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_vc5_submit_cl *submit,
 
         vc5_flush_caches(v3d);
 
+        if (submit->qma) {
+                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
+                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
+        }
+#if V3D_VERSION >= 41
+        if (submit->qts) {
+                V3D_WRITE(V3D_CLE_0_CT0QTS,
+                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
+                          submit->qts);
+        }
+#endif
         V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
         V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
 
index d27f41bb5f88a016395f143dffc7adf0c0ae1f14..f6ae0ad989b4eb9b57b51c2592bcc32be480fbdb 100644 (file)
@@ -134,7 +134,10 @@ vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
         pctx->flush = vc5_pipe_flush;
         pctx->invalidate_resource = vc5_invalidate_resource;
 
-        vc5_draw_init(pctx);
+        if (screen->devinfo.ver >= 41)
+                v3d41_draw_init(pctx);
+        else
+                v3d33_draw_init(pctx);
         vc5_state_init(pctx);
         vc5_program_init(pctx);
         vc5_query_init(pctx);
index 70ee333c432c0a18cc53c6759a0b9a2d5f8b2431..2dde6a490f146977f8045780db88dac457923be4 100644 (file)
@@ -198,6 +198,7 @@ struct vc5_job {
         struct vc5_cl rcl;
         struct vc5_cl indirect;
         struct vc5_bo *tile_alloc;
+        struct vc5_bo *tile_state;
         uint32_t shader_rec_count;
 
         struct drm_vc5_submit_cl submit;
@@ -445,7 +446,6 @@ vc5_sampler_state(struct pipe_sampler_state *psampler)
 
 struct pipe_context *vc5_context_create(struct pipe_screen *pscreen,
                                         void *priv, unsigned flags);
-void vc5_draw_init(struct pipe_context *pctx);
 void vc5_state_init(struct pipe_context *pctx);
 void vc5_program_init(struct pipe_context *pctx);
 void vc5_program_fini(struct pipe_context *pctx);
index 18e8127311c0c3a92ccc7d5e6e7d12f2116f02ad..95a857f1abc8baaff74b42711e304ff9588988c1 100644 (file)
@@ -33,7 +33,6 @@
 #include "vc5_resource.h"
 #include "vc5_cl.h"
 #include "broadcom/compiler/v3d_compiler.h"
-#define V3D_VERSION 33
 #include "broadcom/common/v3d_macros.h"
 #include "broadcom/cle/v3dx_pack.h"
 
@@ -57,12 +56,14 @@ vc5_start_draw(struct vc5_context *vc5)
         vc5_job_add_bo(job, job->bcl.bo);
 
         job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
-        struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen,
-                                           job->draw_tiles_y *
-                                           job->draw_tiles_x *
-                                           64,
-                                           "TSDA");
-
+        uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64;
+        job->tile_state = vc5_bo_alloc(vc5->screen,
+                                       job->draw_tiles_y *
+                                       job->draw_tiles_x *
+                                       tsda_per_tile_size,
+                                       "TSDA");
+
+#if V3D_VERSION < 40
         /* "Binning mode lists start with a Tile Binning Mode Configuration
          * item (120)"
          *
@@ -73,25 +74,30 @@ vc5_start_draw(struct vc5_context *vc5)
                         cl_address(job->tile_alloc, 0);
                 config.tile_allocation_memory_size = job->tile_alloc->size;
         }
+#endif
 
         cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
+#if V3D_VERSION >= 40
+                config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1;
+                config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1;
+                config.number_of_render_targets_minus_1 =
+                        MAX2(vc5->framebuffer.nr_cbufs, 1) - 1;
+#else /* V3D_VERSION < 40 */
                 config.tile_state_data_array_base_address =
-                        cl_address(tsda, 0);
+                        cl_address(job->tile_state, 0);
 
                 config.width_in_tiles = job->draw_tiles_x;
                 config.height_in_tiles = job->draw_tiles_y;
-
                 /* Must be >= 1 */
                 config.number_of_render_targets =
                         MAX2(vc5->framebuffer.nr_cbufs, 1);
+#endif /* V3D_VERSION < 40 */
 
                 config.multisample_mode_4x = job->msaa;
 
                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
         }
 
-        vc5_bo_unreference(&tsda);
-
         /* There's definitely nothing in the VCD cache we want. */
         cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
 
@@ -202,6 +208,12 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5,
                 shader.vertex_shader_uniforms_address = vs_uniforms;
                 shader.fragment_shader_uniforms_address = fs_uniforms;
 
+#if V3D_VERSION >= 41
+                shader.coordinate_shader_start_in_final_thread_section = true;
+                shader.vertex_shader_start_in_final_thread_section = true;
+                shader.fragment_shader_start_in_final_thread_section = true;
+#endif
+
                 shader.vertex_id_read_by_coordinate_shader =
                         vc5->prog.cs->prog_data.vs->uses_vid;
                 shader.instance_id_read_by_coordinate_shader =
@@ -234,6 +246,9 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5,
                                 vc5->prog.cs->prog_data.vs->vattr_sizes[i];
                         attr.number_of_values_read_by_vertex_shader =
                                 vc5->prog.vs->prog_data.vs->vattr_sizes[i];
+#if V3D_VERSION >= 41
+                        attr.maximum_index = 0xffffff;
+#endif
                 }
         }
 
@@ -374,14 +389,16 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 }
         }
 
-        /* The HW only processes transform feedback on primitives with the
-         * flag set.
-         */
         uint32_t prim_tf_enable = 0;
+#if V3D_VERSION < 40
+        /* V3D 3.x: The HW only processes transform feedback on primitives
+         * with the flag set.
+         */
         if (vc5->streamout.num_targets)
                 prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
+#endif
 
-        vc5_tf_statistics_record(vc5, info, prim_tf_enable);
+        vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets);
 
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
@@ -401,12 +418,23 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 }
                 struct vc5_resource *rsc = vc5_resource(prsc);
 
+#if V3D_VERSION >= 40
+                cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
+                        ib.address = cl_address(rsc->bo, 0);
+                        ib.size = rsc->bo->size;
+                }
+#endif
+
                 if (info->instance_count > 1) {
                         cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
                                 prim.index_type = ffs(info->index_size) - 1;
+#if V3D_VERSION >= 40
+                                prim.index_offset = offset;
+#else /* V3D_VERSION < 40 */
                                 prim.maximum_index = (1u << 31) - 1; /* XXX */
                                 prim.address_of_indices_list =
                                         cl_address(rsc->bo, offset);
+#endif /* V3D_VERSION < 40 */
                                 prim.mode = info->mode | prim_tf_enable;
                                 prim.enable_primitive_restarts = info->primitive_restart;
 
@@ -417,9 +445,13 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                         cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
                                 prim.index_type = ffs(info->index_size) - 1;
                                 prim.length = info->count;
+#if V3D_VERSION >= 40
+                                prim.index_offset = offset;
+#else /* V3D_VERSION < 40 */
                                 prim.maximum_index = (1u << 31) - 1; /* XXX */
                                 prim.address_of_indices_list =
                                         cl_address(rsc->bo, offset);
+#endif /* V3D_VERSION < 40 */
                                 prim.mode = info->mode | prim_tf_enable;
                                 prim.enable_primitive_restarts = info->primitive_restart;
                         }
@@ -612,7 +644,7 @@ vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
 }
 
 void
-vc5_draw_init(struct pipe_context *pctx)
+v3dX(draw_init)(struct pipe_context *pctx)
 {
         pctx->draw_vbo = vc5_draw_vbo;
         pctx->clear = vc5_clear;
index e70cf9d56a64c200094472c993965270a5b15337..2c21ee77a138314e77650cfd45d1d35907888f5c 100644 (file)
@@ -77,6 +77,19 @@ struct drm_vc5_submit_cl {
         /** End address of the RCL (first byte after the RCL) */
        __u32 rcl_end;
 
+       /* Offset of the tile alloc memory
+        *
+        * This is optional on V3D 3.3 (where the CL can set the value) but
+        * required on V3D 4.1.
+        */
+       __u32 qma;
+
+        /** Size of the tile alloc memory. */
+       __u32 qms;
+
+        /** Offset of the tile state data array. */
+       __u32 qts;
+
        /* Pointer to a u32 array of the BOs that are referenced by the job.
         */
        __u64 bo_handles;
index a5edbe7c2073eaa89675f2efe18b1829a260f9fd..aa56ad6f24101df313f9130f60c150408184b88f 100644 (file)
@@ -84,6 +84,7 @@ vc5_job_free(struct vc5_context *vc5, struct vc5_job *job)
         vc5_destroy_cl(&job->rcl);
         vc5_destroy_cl(&job->indirect);
         vc5_bo_unreference(&job->tile_alloc);
+        vc5_bo_unreference(&job->tile_state);
 
         ralloc_free(job);
 }
@@ -419,6 +420,18 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
 
+        /* On V3D 4.1, the tile alloc/state setup moved to register writes
+         * instead of binner packets.
+         */
+        if (screen->devinfo.ver >= 41) {
+                vc5_job_add_bo(job, job->tile_alloc);
+                job->submit.qma = job->tile_alloc->offset;
+                job->submit.qms = job->tile_alloc->size;
+
+                vc5_job_add_bo(job, job->tile_state);
+                job->submit.qts = job->tile_state->offset;
+        }
+
         vc5_clif_dump(vc5, job);
 
         if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {