From: Eric Anholt Date: Fri, 5 Jan 2018 07:19:08 +0000 (-0800) Subject: broadcom/vc5: Port drawing commands to V3D 4.x. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=57965755e2c8089e5b52897fa86305e595f6792f;p=mesa.git broadcom/vc5: Port drawing commands to V3D 4.x. This required extending the CL submit ioctl, because the tile alloc/state buffer setup has moved from the BCL to register writes. --- diff --git a/src/gallium/drivers/vc5/Makefile.sources b/src/gallium/drivers/vc5/Makefile.sources index 8377af77fbb..72441e0ac00 100644 --- a/src/gallium/drivers/vc5/Makefile.sources +++ b/src/gallium/drivers/vc5/Makefile.sources @@ -6,7 +6,6 @@ C_SOURCES := \ vc5_cl.h \ vc5_context.c \ vc5_context.h \ - vc5_draw.c \ vc5_drm.h \ vc5_emit.c \ vc5_fence.c \ @@ -27,5 +26,6 @@ C_SOURCES := \ VC5_PER_VERSION_SOURCES = \ v3dx_simulator.c \ + vc5_draw.c \ vc5_rcl.c \ $() diff --git a/src/gallium/drivers/vc5/meson.build b/src/gallium/drivers/vc5/meson.build index 951c321b51a..01ed2ecd778 100644 --- a/src/gallium/drivers/vc5/meson.build +++ b/src/gallium/drivers/vc5/meson.build @@ -26,7 +26,6 @@ files_libvc5 = files( 'vc5_cl.h', 'vc5_context.c', 'vc5_context.h', - 'vc5_draw.c', 'vc5_emit.c', 'vc5_fence.c', 'vc5_formats.c', @@ -47,6 +46,7 @@ files_libvc5 = files( files_per_version = files( 'v3dx_simulator.c', + 'vc5_draw.c', 'vc5_rcl.c', ) diff --git a/src/gallium/drivers/vc5/v3dx_context.h b/src/gallium/drivers/vc5/v3dx_context.h index e6bbfc6b3f1..58012fa5f93 100644 --- a/src/gallium/drivers/vc5/v3dx_context.h +++ b/src/gallium/drivers/vc5/v3dx_context.h @@ -29,6 +29,7 @@ struct v3d_hw; void v3dX(emit_rcl)(struct vc5_job *job); +void v3dX(draw_init)(struct pipe_context *pctx); void v3dX(simulator_init_regs)(struct v3d_hw *v3d); int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, diff --git a/src/gallium/drivers/vc5/v3dx_simulator.c b/src/gallium/drivers/vc5/v3dx_simulator.c index 2180787891d..90fafaee1e2 100644 --- a/src/gallium/drivers/vc5/v3dx_simulator.c +++ 
b/src/gallium/drivers/vc5/v3dx_simulator.c @@ -154,6 +154,17 @@ v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_vc5_submit_cl *submit, vc5_flush_caches(v3d); + if (submit->qma) { + V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); + V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); + } +#if V3D_VERSION >= 41 + if (submit->qts) { + V3D_WRITE(V3D_CLE_0_CT0QTS, + V3D_CLE_0_CT0QTS_CTQTSEN_SET | + submit->qts); + } +#endif V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); diff --git a/src/gallium/drivers/vc5/vc5_context.c b/src/gallium/drivers/vc5/vc5_context.c index d27f41bb5f8..f6ae0ad989b 100644 --- a/src/gallium/drivers/vc5/vc5_context.c +++ b/src/gallium/drivers/vc5/vc5_context.c @@ -134,7 +134,10 @@ vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) pctx->flush = vc5_pipe_flush; pctx->invalidate_resource = vc5_invalidate_resource; - vc5_draw_init(pctx); + if (screen->devinfo.ver >= 41) + v3d41_draw_init(pctx); + else + v3d33_draw_init(pctx); vc5_state_init(pctx); vc5_program_init(pctx); vc5_query_init(pctx); diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h index 70ee333c432..2dde6a490f1 100644 --- a/src/gallium/drivers/vc5/vc5_context.h +++ b/src/gallium/drivers/vc5/vc5_context.h @@ -198,6 +198,7 @@ struct vc5_job { struct vc5_cl rcl; struct vc5_cl indirect; struct vc5_bo *tile_alloc; + struct vc5_bo *tile_state; uint32_t shader_rec_count; struct drm_vc5_submit_cl submit; @@ -445,7 +446,6 @@ vc5_sampler_state(struct pipe_sampler_state *psampler) struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); -void vc5_draw_init(struct pipe_context *pctx); void vc5_state_init(struct pipe_context *pctx); void vc5_program_init(struct pipe_context *pctx); void vc5_program_fini(struct pipe_context *pctx); diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c index 18e8127311c..95a857f1abc 100644 --- 
a/src/gallium/drivers/vc5/vc5_draw.c +++ b/src/gallium/drivers/vc5/vc5_draw.c @@ -33,7 +33,6 @@ #include "vc5_resource.h" #include "vc5_cl.h" #include "broadcom/compiler/v3d_compiler.h" -#define V3D_VERSION 33 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" @@ -57,12 +56,14 @@ vc5_start_draw(struct vc5_context *vc5) vc5_job_add_bo(job, job->bcl.bo); job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc"); - struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen, - job->draw_tiles_y * - job->draw_tiles_x * - 64, - "TSDA"); - + uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64; + job->tile_state = vc5_bo_alloc(vc5->screen, + job->draw_tiles_y * + job->draw_tiles_x * + tsda_per_tile_size, + "TSDA"); + +#if V3D_VERSION < 40 /* "Binning mode lists start with a Tile Binning Mode Configuration * item (120)" * @@ -73,25 +74,30 @@ vc5_start_draw(struct vc5_context *vc5) cl_address(job->tile_alloc, 0); config.tile_allocation_memory_size = job->tile_alloc->size; } +#endif cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) { +#if V3D_VERSION >= 40 + config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1; + config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1; + config.number_of_render_targets_minus_1 = + MAX2(vc5->framebuffer.nr_cbufs, 1) - 1; +#else /* V3D_VERSION < 40 */ config.tile_state_data_array_base_address = - cl_address(tsda, 0); + cl_address(job->tile_state, 0); config.width_in_tiles = job->draw_tiles_x; config.height_in_tiles = job->draw_tiles_y; - /* Must be >= 1 */ config.number_of_render_targets = MAX2(vc5->framebuffer.nr_cbufs, 1); +#endif /* V3D_VERSION < 40 */ config.multisample_mode_4x = job->msaa; config.maximum_bpp_of_all_render_targets = job->internal_bpp; } - vc5_bo_unreference(&tsda); - /* There's definitely nothing in the VCD cache we want. 
*/ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); @@ -202,6 +208,12 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5, shader.vertex_shader_uniforms_address = vs_uniforms; shader.fragment_shader_uniforms_address = fs_uniforms; +#if V3D_VERSION >= 41 + shader.coordinate_shader_start_in_final_thread_section = true; + shader.vertex_shader_start_in_final_thread_section = true; + shader.fragment_shader_start_in_final_thread_section = true; +#endif + shader.vertex_id_read_by_coordinate_shader = vc5->prog.cs->prog_data.vs->uses_vid; shader.instance_id_read_by_coordinate_shader = @@ -234,6 +246,9 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5, vc5->prog.cs->prog_data.vs->vattr_sizes[i]; attr.number_of_values_read_by_vertex_shader = vc5->prog.vs->prog_data.vs->vattr_sizes[i]; +#if V3D_VERSION >= 41 + attr.maximum_index = 0xffffff; +#endif } } @@ -374,14 +389,16 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } } - /* The HW only processes transform feedback on primitives with the - * flag set. - */ uint32_t prim_tf_enable = 0; +#if V3D_VERSION < 40 + /* V3D 3.x: The HW only processes transform feedback on primitives + * with the flag set. + */ if (vc5->streamout.num_targets) prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); +#endif - vc5_tf_statistics_record(vc5, info, prim_tf_enable); + vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets); /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. 
@@ -401,12 +418,23 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } struct vc5_resource *rsc = vc5_resource(prsc); +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { + ib.address = cl_address(rsc->bo, 0); + ib.size = rsc->bo->size; + } +#endif + if (info->instance_count > 1) { cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ prim.maximum_index = (1u << 31) - 1; /* XXX */ prim.address_of_indices_list = cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ prim.mode = info->mode | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; @@ -417,9 +445,13 @@ vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; prim.length = info->count; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ prim.maximum_index = (1u << 31) - 1; /* XXX */ prim.address_of_indices_list = cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ prim.mode = info->mode | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; } @@ -612,7 +644,7 @@ vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, } void -vc5_draw_init(struct pipe_context *pctx) +v3dX(draw_init)(struct pipe_context *pctx) { pctx->draw_vbo = vc5_draw_vbo; pctx->clear = vc5_clear; diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/src/gallium/drivers/vc5/vc5_drm.h index e70cf9d56a6..2c21ee77a13 100644 --- a/src/gallium/drivers/vc5/vc5_drm.h +++ b/src/gallium/drivers/vc5/vc5_drm.h @@ -77,6 +77,19 @@ struct drm_vc5_submit_cl { /** End address of the RCL (first byte after the RCL) */ __u32 rcl_end; + /* Offset of the tile alloc memory + * + * This is optional on V3D 3.3 (where the CL can set the value) but + * required on V3D 4.1. 
+ */ + __u32 qma; + + /** Size of the tile alloc memory. */ + __u32 qms; + + /** Offset of the tile state data array. */ + __u32 qts; + /* Pointer to a u32 array of the BOs that are referenced by the job. */ __u64 bo_handles; diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c index a5edbe7c207..aa56ad6f241 100644 --- a/src/gallium/drivers/vc5/vc5_job.c +++ b/src/gallium/drivers/vc5/vc5_job.c @@ -84,6 +84,7 @@ vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) vc5_destroy_cl(&job->rcl); vc5_destroy_cl(&job->indirect); vc5_bo_unreference(&job->tile_alloc); + vc5_bo_unreference(&job->tile_state); ralloc_free(job); } @@ -419,6 +420,18 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); + /* On V3D 4.1, the tile alloc/state setup moved to register writes + * instead of binner packets. + */ + if (screen->devinfo.ver >= 41) { + vc5_job_add_bo(job, job->tile_alloc); + job->submit.qma = job->tile_alloc->offset; + job->submit.qms = job->tile_alloc->size; + + vc5_job_add_bo(job, job->tile_state); + job->submit.qts = job->tile_state->offset; + } + vc5_clif_dump(vc5, job); if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {