From 07a68835a153c808a530a277396bd9f212e2bc06 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 1 Sep 2019 10:15:23 +0200 Subject: [PATCH] panfrost: Pass a batch to panfrost_{allocate,upload}_transient() We need that if we want to upload transient buffers to a batch that's not currently bound to the context, which in turn will be needed if we want to relax the batch serialization we have right now (only flush batches when we need to: on a flush request, or when one batch depends on the result of other batches). Signed-off-by: Boris Brezillon Reviewed-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_allocate.c | 10 ++-- src/gallium/drivers/panfrost/pan_allocate.h | 7 +-- src/gallium/drivers/panfrost/pan_compute.c | 10 ++-- src/gallium/drivers/panfrost/pan_context.c | 51 +++++++++++-------- src/gallium/drivers/panfrost/pan_fragment.c | 2 +- src/gallium/drivers/panfrost/pan_instancing.c | 2 +- src/gallium/drivers/panfrost/pan_mfbd.c | 3 +- src/gallium/drivers/panfrost/pan_scoreboard.c | 3 +- src/gallium/drivers/panfrost/pan_sfbd.c | 2 +- src/gallium/drivers/panfrost/pan_varyings.c | 8 +-- 10 files changed, 57 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c index a22b1a5a88d..44af631d355 100644 --- a/src/gallium/drivers/panfrost/pan_allocate.c +++ b/src/gallium/drivers/panfrost/pan_allocate.c @@ -39,10 +39,9 @@ * into the pool and copy there */ struct panfrost_transfer -panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) +panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz) { - struct panfrost_screen *screen = pan_screen(ctx->base.screen); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen); /* Pad the size */ sz = ALIGN_POT(sz, ALIGNMENT); @@ -89,9 +88,10 @@ panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) } mali_ptr -panfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz) +panfrost_upload_transient(struct panfrost_batch *batch, const void *data, + size_t sz) { - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz); memcpy(transfer.cpu, data, sz); return transfer.gpu; } diff --git a/src/gallium/drivers/panfrost/pan_allocate.h b/src/gallium/drivers/panfrost/pan_allocate.h index c0aff62df4a..a80eadaffce 100644 --- a/src/gallium/drivers/panfrost/pan_allocate.h +++ b/src/gallium/drivers/panfrost/pan_allocate.h @@ -33,7 +33,7 @@ #include "util/list.h" -struct panfrost_context; +struct panfrost_batch; /* Represents a fat pointer for GPU-mapped memory, returned from the transient * allocator and not used for much else */ @@ -64,9 +64,10 @@ struct panfrost_bo { }; struct panfrost_transfer -panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz); +panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz); mali_ptr -panfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz); +panfrost_upload_transient(struct panfrost_batch *batch, const void *data, + size_t sz); #endif /* __PAN_ALLOCATE_H__ */ diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 51967fe481e..4639c1b03c3 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -87,6 +87,9 @@ panfrost_launch_grid(struct pipe_context *pipe, { struct panfrost_context *ctx = pan_context(pipe); + /* TODO: Do we want a special compute-only batch? */ + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + ctx->compute_grid = info; struct mali_job_descriptor_header job = { @@ -113,7 +116,7 @@ panfrost_launch_grid(struct pipe_context *pipe, }; payload->postfix.framebuffer = - panfrost_upload_transient(ctx, &compute_fbd, sizeof(compute_fbd)); + panfrost_upload_transient(batch, &compute_fbd, sizeof(compute_fbd)); /* Invoke according to the grid info */ @@ -123,13 +126,10 @@ panfrost_launch_grid(struct pipe_context *pipe, /* Upload the payload */ - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload)); memcpy(transfer.cpu, &job, sizeof(job)); memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); - /* TODO: Do we want a special compute-only batch? */ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - /* Queue the job */ panfrost_scoreboard_queue_compute_job(batch, transfer); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 35b36f501e2..3f036ce4b5f 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -167,17 +167,19 @@ panfrost_clear( static mali_ptr panfrost_attach_vt_mfbd(struct panfrost_context *ctx) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(ctx, ~0); - return panfrost_upload_transient(ctx, &mfbd, sizeof(mfbd)) | MALI_MFBD; + return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | MALI_MFBD; } static mali_ptr panfrost_attach_vt_sfbd(struct panfrost_context *ctx) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(ctx, ~0); - return panfrost_upload_transient(ctx, &sfbd, sizeof(sfbd)) | MALI_SFBD; + return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | MALI_SFBD; } static void @@ -434,6 +436,7 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) struct panfrost_transfer panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct mali_job_descriptor_header job = { .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, .job_descriptor_size = 1, @@ -441,7 +444,7 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX]; - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload)); memcpy(transfer.cpu, &job, sizeof(job)); memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); return transfer; @@ -471,10 +474,11 @@ panfrost_writes_point_size(struct panfrost_context *ctx) static void panfrost_stage_attributes(struct panfrost_context *ctx) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_vertex_state *so = ctx->vertex; size_t sz = sizeof(struct mali_attr_meta) * so->num_elements; - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz); struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu; /* Copy as-is for the first pass */ @@ -527,6 +531,7 @@ panfrost_stage_attributes(struct panfrost_context *ctx) static void panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); size_t desc_size = sizeof(struct mali_sampler_descriptor); for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { @@ -536,7 +541,7 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) size_t transfer_size = desc_size * ctx->sampler_count[t]; struct panfrost_transfer transfer = - panfrost_allocate_transient(ctx, transfer_size); + panfrost_allocate_transient(batch, transfer_size); struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; @@ -626,13 +631,15 @@ panfrost_upload_tex( } } - return panfrost_upload_transient(ctx, &view->hw, + return panfrost_upload_transient(batch, &view->hw, sizeof(struct mali_texture_descriptor)); } static void panfrost_upload_texture_descriptors(struct panfrost_context *ctx) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { mali_ptr trampoline = 0; @@ -643,7 +650,7 @@ panfrost_upload_texture_descriptors(struct panfrost_context *ctx) trampolines[i] = panfrost_upload_tex(ctx, ctx->sampler_views[t][i]); - trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); + trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); } ctx->payloads[t].postfix.texture_trampoline = trampoline; @@ -789,12 +796,14 @@ panfrost_map_constant_buffer_gpu( struct pipe_constant_buffer *cb = &buf->cb[index]; struct panfrost_resource *rsrc = pan_resource(cb->buffer); - if (rsrc) + if (rsrc) { return rsrc->bo->gpu; - else if (cb->user_buffer) - return panfrost_upload_transient(ctx, cb->user_buffer, cb->buffer_size); - else + } else if (cb->user_buffer) { + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size); + } else { unreachable("No constant buffer"); + } } /* Compute number of UBOs active (more specifically, compute the highest UBO @@ -831,9 +840,10 @@ panfrost_patch_shader_state( * transient uploaded */ if (should_upload) { - return panfrost_upload_transient(ctx, - ss->tripipe, - sizeof(struct mali_shader_meta)); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + + return panfrost_upload_transient(batch, ss->tripipe, + sizeof(struct mali_shader_meta)); } /* If we don't need an upload, don't bother */ @@ -1020,7 +1030,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) } size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count); - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size); memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); ctx->payloads[PIPE_SHADER_FRAGMENT].postfix._shader_upper = (transfer.gpu) >> 4; @@ -1111,7 +1121,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) size_t sys_size = sizeof(float) * 4 * ss->sysval_count; size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0; size_t size = sys_size + uniform_size; - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size); /* Upload sysvals requested by the shader */ panfrost_upload_sysvals(ctx, transfer.cpu, ss, i); @@ -1167,7 +1177,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ubos[ubo].ptr = gpu >> 2; } - mali_ptr ubufs = panfrost_upload_transient(ctx, ubos, sz); + mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz); postfix->uniforms = transfer.gpu; postfix->uniform_buffers = ubufs; @@ -1267,7 +1277,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) view.clip_maxz = maxz; ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.viewport = - panfrost_upload_transient(ctx, + panfrost_upload_transient(batch, &view, sizeof(struct mali_viewport)); @@ -1517,7 +1527,7 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe } else { /* Otherwise, we need to upload to transient memory */ const uint8_t *ibuf8 = (const uint8_t *) info->index.user; - return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size); + return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size); } } @@ -2577,13 +2587,14 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct panfrost_context *ctx = pan_context(pipe); struct panfrost_query *query = (struct panfrost_query *) q; + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); switch (query->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* Allocate a word for the query results to be stored */ - query->transfer = panfrost_allocate_transient(ctx, sizeof(unsigned)); + query->transfer = panfrost_allocate_transient(batch, sizeof(unsigned)); ctx->occlusion_query = query; break; diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c index f92bf4e3522..dc89bce1b76 100644 --- a/src/gallium/drivers/panfrost/pan_fragment.c +++ b/src/gallium/drivers/panfrost/pan_fragment.c @@ -108,7 +108,7 @@ panfrost_fragment_job(struct panfrost_context *ctx, bool has_draws) * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes * of zero padding in between. */ - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload)); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(header) + sizeof(payload)); memcpy(transfer.cpu, &header, sizeof(header)); memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload)); return transfer.gpu; diff --git a/src/gallium/drivers/panfrost/pan_instancing.c b/src/gallium/drivers/panfrost/pan_instancing.c index 44fe0a344aa..e7e1f1d0e12 100644 --- a/src/gallium/drivers/panfrost/pan_instancing.c +++ b/src/gallium/drivers/panfrost/pan_instancing.c @@ -341,7 +341,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch) /* Upload whatever we emitted and go */ ctx->payloads[PIPE_SHADER_VERTEX].postfix.attributes = - panfrost_upload_transient(ctx, attrs, k * sizeof(union mali_attr)); + panfrost_upload_transient(batch, attrs, k * sizeof(union mali_attr)); } diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index 8b1831c9208..da1827a30ca 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -364,8 +364,9 @@ panfrost_mfbd_upload( (has_extra ? sizeof(struct bifrost_fb_extra) : 0) + sizeof(struct bifrost_render_target) * 4; + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_transfer m_f_trans = - panfrost_allocate_transient(ctx, total_sz); + panfrost_allocate_transient(batch, total_sz); /* Do the transfer */ diff --git a/src/gallium/drivers/panfrost/pan_scoreboard.c b/src/gallium/drivers/panfrost/pan_scoreboard.c index 776833a13ab..f0771a2c5b5 100644 --- a/src/gallium/drivers/panfrost/pan_scoreboard.c +++ b/src/gallium/drivers/panfrost/pan_scoreboard.c @@ -282,7 +282,8 @@ panfrost_set_value_job(struct panfrost_context *ctx, mali_ptr polygon_list) .unknown = 0x3, }; - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload)); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(payload)); memcpy(transfer.cpu, &job, sizeof(job)); memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload)); diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index f58c054c8f2..843e5632491 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -147,5 +147,5 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool has_draws) if (batch->requirements & PAN_REQ_MSAA) fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B; - return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD; + return panfrost_upload_transient(batch, &fb, sizeof(fb)) | MALI_SFBD; } diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c index 12760109b7c..7adfc3ec4c7 100644 --- a/src/gallium/drivers/panfrost/pan_varyings.c +++ b/src/gallium/drivers/panfrost/pan_varyings.c @@ -38,8 +38,9 @@ panfrost_emit_varyings( slot->size = stride * count; slot->shift = slot->extra_flags = 0; + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_transfer transfer = - panfrost_allocate_transient(ctx, slot->size); + panfrost_allocate_transient(batch, slot->size); slot->elements = transfer.gpu | MALI_ATTR_LINEAR; @@ -179,7 +180,8 @@ panfrost_emit_varying_descriptor( size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count; size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count; - struct panfrost_transfer trans = panfrost_allocate_transient(ctx, + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_transfer trans = panfrost_allocate_transient(batch, vs_size + fs_size); struct pipe_stream_output_info so = vs->stream_output; @@ -398,7 +400,7 @@ panfrost_emit_varying_descriptor( } } - mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr)); + mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr)); ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p; ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p; -- 2.30.2