From 76de3e691c661f7e7d64e0ca333ba429ca74e5ba Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 13 Aug 2020 14:32:23 -0400 Subject: [PATCH] panfrost: Merge attribute packing routines In preparation for streamlining the packing, we need related code in one place. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 67 ++++++++++++++------ src/gallium/drivers/panfrost/pan_cmdstream.h | 4 -- src/gallium/drivers/panfrost/pan_context.c | 56 ---------------- src/gallium/drivers/panfrost/pan_context.h | 4 -- 4 files changed, 49 insertions(+), 82 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 3e260bca0c6..91687ace096 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1337,23 +1337,6 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch, postfix->sampler_descriptor = T.gpu; } -void -panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch, - struct mali_vertex_tiler_postfix *vertex_postfix) -{ - struct panfrost_context *ctx = batch->ctx; - - if (!ctx->vertex) - return; - - struct panfrost_vertex_state *so = ctx->vertex; - - panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix); - vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw, - sizeof(*so->hw) * - PAN_MAX_ATTRIBUTE); -} - void panfrost_emit_vertex_data(struct panfrost_batch *batch, struct mali_vertex_tiler_postfix *vertex_postfix) @@ -1456,10 +1439,58 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, panfrost_instance_id(ctx->padded_count, &attrs[k]); so->hw[PAN_INSTANCE_ID].index = k++; - /* Upload whatever we emitted and go */ + /* Fixup offsets for the second pass. Recall that the hardware + * calculates attribute addresses as: + * + * addr = base + (stride * vtx) + src_offset; + * + * However, on Mali, base must be aligned to 64-bytes, so we + * instead let: + * + * base' = base & ~63 = base - (base & 63) + * + * To compensate when using base' (see emit_vertex_data), we have + * to adjust src_offset by the masked off piece: + * + * addr' = base' + (stride * vtx) + (src_offset + (base & 63)) + * = base - (base & 63) + (stride * vtx) + src_offset + (base & 63) + * = base + (stride * vtx) + src_offset + * = addr; + * + * QED. + */ + + unsigned start = vertex_postfix->offset_start; + + for (unsigned i = 0; i < so->num_elements; ++i) { + unsigned vbi = so->pipe[i].vertex_buffer_index; + struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; + + /* Adjust by the masked off bits of the offset. Make sure we + * read src_offset from so->hw (which is not GPU visible) + * rather than target (which is) due to caching effects */ + + unsigned src_offset = so->pipe[i].src_offset; + + /* BOs aligned to 4k so guaranteed aligned to 64 */ + src_offset += (buf->buffer_offset & 63); + + /* Also, somewhat obscurely per-instance data needs to be + * offset in response to a delayed start in an indexed draw */ + + if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) + src_offset -= buf->stride * start; + + so->hw[i].src_offset = src_offset; + } + vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs, k * sizeof(*attrs)); + + vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw, + sizeof(*so->hw) * + PAN_MAX_ATTRIBUTE); } static mali_ptr diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index e0425ba449c..c6916033cc1 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -80,10 +80,6 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch, enum pipe_shader_type stage, struct mali_vertex_tiler_postfix *postfix); -void -panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch, - struct mali_vertex_tiler_postfix *vertex_postfix); - void panfrost_emit_vertex_data(struct panfrost_batch *batch, struct mali_vertex_tiler_postfix *vertex_postfix); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 736954745e9..72c435792c5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -163,61 +163,6 @@ panfrost_writes_point_size(struct panfrost_context *ctx) return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS; } -void -panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx, - struct mali_vertex_tiler_postfix *vertex_postfix) -{ - if (!ctx->vertex) - return; - - struct panfrost_vertex_state *so = ctx->vertex; - - /* Fixup offsets for the second pass. Recall that the hardware - * calculates attribute addresses as: - * - * addr = base + (stride * vtx) + src_offset; - * - * However, on Mali, base must be aligned to 64-bytes, so we - * instead let: - * - * base' = base & ~63 = base - (base & 63) - * - * To compensate when using base' (see emit_vertex_data), we have - * to adjust src_offset by the masked off piece: - * - * addr' = base' + (stride * vtx) + (src_offset + (base & 63)) - * = base - (base & 63) + (stride * vtx) + src_offset + (base & 63) - * = base + (stride * vtx) + src_offset - * = addr; - * - * QED. - */ - - unsigned start = vertex_postfix->offset_start; - - for (unsigned i = 0; i < so->num_elements; ++i) { - unsigned vbi = so->pipe[i].vertex_buffer_index; - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - - /* Adjust by the masked off bits of the offset. Make sure we - * read src_offset from so->hw (which is not GPU visible) - * rather than target (which is) due to caching effects */ - - unsigned src_offset = so->pipe[i].src_offset; - - /* BOs aligned to 4k so guaranteed aligned to 64 */ - src_offset += (buf->buffer_offset & 63); - - /* Also, somewhat obscurely per-instance data needs to be - * offset in response to a delayed start in an indexed draw */ - - if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) - src_offset -= buf->stride * start; - - so->hw[i].src_offset = src_offset; - } -} - /* Compute number of UBOs active (more specifically, compute the highest UBO * number addressable -- if there are gaps, include them in the count anyway). * We always include UBO #0 in the count, since we *need* uniforms enabled for @@ -422,7 +367,6 @@ panfrost_draw_vbo( &primitive_size); panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX, &vertex_postfix); panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT, &tiler_postfix); - panfrost_emit_vertex_attr_meta(batch, &vertex_postfix); panfrost_emit_sampler_descriptors(batch, PIPE_SHADER_VERTEX, &vertex_postfix); panfrost_emit_sampler_descriptors(batch, PIPE_SHADER_FRAGMENT, &tiler_postfix); panfrost_emit_texture_descriptors(batch, PIPE_SHADER_VERTEX, &vertex_postfix); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 390010ce695..a53db0d611f 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -309,10 +309,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx); bool panfrost_writes_point_size(struct panfrost_context *ctx); -void -panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx, - struct mali_vertex_tiler_postfix *vertex_postfix); - struct panfrost_transfer panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler); -- 2.30.2