From 1cfbc5cff5907aa690d7c5f6514af34c438c25a6 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 20 Aug 2020 13:36:46 -0400 Subject: [PATCH] panfrost: Fix alignment on Bifrost This was real "fun" to debug... Signed-off-by: Alyssa Rosenzweig Fixes: 373a204bdd7 ("panfrost: Pass alignments explicitly") Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 21 ++++++++++---------- src/gallium/drivers/panfrost/pan_fragment.c | 2 +- src/gallium/drivers/panfrost/pan_job.c | 4 ++-- src/gallium/drivers/panfrost/pan_sfbd.c | 2 +- src/panfrost/lib/pan_pool.c | 10 +++++++++- src/panfrost/lib/pan_pool.h | 6 +++++- src/panfrost/lib/pan_scoreboard.c | 2 +- 7 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 0b4a9bed4f4..cb3a29c053e 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -72,7 +72,7 @@ panfrost_vt_emit_shared_memory(struct panfrost_context *ctx, shared.scratchpad = stack->gpu; } - postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared)); + postfix->shared_memory = panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64); } static void @@ -905,10 +905,10 @@ panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch, * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ return rsrc->bo->gpu + cb->buffer_offset; } else if (cb->user_buffer) { - return panfrost_pool_upload(&batch->pool, + return panfrost_pool_upload_aligned(&batch->pool, cb->user_buffer + cb->buffer_offset, - cb->buffer_size); + cb->buffer_size, 16); } else { unreachable("No constant buffer"); } @@ -1199,8 +1199,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch, .shared_shift = util_logbase2(single_size) + 1 }; - vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared, - sizeof(shared)); + vtp->postfix.shared_memory = panfrost_pool_upload_aligned(&batch->pool, &shared, + sizeof(shared), 64); } static mali_ptr @@ -1290,10 +1290,11 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch, trampolines[i] = panfrost_get_tex_desc(batch, stage, view); } - postfix->textures = panfrost_pool_upload(&batch->pool, + postfix->textures = panfrost_pool_upload_aligned(&batch->pool, trampolines, sizeof(uint64_t) * - ctx->sampler_view_count[stage]); + ctx->sampler_view_count[stage], + sizeof(uint64_t)); } } @@ -1338,7 +1339,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, struct panfrost_transfer S = panfrost_pool_alloc_aligned(&batch->pool, MALI_ATTRIBUTE_BUFFER_LENGTH * vs->attribute_count * (could_npot ? 2 : 1), - MALI_ATTRIBUTE_BUFFER_LENGTH); + MALI_ATTRIBUTE_BUFFER_LENGTH * 2); struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool, MALI_ATTRIBUTE_LENGTH * vs->attribute_count, @@ -1996,7 +1997,7 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch, unsigned xfb_base = pan_xfb_base(present); struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool, MALI_ATTRIBUTE_BUFFER_LENGTH * (xfb_base + ctx->streamout.num_targets), - MALI_ATTRIBUTE_BUFFER_LENGTH); + MALI_ATTRIBUTE_BUFFER_LENGTH * 2); struct mali_attribute_buffer_packed *varyings = (struct mali_attribute_buffer_packed *) T.cpu; @@ -2159,5 +2160,5 @@ panfrost_emit_sample_locations(struct panfrost_batch *batch) 0, 0, }; - return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t)); + return panfrost_pool_upload_aligned(&batch->pool, locations, 96 * sizeof(uint16_t), 64); } diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c index 6e4875ba171..9c8f31fc96c 100644 --- a/src/gallium/drivers/panfrost/pan_fragment.c +++ b/src/gallium/drivers/panfrost/pan_fragment.c @@ -100,7 +100,7 @@ panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws) .framebuffer = framebuffer, }; - struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool, sizeof(header) + sizeof(payload)); + struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(&batch->pool, sizeof(header) + sizeof(payload), 64); memcpy(transfer.cpu, &header, sizeof(header)); memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload)); return transfer.gpu; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index a0f0f47940c..6c04b21a643 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -706,10 +706,10 @@ panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_coun .flags = 0x0, .width = MALI_POSITIVE(batch->key.width), .height = MALI_POSITIVE(batch->key.height), - .tiler_heap_meta = panfrost_pool_upload(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta)), + .tiler_heap_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_heap_meta, sizeof(tiler_heap_meta), 64) }; - batch->tiler_meta = panfrost_pool_upload(&batch->pool, &tiler_meta, sizeof(tiler_meta)); + batch->tiler_meta = panfrost_pool_upload_aligned(&batch->pool, &tiler_meta, sizeof(tiler_meta), 64); return batch->tiler_meta; } diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c index 58d33dcddde..d9173baace1 100644 --- a/src/gallium/drivers/panfrost/pan_sfbd.c +++ b/src/gallium/drivers/panfrost/pan_sfbd.c @@ -266,5 +266,5 @@ panfrost_sfbd_fragment(struct panfrost_batch *batch, bool has_draws) fb.format.unk2 |= MALI_SFBD_FORMAT_MSAA_B; } - return panfrost_pool_upload(&batch->pool, &fb, sizeof(fb)); + return panfrost_pool_upload_aligned(&batch->pool, &fb, sizeof(fb), 64); } diff --git a/src/panfrost/lib/pan_pool.c b/src/panfrost/lib/pan_pool.c index 3451e1b9da1..87bb821a748 100644 --- a/src/panfrost/lib/pan_pool.c +++ b/src/panfrost/lib/pan_pool.c @@ -79,6 +79,8 @@ panfrost_create_pool(void *memctx, struct panfrost_device *dev, struct panfrost_transfer panfrost_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment) { + assert(alignment == util_next_power_of_two(alignment)); + /* Find or create a suitable BO */ struct panfrost_bo *bo = pool->transient_bo; unsigned offset = ALIGN_POT(pool->transient_offset, alignment); @@ -103,7 +105,13 @@ panfrost_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment mali_ptr panfrost_pool_upload(struct pan_pool *pool, const void *data, size_t sz) { - struct panfrost_transfer transfer = panfrost_pool_alloc(pool, sz); + return panfrost_pool_upload_aligned(pool, data, sz, sz); +} + +mali_ptr +panfrost_pool_upload_aligned(struct pan_pool *pool, const void *data, size_t sz, unsigned alignment) +{ + struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(pool, sz, alignment); memcpy(transfer.cpu, data, sz); return transfer.gpu; } diff --git a/src/panfrost/lib/pan_pool.h b/src/panfrost/lib/pan_pool.h index 08612a37065..c35f957e494 100644 --- a/src/panfrost/lib/pan_pool.h +++ b/src/panfrost/lib/pan_pool.h @@ -69,7 +69,8 @@ panfrost_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment static inline struct panfrost_transfer panfrost_pool_alloc(struct pan_pool *pool, size_t sz) { - return panfrost_pool_alloc_aligned(pool, sz, util_next_power_of_two(sz)); + assert(sz == util_next_power_of_two(sz)); + return panfrost_pool_alloc_aligned(pool, sz, sz); } struct panfrost_transfer @@ -78,4 +79,7 @@ panfrost_pool_alloc(struct pan_pool *pool, size_t sz); mali_ptr panfrost_pool_upload(struct pan_pool *pool, const void *data, size_t sz); +mali_ptr +panfrost_pool_upload_aligned(struct pan_pool *pool, const void *data, size_t sz, unsigned alignment); + #endif diff --git a/src/panfrost/lib/pan_scoreboard.c b/src/panfrost/lib/pan_scoreboard.c index f1ab2cac02c..c885dbae09d 100644 --- a/src/panfrost/lib/pan_scoreboard.c +++ b/src/panfrost/lib/pan_scoreboard.c @@ -195,7 +195,7 @@ panfrost_scoreboard_initialize_tiler(struct pan_pool *pool, .value_descriptor = MALI_WRITE_VALUE_ZERO, }; - struct panfrost_transfer transfer = panfrost_pool_alloc(pool, sizeof(job) + sizeof(payload)); + struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(pool, sizeof(job) + sizeof(payload), 64); memcpy(transfer.cpu, &job, sizeof(job)); memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload)); -- 2.30.2