In most cases, GPU data structures need only be self-aligned; the
worst-case 128-byte alignment is wasteful. By passing explicit
alignments, we can reduce memory usage, avoid extra allocations, and
improve descriptor cache locality.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6373>
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
- out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
- info->count *
- info->index_size);
+ struct panfrost_transfer T =
+ panfrost_pool_alloc_aligned(&batch->pool,
+ info->count * info->index_size,
+ info->index_size);
+
+ memcpy(T.cpu, ibuf8 + offset, info->count * info->index_size);
+ out = T.gpu;
panfrost_frag_shader_meta_init(ctx, &meta, rts);
panfrost_frag_shader_meta_init(ctx, &meta, rts);
- xfer = panfrost_pool_alloc(&batch->pool, desc_size);
+ xfer = panfrost_pool_alloc_aligned(&batch->pool, desc_size, sizeof(meta));
memcpy(xfer.cpu, &meta, sizeof(meta));
memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
memcpy(xfer.cpu, &meta, sizeof(meta));
memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
size_t size = sys_size + uniform_size;
size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
size_t size = sys_size + uniform_size;
- struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
- size);
+ struct panfrost_transfer transfer =
+ panfrost_pool_alloc_aligned(&batch->pool, size, 16);
/* Upload sysvals requested by the shader */
panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
/* Upload sysvals requested by the shader */
panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
assert(ubo_count >= 1);
size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
assert(ubo_count >= 1);
size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
- struct panfrost_transfer ubos = panfrost_pool_alloc(&batch->pool, sz);
+ struct panfrost_transfer ubos =
+ panfrost_pool_alloc_aligned(&batch->pool, sz,
+ MALI_UNIFORM_BUFFER_LENGTH);
+
uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
/* Upload uniforms as a UBO */
uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
/* Upload uniforms as a UBO */
return;
if (device->quirks & IS_BIFROST) {
return;
if (device->quirks & IS_BIFROST) {
- struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
+ struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
MALI_BIFROST_TEXTURE_LENGTH *
MALI_BIFROST_TEXTURE_LENGTH *
- ctx->sampler_view_count[stage]);
+ ctx->sampler_view_count[stage],
+ MALI_BIFROST_TEXTURE_LENGTH);
struct mali_bifrost_texture_packed *out =
(struct mali_bifrost_texture_packed *) T.cpu;
struct mali_bifrost_texture_packed *out =
(struct mali_bifrost_texture_packed *) T.cpu;
assert(MALI_BIFROST_SAMPLER_LENGTH == MALI_MIDGARD_SAMPLER_LENGTH);
size_t sz = desc_size * ctx->sampler_count[stage];
assert(MALI_BIFROST_SAMPLER_LENGTH == MALI_MIDGARD_SAMPLER_LENGTH);
size_t sz = desc_size * ctx->sampler_count[stage];
- struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool, sz);
+ struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool, sz, desc_size);
struct mali_midgard_sampler_packed *out = (struct mali_midgard_sampler_packed *) T.cpu;
for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
struct mali_midgard_sampler_packed *out = (struct mali_midgard_sampler_packed *) T.cpu;
for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
/* Worst case: everything is NPOT */
/* Worst case: everything is NPOT */
- struct panfrost_transfer S = panfrost_pool_alloc(&batch->pool,
- MALI_ATTRIBUTE_LENGTH * PIPE_MAX_ATTRIBS * 2);
+ struct panfrost_transfer S = panfrost_pool_alloc_aligned(&batch->pool,
+ MALI_ATTRIBUTE_LENGTH * PIPE_MAX_ATTRIBS * 2,
+ MALI_ATTRIBUTE_LENGTH);
- struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
- MALI_ATTRIBUTE_LENGTH * (PAN_INSTANCE_ID + 1));
+ struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
+ MALI_ATTRIBUTE_LENGTH * (PAN_INSTANCE_ID + 1),
+ MALI_ATTRIBUTE_LENGTH);
struct mali_attribute_buffer_packed *bufs =
(struct mali_attribute_buffer_packed *) S.cpu;
struct mali_attribute_buffer_packed *bufs =
(struct mali_attribute_buffer_packed *) S.cpu;
unsigned stride, unsigned count)
{
unsigned size = stride * count;
unsigned stride, unsigned count)
{
unsigned size = stride * count;
- mali_ptr ptr = panfrost_pool_alloc(&batch->invisible_pool, size).gpu;
+ mali_ptr ptr = panfrost_pool_alloc_aligned(&batch->invisible_pool, size, 64).gpu;
pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
cfg.stride = stride;
pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
cfg.stride = stride;
vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
- struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
- vs_size +
- fs_size);
+ struct panfrost_transfer trans = panfrost_pool_alloc_aligned(
+ &batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
struct pipe_stream_output_info *so = &vs->stream_output;
unsigned present = pan_varying_present(vs, fs, dev->quirks);
struct pipe_stream_output_info *so = &vs->stream_output;
unsigned present = pan_varying_present(vs, fs, dev->quirks);
}
unsigned xfb_base = pan_xfb_base(present);
}
unsigned xfb_base = pan_xfb_base(present);
- struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
- MALI_ATTRIBUTE_BUFFER_LENGTH * (xfb_base + ctx->streamout.num_targets));
+ struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
+ MALI_ATTRIBUTE_BUFFER_LENGTH * (xfb_base + ctx->streamout.num_targets),
+ MALI_ATTRIBUTE_BUFFER_LENGTH);
struct mali_attribute_buffer_packed *varyings =
(struct mali_attribute_buffer_packed *) T.cpu;
struct mali_attribute_buffer_packed *varyings =
(struct mali_attribute_buffer_packed *) T.cpu;
sizeof(struct mali_single_framebuffer) :
sizeof(struct mali_framebuffer);
sizeof(struct mali_single_framebuffer) :
sizeof(struct mali_framebuffer);
- batch->framebuffer = panfrost_pool_alloc(&batch->pool, size);
+ batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64);
/* Tag the pointer */
if (!(dev->quirks & MIDGARD_SFBD))
/* Tag the pointer */
if (!(dev->quirks & MIDGARD_SFBD))
blend_shader = bo->gpu | b->first_tag;
}
blend_shader = bo->gpu | b->first_tag;
}
- struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
- 4 * 4 * 6 * rsrc->damage.inverted_len);
+ struct panfrost_transfer transfer = panfrost_pool_alloc_aligned(&batch->pool,
+ 4 * 4 * 6 * rsrc->damage.inverted_len, 64);
for (unsigned i = 0; i < rsrc->damage.inverted_len; ++i) {
float *o = (float *) (transfer.cpu + (4 * 4 * 6 * i));
for (unsigned i = 0; i < rsrc->damage.inverted_len; ++i) {
float *o = (float *) (transfer.cpu + (4 * 4 * 6 * i));
sizeof(struct mali_render_target) * 8;
struct panfrost_transfer m_f_trans =
sizeof(struct mali_render_target) * 8;
struct panfrost_transfer m_f_trans =
- panfrost_pool_alloc(&batch->pool, total_sz);
+ panfrost_pool_alloc_aligned(&batch->pool, total_sz, 64);
* textures, removing the need to separately key the blit shaders for
* 2D and 3D variants */
* textures, removing the need to separately key the blit shaders for
* 2D and 3D variants */
- struct panfrost_transfer texture_t = panfrost_pool_alloc(pool, MALI_MIDGARD_TEXTURE_LENGTH + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1));
+ struct panfrost_transfer texture_t = panfrost_pool_alloc_aligned(
+ pool, MALI_MIDGARD_TEXTURE_LENGTH + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1), 128);
panfrost_new_texture(texture_t.cpu,
image->width0, image->height0,
panfrost_new_texture(texture_t.cpu,
image->width0, image->height0,
pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg)
cfg.normalized_coordinates = false;
pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg)
cfg.normalized_coordinates = false;
- struct panfrost_transfer shader_meta_t = panfrost_pool_alloc(pool, sizeof(shader_meta) + 8 * sizeof(struct midgard_blend_rt));
+ struct panfrost_transfer shader_meta_t = panfrost_pool_alloc_aligned(
+ pool, sizeof(shader_meta) + 8 * sizeof(struct midgard_blend_rt), 128);
+
memcpy(shader_meta_t.cpu, &shader_meta, sizeof(shader_meta));
for (unsigned i = 0; i < 8; ++i) {
memcpy(shader_meta_t.cpu, &shader_meta, sizeof(shader_meta));
for (unsigned i = 0; i < 8; ++i) {
#include "pan_bo.h"
#include "pan_pool.h"
#include "pan_bo.h"
#include "pan_pool.h"
-/* TODO: What does this actually have to be? */
-#define ALIGNMENT 128
-
/* Transient command stream pooling: command stream uploads try to simply copy
* into wherever we left off. If there isn't space, we allocate a new entry
* into the pool and copy there */
/* Transient command stream pooling: command stream uploads try to simply copy
* into wherever we left off. If there isn't space, we allocate a new entry
* into the pool and copy there */
}
struct panfrost_transfer
}
struct panfrost_transfer
-panfrost_pool_alloc(struct pan_pool *pool, size_t sz)
+panfrost_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment)
- /* Pad the size */
- sz = ALIGN_POT(sz, ALIGNMENT);
-
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
- unsigned offset = pool->transient_offset;
+ unsigned offset = ALIGN_POT(pool->transient_offset, alignment);
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= TRANSIENT_SLAB_SIZE)) {
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= TRANSIENT_SLAB_SIZE)) {
- pool->transient_offset += sz;
+ pool->transient_offset = offset + sz;
struct panfrost_transfer ret = {
.cpu = bo->cpu + offset,
struct panfrost_transfer ret = {
.cpu = bo->cpu + offset,
+struct panfrost_transfer
+panfrost_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment);
+
+/* Default to self-alignment */
+
+static inline struct panfrost_transfer
+panfrost_pool_alloc(struct pan_pool *pool, size_t sz)
+{
+ return panfrost_pool_alloc_aligned(pool, sz, util_next_power_of_two(sz));
+}
+
struct panfrost_transfer
panfrost_pool_alloc(struct pan_pool *pool, size_t sz);
struct panfrost_transfer
panfrost_pool_alloc(struct pan_pool *pool, size_t sz);
if (inject)
job.next_job = scoreboard->first_job;
if (inject)
job.next_job = scoreboard->first_job;
- struct panfrost_transfer transfer = panfrost_pool_alloc(pool, sizeof(job) + payload_size);
+ struct panfrost_transfer transfer =
+ panfrost_pool_alloc_aligned(pool, sizeof(job) + payload_size, 64);
memcpy(transfer.cpu, &job, sizeof(job));
memcpy(transfer.cpu + sizeof(job), payload, payload_size);
memcpy(transfer.cpu, &job, sizeof(job));
memcpy(transfer.cpu + sizeof(job), payload, payload_size);