size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
- /* We can't reuse the current BO, but we can create a new one. */
- bo = panfrost_batch_create_bo(batch, bo_sz, 0);
+ /* We can't reuse the current BO, but we can create a new one.
+ * We don't know what the BO will be used for, so let's flag it
+ * RW and attach it to both the fragment and vertex/tiler jobs.
+ * TODO: if we want fine-grained BO assignment we should pass
+ * flags to this function and keep the read/write,
+ * fragment/vertex+tiler pools separate.
+ */
+ bo = panfrost_batch_create_bo(batch, bo_sz, 0,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
if (sz < TRANSIENT_SLAB_SIZE) {
batch->transient_bo = bo;
/* Upload the shader */
final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
- PAN_BO_EXECUTE);
+ PAN_BO_EXECUTE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_READ |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
if (shader->patch_index) {
 * let the BO logic know about this constraint. */
#define PAN_BO_DONT_REUSE (1 << 5)
+/* GPU access flags */
+
+/* BO is either shared (can be accessed by more than one GPU batch) or private
+ * (reserved by a specific GPU batch). */
+#define PAN_BO_ACCESS_PRIVATE (0 << 0)
+#define PAN_BO_ACCESS_SHARED (1 << 0)
+
+/* BO is being read/written by the GPU */
+#define PAN_BO_ACCESS_READ (1 << 1)
+#define PAN_BO_ACCESS_WRITE (1 << 2)
+#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
+
+/* BO is accessed by the vertex/tiler job. */
+#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)
+
+/* BO is accessed by the fragment job. */
+#define PAN_BO_ACCESS_FRAGMENT (1 << 4)
+
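+/* For instance, a batch-private shader binary that is read by both job
+ * types (as done for the shader upload in this patch) is created with:
+ *
+ *   PAN_BO_ACCESS_PRIVATE | PAN_BO_ACCESS_READ |
+ *   PAN_BO_ACCESS_VERTEX_TILER | PAN_BO_ACCESS_FRAGMENT
+ */
+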
struct panfrost_bo {
/* Must be first for casting */
struct list_head link;
uint32_t flags;
};
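+/* Map a shader stage to the access flag of the job type executing it:
+ * fragment shaders run in the fragment job, vertex shaders in the
+ * vertex/tiler job. */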
+static inline uint32_t
+panfrost_bo_access_for_stage(enum pipe_shader_type stage)
+{
+ assert(stage == PIPE_SHADER_FRAGMENT ||
+ stage == PIPE_SHADER_VERTEX);
+
+ return stage == PIPE_SHADER_FRAGMENT ?
+ PAN_BO_ACCESS_FRAGMENT :
+ PAN_BO_ACCESS_VERTEX_TILER;
+}
+
void
panfrost_bo_reference(struct panfrost_bo *bo);
void
static mali_ptr
panfrost_upload_tex(
struct panfrost_context *ctx,
+ enum pipe_shader_type st,
struct panfrost_sampler_view *view)
{
if (!view)
/* Add the BO to the job so it's retained until the job is done. */
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo,
+ PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+ panfrost_bo_access_for_stage(st));
/* Add the usage flags in, since they can change across the CSO
* lifetime due to layout switches */
for (int i = 0; i < ctx->sampler_view_count[t]; ++i)
trampolines[i] =
- panfrost_upload_tex(ctx, ctx->sampler_views[t][i]);
+ panfrost_upload_tex(ctx, t, ctx->sampler_views[t][i]);
trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
}
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
- panfrost_batch_add_bo(batch, bo);
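+ /* SSBOs can be both read and written by shader code, hence the RW access */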
+ panfrost_batch_add_bo(batch, bo,
+ PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
+ panfrost_bo_access_for_stage(st));
/* Upload address and size as sysval */
uniform->du[0] = bo->gpu + sb.buffer_offset;
static mali_ptr
panfrost_map_constant_buffer_gpu(
struct panfrost_context *ctx,
+ enum pipe_shader_type st,
struct panfrost_constant_buffer *buf,
unsigned index)
{
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
if (rsrc) {
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo,
+ PAN_BO_ACCESS_SHARED |
+ PAN_BO_ACCESS_READ |
+ panfrost_bo_access_for_stage(st));
return rsrc->bo->gpu;
} else if (cb->user_buffer) {
return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size);
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
/* Add the shader BO to the batch. */
- panfrost_batch_add_bo(batch, ss->bo);
+ panfrost_batch_add_bo(batch, ss->bo,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_READ |
+ panfrost_bo_access_for_stage(stage));
/* We can't reuse over frames; that's not safe. The descriptor must be
 * uploaded to transient memory */
continue;
}
- mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo);
+ mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, i, buf, ubo);
unsigned bytes_per_field = 16;
unsigned aligned = ALIGN_POT(usz, bytes_per_field);
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo,
+ PAN_BO_ACCESS_SHARED |
+ PAN_BO_ACCESS_READ |
+ PAN_BO_ACCESS_VERTEX_TILER);
return rsrc->bo->gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
unsigned chopped_addr = raw_addr - addr;
/* Add a dependency of the batch on the vertex buffer */
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo,
+ PAN_BO_ACCESS_SHARED |
+ PAN_BO_ACCESS_READ |
+ PAN_BO_ACCESS_VERTEX_TILER);
/* Set common fields */
attrs[k].elements = addr;
}
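+/* Add a BO to the batch's BO set, retaining a reference. @flags describes
+ * how the GPU will access the BO: shared vs. private, read and/or write,
+ * and which job type(s) touch it. */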
void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+ uint32_t flags)
{
if (!bo)
return;
void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
{
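+ /* Framebuffer attachments can be shared with other batches, are
+ * written by this one, and are accessed by both the vertex/tiler and
+ * fragment jobs. */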
+ uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT;
+
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
struct panfrost_resource *rsrc = pan_resource(batch->key.cbufs[i]->texture);
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo, flags);
}
if (batch->key.zsbuf) {
struct panfrost_resource *rsrc = pan_resource(batch->key.zsbuf->texture);
- panfrost_batch_add_bo(batch, rsrc->bo);
+ panfrost_batch_add_bo(batch, rsrc->bo, flags);
}
}
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
- uint32_t create_flags)
+ uint32_t create_flags, uint32_t access_flags)
{
struct panfrost_bo *bo;
bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size,
create_flags);
- panfrost_batch_add_bo(batch, bo);
+ panfrost_batch_add_bo(batch, bo, access_flags);
/* panfrost_batch_add_bo() has retained a reference and
 * panfrost_bo_create() initializes the refcnt to 1, so let's
 /* Create the BO as invisible, as there's no reason to map it */
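+ /* The polygon list is written by the tiler and consumed by the
+ * fragment job, hence the RW access from both job types. */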
batch->polygon_list = panfrost_batch_create_bo(batch, size,
- PAN_BO_INVISIBLE);
+ PAN_BO_INVISIBLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
}
return batch->polygon_list->gpu;
return batch->scratchpad;
batch->scratchpad = panfrost_batch_create_bo(batch, 64 * 4 * 4096,
- PAN_BO_INVISIBLE);
+ PAN_BO_INVISIBLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
assert(batch->scratchpad);
return batch->scratchpad;
}
batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096,
PAN_BO_INVISIBLE |
- PAN_BO_GROWABLE);
+ PAN_BO_GROWABLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
assert(batch->tiler_heap);
return batch->tiler_heap;
}
return batch->tiler_dummy;
batch->tiler_dummy = panfrost_batch_create_bo(batch, 4096,
- PAN_BO_INVISIBLE);
+ PAN_BO_INVISIBLE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
assert(batch->tiler_dummy);
return batch->tiler_dummy;
}
panfrost_batch_init(struct panfrost_context *ctx);
void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo);
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+ uint32_t flags);
void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
- uint32_t create_flags);
+ uint32_t create_flags, uint32_t access_flags);
void
panfrost_batch_submit(struct panfrost_batch *batch);
/* Grab the BO and bind it to the batch */
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
- panfrost_batch_add_bo(batch, bo);
+
+ /* Varyings are WRITE from the perspective of the VERTEX but READ from
+ * the perspective of the TILER and FRAGMENT.
+ */
+ panfrost_batch_add_bo(batch, bo,
+ PAN_BO_ACCESS_SHARED |
+ PAN_BO_ACCESS_RW |
+ PAN_BO_ACCESS_VERTEX_TILER |
+ PAN_BO_ACCESS_FRAGMENT);
mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
slot->elements = addr;