/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
-/* TODO: Sample size, etc */
-
-static void
-panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
-{
- struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
-
- job->msaa |= enabled;
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
-}
-
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
* independent between color buffers and depth/stencil). To enable, we allocate
* the AFBC metadata buffer and mark that it is enabled. We do -not- actually
void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
+ struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+
if (with_vertex_data) {
panfrost_emit_vertex_data(ctx);
}
+ bool msaa = ctx->rasterizer->base.multisample;
+
if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
- panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
+
+ /* TODO: Sample size */
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
+ SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
}
+ /* Enable job requirements at draw-time */
+
+ if (msaa)
+ job->requirements |= PAN_REQ_MSAA;
+
+ if (ctx->depth_stencil->depth.writemask)
+ job->requirements |= PAN_REQ_DEPTH_WRITE;
+
if (ctx->occlusion_query) {
ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
struct pipe_surface *zsbuf;
};
+#define PAN_REQ_MSAA (1 << 0)
+#define PAN_REQ_DEPTH_WRITE (1 << 1)
+
/* A panfrost_job corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
float clear_depth;
unsigned clear_stencil;
- /* Whether this job uses MSAA */
- bool msaa;
+ /* Bitmask of PAN_REQ_* flags recording which requirements this job
+ * has */
+ unsigned requirements;
+
};
/* Functions for managing the above */
fbx->ds_afbc.zero1 = 0x10009;
fbx->ds_afbc.padding = 0x1000;
-
- fb->unk3 |= MALI_MFBD_DEPTH_WRITE;
} else if (rsrc->bo->layout == PAN_LINEAR) {
fb->unk3 |= MALI_MFBD_EXTRA;
fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
rts[0].framebuffer_stride = 0;
}
- if (job->msaa) {
+ /* When scanning out, the depth buffer is immediately invalidated, so
+ * we don't need to waste bandwidth writing it out. This can improve
+ * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+ * memory bandwidth!).
+ *
+ * The exception is ReadPixels on the depth buffer, but reading back
+ * depth is not supported on GLES so we can safely ignore it. */
+
+ if (panfrost_is_scanout(ctx)) {
+ job->requirements &= ~PAN_REQ_DEPTH_WRITE;
+ }
+
+ /* Actualize the requirements */
+
+ if (job->requirements & PAN_REQ_MSAA) {
rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
/* XXX */
fb.rt_count_2 = 4;
}
+ if (job->requirements & PAN_REQ_DEPTH_WRITE)
+ fb.unk3 |= MALI_MFBD_DEPTH_WRITE;
+
if (ctx->pipe_framebuffer.nr_cbufs == 1) {
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;