if (require_sfbd) {
SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
} else {
- SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled);
+ SET_BIT(ctx->fragment_rts[0].format.flags, MALI_MFBD_FORMAT_MSAA, enabled);
SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled);
ctx->fragment_rts[0].afbc.stride = 0;
ctx->fragment_rts[0].afbc.unk = 0x30009;
- ctx->fragment_rts[0].format |= MALI_MFBD_FORMAT_AFBC;
+ ctx->fragment_rts[0].format.flags |= MALI_MFBD_FORMAT_AFBC;
/* Point rendering to our special framebuffer */
ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
assert(0);
}
- ctx->fragment_rts[0].format = 0x80008000;
+ struct mali_rt_format null_rt = {
+ .unk1 = 0x4000000,
+ .unk4 = 0x8
+ };
+
+ ctx->fragment_rts[0].format = null_rt;
ctx->fragment_rts[0].framebuffer = 0;
ctx->fragment_rts[0].framebuffer_stride = 0;
}
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
struct bifrost_framebuffer framebuffer = {
- .tiler_meta = 0xf00000c600,
+ /* It is not yet clear what tiler_meta means or how it's
+ * calculated, but we can tell the lower 32-bits are a
+ * (monotonically increasing?) function of tile count and
+ * geometry complexity; I suspect it defines a memory size of
+ * some kind? for the tiler. It's really unclear at the
+ * moment... but to add to the confusion, the hardware is happy
+ * enough to accept a zero in this field, so we don't even have
+ * to worry about it right now.
+ *
+ * The byte (just after the 32-bit mark) is much more
+ * interesting. The higher nibble I've only ever seen as 0xF,
+ * but the lower one I've seen as 0x0 or 0xF, and it's not
+ * obvious what the difference is. But what -is- obvious is
+ * that when the lower nibble is zero, performance is severely
+ * degraded compared to when the lower nibble is set.
+ * Evidently, that nibble enables some sort of fast path,
+ * perhaps relating to caching or tile flush? Regardless, at
+ * this point there's no clear reason not to set it, aside from
+ * substantially increased memory requirements (of the misc_0
+ * buffer) */
+
+ .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,
.width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
.height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
.unknown2 = 0x1f,
- /* Presumably corresponds to unknown_address_X of SFBD */
+ /* Corresponds to unknown_address_X of SFBD */
.scratchpad = ctx->scratchpad.gpu,
.tiler_scratch_start = ctx->misc_0.gpu,
- .tiler_scratch_middle = ctx->misc_0.gpu + /*ctx->misc_0.size*/40960, /* Size depends on the size of the framebuffer and the number of vertices */
+
+ /* The constant added here is, like the lower word of
+ * tiler_meta, (loosely) another product of framebuffer size
+ * and geometry complexity. It must be sufficiently large for
+ * the tiler_meta fast path to work; if it's too small, there
+ * will be DATA_INVALID_FAULTs. Conversely, it must be less
+ * than the total size of misc_0, or else there's no room. It's
+ * possible this constant configures a partition between two
+ * parts of misc_0? We haven't investigated the functionality,
+ * as these buffers are internally used by the hardware
+ * (presumably by the tiler) but seemingly not touched by the driver
+ */
+
+ .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,
.tiler_heap_start = ctx->tiler_heap.gpu,
.tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
fb.rt_count_2 = 1;
fb.unk3 = 0x100;
+ /* By default, Gallium seems to need a BGR framebuffer */
+ unsigned char bgra[4] = {
+ PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
+ };
+
struct bifrost_render_target rt = {
- .unk1 = 0x4000000,
- .format = 0x860a8899, /* RGBA32, no MSAA */
+ .format = {
+ .unk1 = 0x4000000,
+ .unk2 = 0x1,
+ .nr_channels = MALI_POSITIVE(4),
+ .flags = 0x444,
+ .swizzle = panfrost_translate_swizzle_4(bgra),
+ .unk4 = 0x8
+ },
.framebuffer = framebuffer,
.framebuffer_stride = (stride / 16) & 0xfffffff,
};
static void
panfrost_viewport(struct panfrost_context *ctx,
- float depth_range_n,
- float depth_range_f,
+ float depth_clip_near,
+ float depth_clip_far,
int viewport_x0, int viewport_y0,
int viewport_x1, int viewport_y1)
{
- /* Viewport encoding is asymmetric. Purpose of the floats is unknown? */
+ /* Clip bounds are encoded as floats. The viewport itself is encoded as
+ * (somewhat) asymmetric ints. */
struct mali_viewport ret = {
- .floats = {
-#if 0
- -inff, -inff,
- inff, inff,
-#endif
- 0.0, 0.0,
- 2048.0, 1600.0,
- },
+ /* By default, do no viewport clipping, i.e. clip to (-inf,
+ * inf) in each direction. Clipping to the viewport in theory
+ * should work, but in practice causes issues when we're not
+ * explicitly trying to scissor */
+
+ .clip_minx = -inff,
+ .clip_miny = -inff,
+ .clip_maxx = inff,
+ .clip_maxy = inff,
+
+ /* We always perform depth clipping (TODO: Can this be disabled?) */
- .depth_range_n = depth_range_n,
- .depth_range_f = depth_range_f,
+ .clip_minz = depth_clip_near,
+ .clip_maxz = depth_clip_far,
.viewport0 = { viewport_x0, viewport_y0 },
.viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) },
screen->driver->force_flush_fragment(ctx);
#ifdef DUMP_PERFORMANCE_COUNTERS
- char filename[128];
- snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number);
- FILE *fp = fopen(filename, "wb");
- fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp);
- fclose(fp);
+ if (screen->driver->dump_counters) {
+ screen->driver->dump_counters(screen);
+
+ char filename[128];
+ snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number);
+ FILE *fp = fopen(filename, "wb");
+ fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp);
+ fclose(fp);
+ }
#endif
#endif
static void
panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
{
- printf("Vertex elements delete leaks descriptor\n");
+ /* Only the CPU-side state object is freed below. The attribute
+ * descriptor storage (one mali_attr_meta per element) is not released
+ * here, so report how many bytes that amounts to. */
+ struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso;
+ unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements;
+
+ /* bytes is unsigned, so it must be printed with %u rather than %d */
+ printf("Vertex elements delete leaks descriptor (%u bytes)\n", bytes);
free(hwcso);
}
struct pipe_context *pctx,
void *so)
{
- printf("Deleting shader state maybe leaks tokens, per-variant compiled shaders, per-variant descriptors\n");
+ struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
+
+ if (cso->base.type == PIPE_SHADER_IR_TGSI) {
+ printf("Deleting TGSI shader leaks duplicated tokens\n");
+ }
+
+ unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta);
+ printf("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak);
+
free(so);
}
ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
ctx->pipe_framebuffer.samples = fb->samples;
ctx->pipe_framebuffer.layers = fb->layers;
- ctx->pipe_framebuffer.width = ALIGN(fb->width, 16);
- ctx->pipe_framebuffer.height = ALIGN(fb->height, 16);
+ ctx->pipe_framebuffer.width = fb->width;
+ ctx->pipe_framebuffer.height = fb->height;
for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
panfrost_delete_blend_state(struct pipe_context *pipe,
void *blend)
{
- printf("Deleting blend state may leak blend shader\n");
+ struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend;
+
+ if (so->has_blend_shader) {
+ printf("Deleting blend state leak blend shaders bytecode\n");
+ }
+
free(blend);
}
}
screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
- screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 0, 0, 0);
+ screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
- screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_GROWABLE, 1, 128);
- screen->driver->allocate_slab(screen, &ctx->misc_0, 128, false, PAN_ALLOCATE_GROWABLE, 1, 128);
+ screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
+ screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
}