panfrost: Add RGB565, RGB5A1 texture formats
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
index 6272578bd5f65cbc431a49561f8956c143e44574..9c0f0420e2b7ff90f5147a73ffe5c1ea03732746 100644 (file)
@@ -80,7 +80,7 @@ panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
         if (require_sfbd) {
                 SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
         } else {
-                SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled);
+                SET_BIT(ctx->fragment_rts[0].format.flags, MALI_MFBD_FORMAT_MSAA, enabled);
 
                 SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled);
 
@@ -167,7 +167,7 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
                 ctx->fragment_rts[0].afbc.stride = 0;
                 ctx->fragment_rts[0].afbc.unk = 0x30009;
 
-                ctx->fragment_rts[0].format |= MALI_MFBD_FORMAT_AFBC;
+                ctx->fragment_rts[0].format.flags |= MALI_MFBD_FORMAT_AFBC;
 
                 /* Point rendering to our special framebuffer */
                 ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
@@ -210,7 +210,12 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx)
                         assert(0);
                 }
 
-                ctx->fragment_rts[0].format = 0x80008000;
+                struct mali_rt_format null_rt = {
+                        .unk1 = 0x4000000,
+                        .unk4 = 0x8
+                };
+
+                ctx->fragment_rts[0].format = null_rt;
                 ctx->fragment_rts[0].framebuffer = 0;
                 ctx->fragment_rts[0].framebuffer_stride = 0;
         }
@@ -256,7 +261,28 @@ static struct bifrost_framebuffer
 panfrost_emit_mfbd(struct panfrost_context *ctx)
 {
         struct bifrost_framebuffer framebuffer = {
-                .tiler_meta = 0xf00000c600,
+                /* It is not yet clear what tiler_meta means or how it's
+                 * calculated, but we can tell the lower 32-bits are a
+                 * (monotonically increasing?) function of tile count and
+                 * geometry complexity; I suspect it defines a memory size of
+                 * some kind? for the tiler. It's really unclear at the
+                 * moment... but to add to the confusion, the hardware is happy
+                 * enough to accept a zero in this field, so we don't even have
+                 * to worry about it right now.
+                 *
+                 * The byte (just after the 32-bit mark) is much more
+                 * interesting. The higher nibble I've only ever seen as 0xF,
+                 * but the lower one I've seen as 0x0 or 0xF, and it's not
+                 * obvious what the difference is. But what -is- obvious is
+                 * that when the lower nibble is zero, performance is severely
+                 * degraded compared to when the lower nibble is set.
+                 * Evidently, that nibble enables some sort of fast path,
+                 * perhaps relating to caching or tile flush? Regardless, at
+                 * this point there's no clear reason not to set it, aside from
+                 * substantially increased memory requirements (of the misc_0
+                 * buffer) */
+
+                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,
 
                 .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                 .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
@@ -271,10 +297,23 @@ panfrost_emit_mfbd(struct panfrost_context *ctx)
 
                 .unknown2 = 0x1f,
 
-                /* Presumably corresponds to unknown_address_X of SFBD */
+                /* Corresponds to unknown_address_X of SFBD */
                 .scratchpad = ctx->scratchpad.gpu,
                 .tiler_scratch_start  = ctx->misc_0.gpu,
-                .tiler_scratch_middle = ctx->misc_0.gpu + /*ctx->misc_0.size*/40960, /* Size depends on the size of the framebuffer and the number of vertices */
+
+                /* The constant added here is, like the lower word of
+                 * tiler_meta, (loosely) another product of framebuffer size
+                 * and geometry complexity. It must be sufficiently large for
+                 * the tiler_meta fast path to work; if it's too small, there
+                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
+                 * than the total size of misc_0, or else there's no room. It's
+                 * possible this constant configures a partition between two
+                 * parts of misc_0? We haven't investigated the functionality,
+                 * as these buffers are internally used by the hardware
+                 * (presumably by the tiler) but not seemingly touched by the driver
+                 */
+
+                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,
 
                 .tiler_heap_start = ctx->tiler_heap.gpu,
                 .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
@@ -341,9 +380,20 @@ panfrost_new_frag_framebuffer(struct panfrost_context *ctx)
                 fb.rt_count_2 = 1;
                 fb.unk3 = 0x100;
 
+                /* By default, Gallium seems to need a BGR framebuffer */
+                unsigned char bgra[4] = {
+                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
+                };
+
                 struct bifrost_render_target rt = {
-                        .unk1 = 0x4000000,
-                        .format = 0x860a8899, /* RGBA32, no MSAA */
+                        .format = {
+                                .unk1 = 0x4000000,
+                                .unk2 = 0x1,
+                                .nr_channels = MALI_POSITIVE(4),
+                                .flags = 0x444,
+                                .swizzle = panfrost_translate_swizzle_4(bgra),
+                                .unk4 = 0x8
+                        },
                         .framebuffer = framebuffer,
                         .framebuffer_stride = (stride / 16) & 0xfffffff,
                 };
@@ -548,25 +598,29 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
 
 static void
 panfrost_viewport(struct panfrost_context *ctx,
-                  float depth_range_n,
-                  float depth_range_f,
+                  float depth_clip_near,
+                  float depth_clip_far,
                   int viewport_x0, int viewport_y0,
                   int viewport_x1, int viewport_y1)
 {
-        /* Viewport encoding is asymmetric. Purpose of the floats is unknown? */
+        /* Clip bounds are encoded as floats. The viewport itself is encoded as
+         * (somewhat) asymmetric ints. */
 
         struct mali_viewport ret = {
-                .floats = {
-#if 0
-                        -inff, -inff,
-                        inff, inff,
-#endif
-                        0.0, 0.0,
-                        2048.0, 1600.0,
-                },
+                /* By default, do no viewport clipping, i.e. clip to (-inf,
+                 * inf) in each direction. Clipping to the viewport in theory
+                 * should work, but in practice causes issues when we're not
+                 * explicitly trying to scissor */
+
+                .clip_minx = -inff,
+                .clip_miny = -inff,
+                .clip_maxx = inff,
+                .clip_maxy = inff,
+
+                /* We always perform depth clipping (TODO: Can this be disabled?) */
 
-                .depth_range_n = depth_range_n,
-                .depth_range_f = depth_range_f,
+                .clip_minz = depth_clip_near,
+                .clip_maxz = depth_clip_far,
 
                 .viewport0 = { viewport_x0, viewport_y0 },
                 .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) },
@@ -1533,11 +1587,15 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate)
                 screen->driver->force_flush_fragment(ctx);
 
 #ifdef DUMP_PERFORMANCE_COUNTERS
-        char filename[128];
-        snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number);
-        FILE *fp = fopen(filename, "wb");
-        fwrite(screen->perf_counters.cpu,  4096, sizeof(uint32_t), fp);
-        fclose(fp);
+        if (screen->driver->dump_counters) {
+                screen->driver->dump_counters(screen);
+
+                char filename[128];
+                snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number);
+                FILE *fp = fopen(filename, "wb");
+                fwrite(screen->perf_counters.cpu,  4096, sizeof(uint32_t), fp);
+                fclose(fp);
+        }
 #endif
 
 #endif
@@ -1894,7 +1952,9 @@ panfrost_bind_vertex_elements_state(
static void
panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
{
-        printf("Vertex elements delete leaks descriptor\n");
+        struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; /* hwcso is the CSO being deleted */
+        unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements; /* one mali_attr_meta per element */
+        printf("Vertex elements delete leaks descriptor (%u bytes)\n", bytes); /* %u matches the unsigned argument; only the CSO itself is freed below */
         free(hwcso);
 }
 
@@ -1919,7 +1979,15 @@ panfrost_delete_shader_state(
         struct pipe_context *pctx,
         void *so)
 {
-        printf("Deleting shader state maybe leaks tokens, per-variant compiled shaders, per-variant  descriptors\n");
+        struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
+
+        if (cso->base.type == PIPE_SHADER_IR_TGSI) {
+                printf("Deleting TGSI shader leaks duplicated tokens\n");
+        }
+
+        unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta);
+        printf("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak);
+
         free(so);
 }
 
@@ -2286,8 +2354,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
         ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
         ctx->pipe_framebuffer.samples = fb->samples;
         ctx->pipe_framebuffer.layers = fb->layers;
-        ctx->pipe_framebuffer.width = ALIGN(fb->width, 16);
-        ctx->pipe_framebuffer.height = ALIGN(fb->height, 16);
+        ctx->pipe_framebuffer.width = fb->width;
+        ctx->pipe_framebuffer.height = fb->height;
 
         for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
                 struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
@@ -2412,7 +2480,12 @@ static void
 panfrost_delete_blend_state(struct pipe_context *pipe,
                             void *blend)
 {
-        printf("Deleting blend state may leak blend shader\n");
+        struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend;
+
+        if (so->has_blend_shader) {
+                printf("Deleting blend state leak blend shaders bytecode\n");
+        }
+
         free(blend);
 }
 
@@ -2674,10 +2747,10 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
         }
 
         screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
-        screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 0, 0, 0);
+        screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
         screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
-        screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_GROWABLE, 1, 128);
-        screen->driver->allocate_slab(screen, &ctx->misc_0, 128, false, PAN_ALLOCATE_GROWABLE, 1, 128);
+        screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
+        screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
 
 }