panfrost: Prepare shader_meta descriptors at emission time
author Boris Brezillon <boris.brezillon@collabora.com>
Thu, 5 Mar 2020 15:20:18 +0000 (16:20 +0100)
committer Boris Brezillon <boris.brezillon@collabora.com>
Tue, 10 Mar 2020 11:47:34 +0000 (12:47 +0100)
This way we avoid potential state leaks and keep the shader_meta
initialization in one place. The time spent preparing the shader
descriptors should be negligible compared to the time spent pushing
those descriptors to the transient buffer (remember we are writing to
non-cacheable memory here).

Note that we might get back to some sort of shader_meta descriptor
caching at some point if that proves necessary, but we now have those
panfrost_frag_meta_xxx_update() helpers, where xxx maps directly to
a CSO bind, which should ease desc template updates.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4083>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_blend_cso.c
src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_cmdstream.h
src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h

index 2ceb5ce92d2e35e7733153d84446a507b6a17646..f8874cee60a9e7bd83759d2f616bf14585f675ca 100644 (file)
 #include "tgsi/tgsi_dump.h"
 
 void
-panfrost_shader_compile(
-                struct panfrost_context *ctx,
-                struct mali_shader_meta *meta,
-                enum pipe_shader_ir ir_type,
-                const void *ir,
-                gl_shader_stage stage,
-                struct panfrost_shader_state *state,
-                uint64_t *outputs_written)
+panfrost_shader_compile(struct panfrost_context *ctx,
+                        enum pipe_shader_ir ir_type,
+                        const void *ir,
+                        gl_shader_stage stage,
+                        struct panfrost_shader_state *state,
+                        uint64_t *outputs_written)
 {
         struct panfrost_screen *screen = pan_screen(ctx->base.screen);
         uint8_t *dst;
@@ -80,11 +78,9 @@ panfrost_shader_compile(
         if (size) {
                 state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
                 memcpy(state->bo->cpu, dst, size);
-                meta->shader = state->bo->gpu | program.first_tag;
                 state->first_tag = program.first_tag;
         } else {
                 /* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */
-                meta->shader = 0x0 | 1;
                 state->first_tag = 1;
         }
 
@@ -95,9 +91,6 @@ panfrost_shader_compile(
         state->sysval_count = program.sysval_count;
         memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
 
-        meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
-        meta->midgard1.work_count = program.work_register_count;
-
         bool vertex_id = s->info.system_values_read & (1 << SYSTEM_VALUE_VERTEX_ID);
         bool instance_id = s->info.system_values_read & (1 << SYSTEM_VALUE_INSTANCE_ID);
 
@@ -145,10 +138,6 @@ panfrost_shader_compile(
         state->uniform_cutoff = program.uniform_cutoff;
         state->work_reg_count = program.work_register_count;
 
-        meta->attribute_count = state->attribute_count;
-        meta->varying_count = state->varying_count;
-        meta->midgard1.flags_hi = 8; /* XXX */
-
         unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
         unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
         unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
index 8b4635ca2864358f633a4ab79920f6127903cf25..d0824ed488f638e87d52f8d435137421d20b5685 100644 (file)
@@ -144,17 +144,12 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
                           void *cso)
 {
         struct panfrost_context *ctx = pan_context(pipe);
-        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
         struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
         struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
         ctx->blend = pblend;
 
         if (!blend)
                 return;
-
-        if (screen->quirks & MIDGARD_SFBD) {
-                SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
-        }
 }
 
 static void
index b6b569ceb5f56f04bf7ca03e749641bf43a3f81f..ddef5c107649146fc3e2a335e9aa6fa01ffa38b7 100644 (file)
@@ -99,6 +99,333 @@ panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
                 tp->postfix.occlusion_counter = 0;
 }
 
+static void
+panfrost_shader_meta_init(struct panfrost_context *ctx,
+                          enum pipe_shader_type st,
+                          struct mali_shader_meta *meta)
+{
+        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
+
+        memset(meta, 0, sizeof(*meta));
+        meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
+        meta->midgard1.uniform_count = MIN2(ss->uniform_count,
+                                            ss->uniform_cutoff);
+        meta->midgard1.work_count = ss->work_reg_count;
+        meta->attribute_count = ss->attribute_count;
+        meta->varying_count = ss->varying_count;
+        meta->midgard1.flags_hi = 0x8; /* XXX */
+        meta->midgard1.flags_lo = 0x220;
+        meta->texture_count = ctx->sampler_view_count[st];
+        meta->sampler_count = ctx->sampler_count[st];
+        meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
+}
+
+unsigned
+panfrost_translate_compare_func(enum pipe_compare_func in)
+{
+        switch (in) {
+        case PIPE_FUNC_NEVER:
+                return MALI_FUNC_NEVER;
+
+        case PIPE_FUNC_LESS:
+                return MALI_FUNC_LESS;
+
+        case PIPE_FUNC_EQUAL:
+                return MALI_FUNC_EQUAL;
+
+        case PIPE_FUNC_LEQUAL:
+                return MALI_FUNC_LEQUAL;
+
+        case PIPE_FUNC_GREATER:
+                return MALI_FUNC_GREATER;
+
+        case PIPE_FUNC_NOTEQUAL:
+                return MALI_FUNC_NOTEQUAL;
+
+        case PIPE_FUNC_GEQUAL:
+                return MALI_FUNC_GEQUAL;
+
+        case PIPE_FUNC_ALWAYS:
+                return MALI_FUNC_ALWAYS;
+
+        default:
+                unreachable("Invalid func");
+        }
+}
+
+static unsigned
+panfrost_translate_stencil_op(enum pipe_stencil_op in)
+{
+        switch (in) {
+        case PIPE_STENCIL_OP_KEEP:
+                return MALI_STENCIL_KEEP;
+
+        case PIPE_STENCIL_OP_ZERO:
+                return MALI_STENCIL_ZERO;
+
+        case PIPE_STENCIL_OP_REPLACE:
+               return MALI_STENCIL_REPLACE;
+
+        case PIPE_STENCIL_OP_INCR:
+                return MALI_STENCIL_INCR;
+
+        case PIPE_STENCIL_OP_DECR:
+                return MALI_STENCIL_DECR;
+
+        case PIPE_STENCIL_OP_INCR_WRAP:
+                return MALI_STENCIL_INCR_WRAP;
+
+        case PIPE_STENCIL_OP_DECR_WRAP:
+                return MALI_STENCIL_DECR_WRAP;
+
+        case PIPE_STENCIL_OP_INVERT:
+                return MALI_STENCIL_INVERT;
+
+        default:
+                unreachable("Invalid stencil op");
+        }
+}
+
+static void
+panfrost_make_stencil_state(const struct pipe_stencil_state *in,
+                            struct mali_stencil_test *out)
+{
+        out->ref = 0; /* Gallium gets it from elsewhere */
+
+        out->mask = in->valuemask;
+        out->func = panfrost_translate_compare_func(in->func);
+        out->sfail = panfrost_translate_stencil_op(in->fail_op);
+        out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
+        out->dppass = panfrost_translate_stencil_op(in->zpass_op);
+}
+
+static void
+panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
+                                     struct mali_shader_meta *fragmeta)
+{
+        if (!ctx->rasterizer) {
+                SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
+                SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
+                fragmeta->depth_units = 0.0f;
+                fragmeta->depth_factor = 0.0f;
+                SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
+                SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
+                return;
+        }
+
+        bool msaa = ctx->rasterizer->base.multisample;
+
+        /* TODO: Sample size */
+        SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
+        SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
+        fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
+        fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
+
+        /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
+
+        SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
+                ctx->rasterizer->base.offset_tri);
+        SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
+                ctx->rasterizer->base.offset_tri);
+}
+
+static void
+panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
+                              struct mali_shader_meta *fragmeta)
+{
+        const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
+        int zfunc = PIPE_FUNC_ALWAYS;
+
+        if (!zsa) {
+                struct pipe_stencil_state default_stencil = {
+                        .enabled = 0,
+                        .func = PIPE_FUNC_ALWAYS,
+                        .fail_op = MALI_STENCIL_KEEP,
+                        .zfail_op = MALI_STENCIL_KEEP,
+                        .zpass_op = MALI_STENCIL_KEEP,
+                        .writemask = 0xFF,
+                        .valuemask = 0xFF
+                };
+
+                panfrost_make_stencil_state(&default_stencil,
+                                            &fragmeta->stencil_front);
+                fragmeta->stencil_mask_front = default_stencil.writemask;
+                fragmeta->stencil_back = fragmeta->stencil_front;
+                fragmeta->stencil_mask_back = default_stencil.writemask;
+                SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
+                SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
+        } else {
+                SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
+                        zsa->stencil[0].enabled);
+                panfrost_make_stencil_state(&zsa->stencil[0],
+                                            &fragmeta->stencil_front);
+                fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
+                fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
+
+                /* If back-stencil is not enabled, use the front values */
+
+                if (zsa->stencil[1].enabled) {
+                        panfrost_make_stencil_state(&zsa->stencil[1],
+                                                    &fragmeta->stencil_back);
+                        fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
+                        fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
+                } else {
+                        fragmeta->stencil_back = fragmeta->stencil_front;
+                        fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
+                        fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
+                }
+
+                if (zsa->depth.enabled)
+                        zfunc = zsa->depth.func;
+
+                /* Depth state (TODO: Refactor) */
+
+                SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
+                        zsa->depth.writemask);
+        }
+
+        fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
+        fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
+}
+
+static void
+panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
+                                struct mali_shader_meta *fragmeta,
+                                struct midgard_blend_rt *rts)
+{
+        const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+
+        SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
+                (screen->quirks & MIDGARD_SFBD) && ctx->blend &&
+                !ctx->blend->base.dither);
+
+        /* Get blending setup */
+        unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
+
+        struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
+        unsigned shader_offset = 0;
+        struct panfrost_bo *shader_bo = NULL;
+
+        for (unsigned c = 0; c < rt_count; ++c)
+                blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
+                                                          &shader_offset);
+
+         /* If there is a blend shader, work registers are shared. XXX: opt */
+
+        for (unsigned c = 0; c < rt_count; ++c) {
+                if (blend[c].is_shader)
+                        fragmeta->midgard1.work_count = 16;
+        }
+
+        /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
+         * copied to the blend_meta appended (by convention), but this is the
+         * field actually read by the hardware. (Or maybe both are read...?).
+         * Specify the last RTi with a blend shader. */
+
+        fragmeta->blend.shader = 0;
+
+        for (signed rt = (rt_count - 1); rt >= 0; --rt) {
+                if (!blend[rt].is_shader)
+                        continue;
+
+                fragmeta->blend.shader = blend[rt].shader.gpu |
+                                         blend[rt].shader.first_tag;
+                break;
+        }
+
+        if (screen->quirks & MIDGARD_SFBD) {
+                /* When only a single render target platform is used, the blend
+                 * information is inside the shader meta itself. We additionally
+                 * need to signal CAN_DISCARD for nontrivial blend modes (so
+                 * we're able to read back the destination buffer) */
+
+                SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
+                        blend[0].is_shader);
+
+                if (!blend[0].is_shader) {
+                        fragmeta->blend.equation = *blend[0].equation.equation;
+                        fragmeta->blend.constant = blend[0].equation.constant;
+                }
+
+                SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
+                        !blend[0].no_blending);
+                return;
+        }
+
+        /* Additional blend descriptor tacked on for jobs using MFBD */
+
+        for (unsigned i = 0; i < rt_count; ++i) {
+                rts[i].flags = 0x200;
+
+                bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
+                               (ctx->pipe_framebuffer.cbufs[i]) &&
+                               util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
+
+                SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
+                SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
+                SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
+                SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+
+                if (blend[i].is_shader) {
+                        rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
+                } else {
+                        rts[i].blend.equation = *blend[i].equation.equation;
+                        rts[i].blend.constant = blend[i].equation.constant;
+                }
+        }
+}
+
+static void
+panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
+                               struct mali_shader_meta *fragmeta,
+                               struct midgard_blend_rt *rts)
+{
+        const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+        struct panfrost_shader_state *fs;
+
+        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
+
+        fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
+        fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
+        fragmeta->unknown2_4 = 0x4e0;
+
+        /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
+         * is required (independent of 32-bit/64-bit descriptors), or why it's
+         * not used on later GPU revisions. Otherwise, all shader jobs fault on
+         * these earlier chips (perhaps this is a chicken bit of some kind).
+         * More investigation is needed. */
+
+        SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD);
+
+        /* Depending on whether it's legal to in the given shader, we try to
+         * enable early-z testing (or forward-pixel kill?) */
+
+        SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
+                !fs->can_discard && !fs->writes_depth);
+
+        /* Add the writes Z/S flags if needed. */
+        SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
+        SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
+
+        /* Any time texturing is used, derivatives are implicitly calculated,
+         * so we need to enable helper invocations */
+
+        SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
+                fs->helper_invocations);
+
+        /* CAN_DISCARD should be set if the fragment shader possibly contains a
+         * 'discard' instruction. It is likely this is related to optimizations
+         * related to forward-pixel kill, as per "Mali Performance 3: Is
+         * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
+
+        SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
+        SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
+
+        panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
+        panfrost_frag_meta_zsa_update(ctx, fragmeta);
+        panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
+}
+
 void
 panfrost_emit_shader_meta(struct panfrost_batch *batch,
                           enum pipe_shader_type st,
@@ -112,14 +439,44 @@ panfrost_emit_shader_meta(struct panfrost_batch *batch,
                 return;
         }
 
+        struct mali_shader_meta meta;
+
+        panfrost_shader_meta_init(ctx, st, &meta);
+
         /* Add the shader BO to the batch. */
         panfrost_batch_add_bo(batch, ss->bo,
                               PAN_BO_ACCESS_PRIVATE |
                               PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
-                                                        sizeof(*ss->tripipe));
+        mali_ptr shader_ptr;
+
+        if (st == PIPE_SHADER_FRAGMENT) {
+                struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+                unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
+                size_t desc_size = sizeof(meta);
+                struct midgard_blend_rt rts[4];
+                struct panfrost_transfer xfer;
+
+                assert(rt_count <= ARRAY_SIZE(rts));
+
+                panfrost_frag_shader_meta_init(ctx, &meta, rts);
+
+                if (!(screen->quirks & MIDGARD_SFBD))
+                        desc_size += sizeof(*rts) * rt_count;
+
+                xfer = panfrost_allocate_transient(batch, desc_size);
+
+                memcpy(xfer.cpu, &meta, sizeof(meta));
+                memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
+
+                shader_ptr = xfer.gpu;
+        } else {
+                shader_ptr = panfrost_upload_transient(batch, &meta,
+                                                       sizeof(meta));
+        }
+
+        vtp->postfix.shader = shader_ptr;
 }
 
 static void
index 2fa088b369c8e304681bd3f5a6472553eb9fb2b3..f16e1bba9ae8c017556318960b5425881af08238 100644 (file)
@@ -32,6 +32,9 @@
 
 #include "pan_job.h"
 
+unsigned
+panfrost_translate_compare_func(enum pipe_compare_func in);
+
 void
 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
                                struct midgard_payload_vertex_tiler *vt);
index d05016e9359fa2e7444100620f546c7090f21612..d67e87f590b5e6f1b5142fafb42f4dafe75b518e 100644 (file)
@@ -54,9 +54,6 @@ panfrost_create_compute_state(
         so->variant_count = 1;
         so->active_variant = 0;
 
-        /* calloc, instead of malloc - to zero unused fields */
-        v->tripipe = CALLOC_STRUCT(mali_shader_meta);
-
         if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
                 struct blob_reader reader;
                 const struct pipe_binary_program_header *hdr = cso->prog;
@@ -66,9 +63,8 @@ panfrost_create_compute_state(
                 so->cbase.ir_type = PIPE_SHADER_IR_NIR;
         }
 
-        panfrost_shader_compile(ctx, v->tripipe,
-                        so->cbase.ir_type, so->cbase.prog,
-                        MESA_SHADER_COMPUTE, v, NULL);
+        panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
+                                MESA_SHADER_COMPUTE, v, NULL);
 
         return so;
 }
@@ -121,7 +117,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
         if (info->input)
                 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
 
-        panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
         panfrost_emit_shader_meta(batch, PIPE_SHADER_COMPUTE, payload);
         panfrost_emit_const_buf(batch, PIPE_SHADER_COMPUTE, payload);
         panfrost_emit_shared_memory(batch, info, payload);
index 5904645aa250158a474109068367a6c22c9d3df7..0f4bca74066d1fd3e86a90c10f26afab7e723c96 100644 (file)
@@ -207,126 +207,6 @@ translate_tex_wrap(enum pipe_tex_wrap w)
         }
 }
 
-static unsigned
-panfrost_translate_compare_func(enum pipe_compare_func in)
-{
-        switch (in) {
-        case PIPE_FUNC_NEVER:
-                return MALI_FUNC_NEVER;
-
-        case PIPE_FUNC_LESS:
-                return MALI_FUNC_LESS;
-
-        case PIPE_FUNC_EQUAL:
-                return MALI_FUNC_EQUAL;
-
-        case PIPE_FUNC_LEQUAL:
-                return MALI_FUNC_LEQUAL;
-
-        case PIPE_FUNC_GREATER:
-                return MALI_FUNC_GREATER;
-
-        case PIPE_FUNC_NOTEQUAL:
-                return MALI_FUNC_NOTEQUAL;
-
-        case PIPE_FUNC_GEQUAL:
-                return MALI_FUNC_GEQUAL;
-
-        case PIPE_FUNC_ALWAYS:
-                return MALI_FUNC_ALWAYS;
-
-        default:
-                unreachable("Invalid func");
-        }
-}
-
-static unsigned
-panfrost_translate_stencil_op(enum pipe_stencil_op in)
-{
-        switch (in) {
-        case PIPE_STENCIL_OP_KEEP:
-                return MALI_STENCIL_KEEP;
-
-        case PIPE_STENCIL_OP_ZERO:
-                return MALI_STENCIL_ZERO;
-
-        case PIPE_STENCIL_OP_REPLACE:
-                return MALI_STENCIL_REPLACE;
-
-        case PIPE_STENCIL_OP_INCR:
-                return MALI_STENCIL_INCR;
-
-        case PIPE_STENCIL_OP_DECR:
-                return MALI_STENCIL_DECR;
-
-        case PIPE_STENCIL_OP_INCR_WRAP:
-                return MALI_STENCIL_INCR_WRAP;
-
-        case PIPE_STENCIL_OP_DECR_WRAP:
-                return MALI_STENCIL_DECR_WRAP;
-
-        case PIPE_STENCIL_OP_INVERT:
-                return MALI_STENCIL_INVERT;
-
-        default:
-                unreachable("Invalid stencil op");
-        }
-}
-
-static void
-panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
-{
-        out->ref = 0; /* Gallium gets it from elsewhere */
-
-        out->mask = in->valuemask;
-        out->func = panfrost_translate_compare_func(in->func);
-        out->sfail = panfrost_translate_stencil_op(in->fail_op);
-        out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
-        out->dppass = panfrost_translate_stencil_op(in->zpass_op);
-}
-
-static void
-panfrost_default_shader_backend(struct panfrost_context *ctx)
-{
-        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-        struct mali_shader_meta shader = {
-                .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
-
-                .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
-                .unknown2_4 = MALI_NO_MSAA | 0x4e0,
-        };
-
-        /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this is
-         * required (independent of 32-bit/64-bit descriptors), or why it's not
-         * used on later GPU revisions. Otherwise, all shader jobs fault on
-         * these earlier chips (perhaps this is a chicken bit of some kind).
-         * More investigation is needed. */
-
-       if (screen->quirks & MIDGARD_SFBD)
-               shader.unknown2_4 |= 0x10;
-
-        struct pipe_stencil_state default_stencil = {
-                .enabled = 0,
-                .func = PIPE_FUNC_ALWAYS,
-                .fail_op = MALI_STENCIL_KEEP,
-                .zfail_op = MALI_STENCIL_KEEP,
-                .zpass_op = MALI_STENCIL_KEEP,
-                .writemask = 0xFF,
-                .valuemask = 0xFF
-        };
-
-        panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
-        shader.stencil_mask_front = default_stencil.writemask;
-
-        panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
-        shader.stencil_mask_back = default_stencil.writemask;
-
-        if (default_stencil.enabled)
-                shader.unknown2_4 |= MALI_STENCIL_TEST;
-
-        memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
-}
-
 bool
 panfrost_writes_point_size(struct panfrost_context *ctx)
 {
@@ -503,33 +383,12 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
         return 32 - __builtin_clz(mask);
 }
 
-/* Fixes up a shader state with current state */
-
-void
-panfrost_patch_shader_state(struct panfrost_context *ctx,
-                            enum pipe_shader_type stage)
-{
-        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, stage);
-
-        if (!ss)
-                return;
-
-        ss->tripipe->texture_count = ctx->sampler_view_count[stage];
-        ss->tripipe->sampler_count = ctx->sampler_count[stage];
-
-        ss->tripipe->midgard1.flags_lo = 0x220;
-
-        unsigned ubo_count = panfrost_ubo_count(ctx, stage);
-        ss->tripipe->midgard1.uniform_buffer_count = ubo_count;
-}
-
 /* Go through dirty flags and actualise them in the cmdstream. */
 
 static void
 panfrost_emit_for_draw(struct panfrost_context *ctx)
 {
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 
         panfrost_batch_add_fbo_bos(batch);
 
@@ -542,166 +401,15 @@ panfrost_emit_for_draw(struct panfrost_context *ctx)
         unsigned total_count = ctx->padded_count * ctx->instance_count;
         panfrost_emit_varying_descriptor(ctx, total_count);
 
-        if (ctx->rasterizer) {
-                bool msaa = ctx->rasterizer->base.multisample;
-
-                /* TODO: Sample size */
-                SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
-                SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
-        }
-
         panfrost_batch_set_requirements(batch);
 
         panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
         panfrost_vt_update_occlusion_query(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
 
-        panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
         panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX,
                                   &ctx->payloads[PIPE_SHADER_VERTEX]);
-
-        if (ctx->shader[PIPE_SHADER_FRAGMENT]) {
-                struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
-
-                panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
-
-#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
-
-                COPY(shader);
-                COPY(attribute_count);
-                COPY(varying_count);
-                COPY(texture_count);
-                COPY(sampler_count);
-                COPY(midgard1.uniform_count);
-                COPY(midgard1.uniform_buffer_count);
-                COPY(midgard1.work_count);
-                COPY(midgard1.flags_lo);
-                COPY(midgard1.flags_hi);
-
-#undef COPY
-
-                /* Get blending setup */
-                unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
-
-                struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
-                unsigned shader_offset = 0;
-                struct panfrost_bo *shader_bo = NULL;
-
-                for (unsigned c = 0; c < rt_count; ++c) {
-                        blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset);
-                }
-
-                /* If there is a blend shader, work registers are shared. XXX: opt */
-
-                for (unsigned c = 0; c < rt_count; ++c) {
-                        if (blend[c].is_shader)
-                                ctx->fragment_shader_core.midgard1.work_count = 16;
-                }
-
-                /* Depending on whether it's legal to in the given shader, we
-                 * try to enable early-z testing (or forward-pixel kill?) */
-
-                SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z,
-                        !variant->can_discard && !variant->writes_depth);
-
-                /* Add the writes Z/S flags if needed. */
-                SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo,
-                        MALI_WRITES_Z, variant->writes_depth);
-                SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi,
-                        MALI_WRITES_S, variant->writes_stencil);
-
-                /* Any time texturing is used, derivatives are implicitly
-                 * calculated, so we need to enable helper invocations */
-
-                SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations);
-
-                /* Assign the stencil refs late */
-
-                unsigned front_ref = ctx->stencil_ref.ref_value[0];
-                unsigned back_ref = ctx->stencil_ref.ref_value[1];
-                bool back_enab = ctx->depth_stencil->stencil[1].enabled;
-
-                ctx->fragment_shader_core.stencil_front.ref = front_ref;
-                ctx->fragment_shader_core.stencil_back.ref = back_enab ? back_ref : front_ref;
-
-                /* CAN_DISCARD should be set if the fragment shader possibly
-                 * contains a 'discard' instruction. It is likely this is
-                 * related to optimizations related to forward-pixel kill, as
-                 * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
-                 * thing?" by Peter Harris
-                 */
-
-                SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard);
-                SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard);
-
-                /* Even on MFBD, the shader descriptor gets blend shaders. It's
-                 * *also* copied to the blend_meta appended (by convention),
-                 * but this is the field actually read by the hardware. (Or
-                 * maybe both are read...?). Specify the last RTi with a blend
-                 * shader. */
-
-                ctx->fragment_shader_core.blend.shader = 0;
-
-                for (signed rt = (rt_count - 1); rt >= 0; --rt) {
-                        if (blend[rt].is_shader) {
-                                ctx->fragment_shader_core.blend.shader =
-                                        blend[rt].shader.gpu | blend[rt].shader.first_tag;
-                                break;
-                        }
-                }
-
-                if (screen->quirks & MIDGARD_SFBD) {
-                        /* When only a single render target platform is used, the blend
-                         * information is inside the shader meta itself. We
-                         * additionally need to signal CAN_DISCARD for nontrivial blend
-                         * modes (so we're able to read back the destination buffer) */
-
-                        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader);
-
-                        if (!blend[0].is_shader) {
-                                ctx->fragment_shader_core.blend.equation =
-                                        *blend[0].equation.equation;
-                                ctx->fragment_shader_core.blend.constant =
-                                        blend[0].equation.constant;
-                        }
-
-                        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending);
-                }
-
-                size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
-                struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
-                memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
-
-                ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu;
-
-                if (!(screen->quirks & MIDGARD_SFBD)) {
-                        /* Additional blend descriptor tacked on for jobs using MFBD */
-
-                        struct midgard_blend_rt rts[4];
-
-                        for (unsigned i = 0; i < rt_count; ++i) {
-                                rts[i].flags = 0x200;
-
-                                bool is_srgb =
-                                        (ctx->pipe_framebuffer.nr_cbufs > i) &&
-                                        (ctx->pipe_framebuffer.cbufs[i]) &&
-                                        util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
-
-                                SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
-                                SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
-                                SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
-                                SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
-
-                                if (blend[i].is_shader) {
-                                        rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
-                                } else {
-                                        rts[i].blend.equation = *blend[i].equation.equation;
-                                        rts[i].blend.constant = blend[i].equation.constant;
-                                }
-                        }
-
-                        memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count);
-                }
-        }
+        panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
+                                  &ctx->payloads[PIPE_SHADER_FRAGMENT]);
 
         /* We stage to transient, so always dirty.. */
         if (ctx->vertex)
@@ -1110,17 +818,9 @@ panfrost_bind_rasterizer_state(
         if (!hwcso)
                 return;
 
-        ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
-        ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
-
         /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */
         assert(ctx->rasterizer->base.offset_clamp == 0.0);
 
-        /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
-
-        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_A, ctx->rasterizer->base.offset_tri);
-        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_B, ctx->rasterizer->base.offset_tri);
-
         /* Point sprites are emulated */
 
         struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
@@ -1184,15 +884,13 @@ panfrost_create_shader_state(
         if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) {
                 struct panfrost_context *ctx = pan_context(pctx);
 
-                struct mali_shader_meta meta;
                 struct panfrost_shader_state state;
                 uint64_t outputs_written;
 
-                panfrost_shader_compile(ctx, &meta,
-                              PIPE_SHADER_IR_NIR,
-                                      so->base.ir.nir,
-                                        tgsi_processor_to_shader_stage(stage), &state,
-                                        &outputs_written);
+                panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
+                                        so->base.ir.nir,
+                                        tgsi_processor_to_shader_stage(stage),
+                                        &state, &outputs_written);
         }
 
         return so;
@@ -1440,9 +1138,6 @@ panfrost_bind_shader_state(
                                         PIPE_SPRITE_COORD_UPPER_LEFT;
                         }
                 }
-
-                variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta));
-
         }
 
         /* Select this variant */
@@ -1456,12 +1151,12 @@ panfrost_bind_shader_state(
         if (!shader_state->compiled) {
                 uint64_t outputs_written = 0;
 
-                panfrost_shader_compile(ctx, shader_state->tripipe,
-                              variants->base.type,
-                              variants->base.type == PIPE_SHADER_IR_NIR ?
-                                      variants->base.ir.nir :
-                                      variants->base.tokens,
-                                        tgsi_processor_to_shader_stage(type), shader_state,
+                panfrost_shader_compile(ctx, variants->base.type,
+                                        variants->base.type == PIPE_SHADER_IR_NIR ?
+                                        variants->base.ir.nir :
+                                        variants->base.tokens,
+                                        tgsi_processor_to_shader_stage(type),
+                                        shader_state,
                                         &outputs_written);
 
                 shader_state->compiled = true;
@@ -1753,28 +1448,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
                 ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
         }
 
-        /* Stencil state */
-        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled);
-
-        panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
-        ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
-
-        /* If back-stencil is not enabled, use the front values */
-        bool back_enab = ctx->depth_stencil->stencil[1].enabled;
-        unsigned back_index = back_enab ? 1 : 0;
-
-        panfrost_make_stencil_state(&depth_stencil->stencil[back_index], &ctx->fragment_shader_core.stencil_back);
-        ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask;
-
-        /* Depth state (TODO: Refactor) */
-        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_WRITEMASK,
-                depth_stencil->depth.writemask);
-
-        int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
-
-        ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
-        ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
-
         /* Bounds test not implemented */
         assert(!depth_stencil->depth.bounds_test);
 }
@@ -2138,7 +1811,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
         panfrost_batch_init(ctx);
         panfrost_emit_vertex_payload(ctx);
         panfrost_invalidate_frame(ctx);
-        panfrost_default_shader_backend(ctx);
 
         return gallium;
 }
index 2e0c4459c65c2b769ddd0252efb1ba1542991d57..b73883df3227da728e08ad7135659d2ffb01e8f1 100644 (file)
@@ -119,12 +119,6 @@ struct panfrost_context {
         /* Each draw has corresponding vertex and tiler payloads */
         struct midgard_payload_vertex_tiler payloads[PIPE_SHADER_TYPES];
 
-        /* The fragment shader binary itself is pointed here (for the tripipe) but
-         * also everything else in the shader core, including blending, the
-         * stencil/depth tests, etc. Refer to the presentations. */
-
-        struct mali_shader_meta fragment_shader_core;
-
         unsigned vertex_count;
         unsigned instance_count;
         enum pipe_prim_type active_prim;
@@ -188,7 +182,6 @@ struct panfrost_rasterizer {
 struct panfrost_shader_state {
         /* Compiled, mapped descriptor, ready for the hardware */
         bool compiled;
-        struct mali_shader_meta *tripipe;
 
         /* Non-descript information */
         int uniform_count;
@@ -296,10 +289,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx);
 bool
 panfrost_writes_point_size(struct panfrost_context *ctx);
 
-void
-panfrost_patch_shader_state(struct panfrost_context *ctx,
-                            enum pipe_shader_type stage);
-
 struct panfrost_transfer
 panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);
 
@@ -325,14 +314,12 @@ mali_ptr
 panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws);
 
 void
-panfrost_shader_compile(
-                struct panfrost_context *ctx,
-                struct mali_shader_meta *meta,
-                enum pipe_shader_ir ir_type,
-                const void *ir,
-                gl_shader_stage stage,
-                struct panfrost_shader_state *state,
-                uint64_t *outputs_written);
+panfrost_shader_compile(struct panfrost_context *ctx,
+                        enum pipe_shader_ir ir_type,
+                        const void *ir,
+                        gl_shader_stage stage,
+                        struct panfrost_shader_state *state,
+                        uint64_t *outputs_written);
 
 unsigned
 panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage);