panfrost: Let precompile imply shaderdb
[mesa.git] / src / gallium / drivers / panfrost / pan_assemble.c
index d1ecfd4ac03fad1e43a6aad46e5284eee6c06635..87127ba945b29cc0c319e1684cff044aa380cd1f 100644 (file)
@@ -25,7 +25,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "pan_bo.h"
 #include "pan_context.h"
+#include "pan_util.h"
 
 #include "compiler/nir/nir.h"
 #include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_dump.h"
 
 void
-panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state)
+panfrost_shader_compile(
+                struct panfrost_context *ctx,
+                struct mali_shader_meta *meta,
+                enum pipe_shader_ir ir_type,
+                const void *ir,
+                gl_shader_stage stage,
+                struct panfrost_shader_state *state,
+                uint64_t *outputs_written)
 {
+        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
         uint8_t *dst;
 
         nir_shader *s;
 
-        struct pipe_shader_state *cso = state->base;
-
-        if (cso->type == PIPE_SHADER_IR_NIR) {
-                s = nir_shader_clone(NULL, cso->ir.nir);
+        if (ir_type == PIPE_SHADER_IR_NIR) {
+                s = nir_shader_clone(NULL, ir);
         } else {
-                assert (cso->type == PIPE_SHADER_IR_TGSI);
-                //tgsi_dump(cso->tokens, 0);
-                s = tgsi_to_nir(cso->tokens, ctx->base.screen);
+                assert (ir_type == PIPE_SHADER_IR_TGSI);
+                s = tgsi_to_nir(ir, ctx->base.screen);
         }
 
-        s->info.stage = type == JOB_TYPE_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_FRAGMENT;
-
-        if (s->info.stage == MESA_SHADER_FRAGMENT) {
-                /* Inject the alpha test now if we need to */
-
-                if (state->alpha_state.enabled) {
-                        NIR_PASS_V(s, nir_lower_alpha_test, state->alpha_state.func, false);
-                }
-        }
+        s->info.stage = stage;
 
         /* Call out to Midgard compiler given the above NIR */
 
@@ -67,7 +66,8 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
                 .alpha_ref = state->alpha_state.ref_value
         };
 
-        midgard_compile_shader_nir(s, &program, false);
+        midgard_compile_shader_nir(s, &program, false, 0, screen->gpu_id,
+                        pan_debug & PAN_DBG_PRECOMPILE);
 
         /* Prepare the compiled binary for upload */
         int size = program.compiled.size;
@@ -77,136 +77,92 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
          * I bet someone just thought that would be a cute pun. At least,
          * that's how I'd do it. */
 
-        meta->shader = panfrost_upload(&ctx->shaders, dst, size, true) | program.first_tag;
+        state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
+        memcpy(state->bo->cpu, dst, size);
+        meta->shader = state->bo->gpu | program.first_tag;
 
         util_dynarray_fini(&program.compiled);
 
+        /* Sysvals are prepended */
+        program.uniform_count += program.sysval_count;
+        state->sysval_count = program.sysval_count;
+        memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
+
         meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
-        meta->attribute_count = program.attribute_count;
-        meta->varying_count = program.varying_count;
         meta->midgard1.work_count = program.work_register_count;
 
-        state->can_discard = program.can_discard;
+        switch (stage) {
+        case MESA_SHADER_VERTEX:
+                meta->attribute_count = util_bitcount64(s->info.inputs_read);
+                meta->varying_count = util_bitcount64(s->info.outputs_written);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                meta->attribute_count = 0;
+                meta->varying_count = util_bitcount64(s->info.inputs_read);
+                break;
+        case MESA_SHADER_COMPUTE:
+                /* TODO: images */
+                meta->attribute_count = 0;
+                meta->varying_count = 0;
+                break;
+        default:
+                unreachable("Unknown shader state");
+        }
+
+        state->can_discard = s->info.fs.uses_discard;
         state->writes_point_size = program.writes_point_size;
+        state->reads_point_coord = false;
+        state->helper_invocations = s->info.fs.needs_helper_invocations;
+        state->stack_size = program.tls_size;
+
+        if (outputs_written)
+                *outputs_written = s->info.outputs_written;
 
         /* Separate as primary uniform count is truncated */
         state->uniform_count = program.uniform_count;
 
-        /* gl_Position eats up an extra spot */
-        if (type == JOB_TYPE_VERTEX)
-                meta->varying_count += 1;
-
-       /* Note: gl_FragCoord does -not- eat an extra spot; it will be included
-        * in our count if we need it */
-
         meta->midgard1.unknown2 = 8; /* XXX */
 
-        /* Varyings are known only through the shader. We choose to upload this
-         * information with the vertex shader, though the choice is perhaps
-         * arbitrary */
-
-        if (type == JOB_TYPE_VERTEX) {
-                struct panfrost_varyings *varyings = &state->varyings;
-
-                /* Measured in vec4 words. Don't include gl_Position */
-                int varying_count = program.varying_count;
+        unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
+        unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
+        unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
 
-                /* Setup two buffers, one for position, the other for normal
-                 * varyings, as seen in traces. TODO: Are there other
-                 * configurations we might use? */
+        /* Iterate the varyings and emit the corresponding descriptor */
+        for (unsigned i = 0; i < meta->varying_count; ++i) {
+                unsigned location = program.varyings[i];
 
-                varyings->varying_buffer_count = 2;
-
-                /* mediump vec4s sequentially */
-                varyings->varyings_stride[0] = (2 * sizeof(float)) * varying_count;
-
-                /* highp gl_Position */
-                varyings->varyings_stride[1] = 4 * sizeof(float);
-
-                /* mediump gl_PointSize */
-                if (program.writes_point_size) {
-                        ++varyings->varying_buffer_count;
-                        varyings->varyings_stride[2] = 2; /* sizeof(fp16) */
-                }
-
-                /* Setup gl_Position, its weirdo analogue, and gl_PointSize (optionally) */
-                unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
-                unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
-
-                struct mali_attr_meta vertex_special_varyings[] = {
-                        {
-                                .index = 1,
-                                .format = MALI_VARYING_POS,
-
-                                .swizzle = default_vec4_swizzle,
-                                .unknown1 = 0x2,
-                        },
-                        {
-                                .index = 1,
-                                .format = MALI_RGBA16F,
-
-                                /* TODO: Wat? yyyy swizzle? */
-                                .swizzle = 0x249,
-                                .unknown1 = 0x0,
-                        },
-                        {
-                                .index = 2,
-                                .format = MALI_R16F,
-                                .swizzle =  default_vec1_swizzle,
-                                .unknown1 = 0x2
-                        }
+                /* Default to a vec4 varying */
+                struct mali_attr_meta v = {
+                        .format = MALI_RGBA32F,
+                        .swizzle = default_vec4_swizzle,
+                        .unknown1 = 0x2,
                 };
 
-                /* How many special vertex varyings are actually required? */
-                int vertex_special_count = 2 + (program.writes_point_size ? 1 : 0);
-
-                /* Setup actual varyings. XXX: Don't assume vec4 */
+                /* Check for special cases, otherwise assume general varying */
 
-                struct mali_attr_meta mali_varyings[PIPE_MAX_ATTRIBS];
+                if (location == VARYING_SLOT_POS) {
+                        if (stage == MESA_SHADER_FRAGMENT)
+                                state->reads_frag_coord = true;
+                        else
+                                v.format = MALI_VARYING_POS;
+                } else if (location == VARYING_SLOT_PSIZ) {
+                        v.format = MALI_R16F;
+                        v.swizzle = default_vec1_swizzle;
 
-                for (int i = 0; i < varying_count; ++i) {
-                        struct mali_attr_meta vec4_varying_meta = {
-                                .index = 0,
-                                .format = MALI_RGBA16F,
-                                .swizzle = default_vec4_swizzle,
-                                .unknown1 = 0x2,
+                        state->writes_point_size = true;
+                } else if (location == VARYING_SLOT_PNTC) {
+                        v.format = MALI_RG16F;
+                        v.swizzle = default_vec2_swizzle;
 
-                                /* Set offset to keep everything back-to-back in
-                                 * the same buffer */
-                                .src_offset = 8 * i,
-                        };
+                        state->reads_point_coord = true;
+                } else if (location == VARYING_SLOT_FACE) {
+                        v.format = MALI_R32I;
+                        v.swizzle = default_vec1_swizzle;
 
-                        mali_varyings[i] = vec4_varying_meta;
+                        state->reads_face = true;
                 }
 
-                /* We don't count the weirdo gl_Position in our varying count */
-                varyings->varying_count = varying_count - 1;
-
-                /* In this context, position_meta represents the implicit
-                 * gl_FragCoord varying. So, upload all the varyings */
-
-                unsigned varyings_size = sizeof(struct mali_attr_meta) * varyings->varying_count;
-                unsigned vertex_special_size = sizeof(struct mali_attr_meta) * vertex_special_count;
-                unsigned vertex_size = vertex_special_size + varyings_size;
-                unsigned fragment_size = varyings_size + sizeof(struct mali_attr_meta);
-
-                struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, vertex_size + fragment_size, HEAP_DESCRIPTOR);
-
-                /* Copy varyings in the follow order:
-                 *  - Position 1, 2
-                 *  - Varyings 1, 2, ..., n
-                 *  - Varyings 1, 2, ..., n (duplicate)
-                 *  - Position 1
-                 */
-
-                memcpy(transfer.cpu, vertex_special_varyings, vertex_special_size);
-                memcpy(transfer.cpu + vertex_special_size, mali_varyings, varyings_size);
-                memcpy(transfer.cpu + vertex_size, mali_varyings, varyings_size);
-                memcpy(transfer.cpu + vertex_size + varyings_size, &vertex_special_varyings[0], sizeof(struct mali_attr_meta));
-
-                /* Point to the descriptor */
-                varyings->varyings_buffer_cpu = transfer.cpu;
-                varyings->varyings_descriptor = transfer.gpu;
-                varyings->varyings_descriptor_fragment = transfer.gpu + vertex_size;
+                state->varyings[i] = v;
+                state->varyings_loc[i] = location;
         }
 }