panfrost: Emit blend descriptors on Bifrost
author Tomeu Vizoso <tomeu.vizoso@collabora.com>
Fri, 24 Apr 2020 06:40:51 +0000 (08:40 +0200)
committer Tomeu Vizoso <tomeu.vizoso@collabora.com>
Thu, 30 Apr 2020 14:27:34 +0000 (16:27 +0200)
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4724>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_context.h
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/encoder/pan_format.c
src/panfrost/encoder/pan_texture.h
src/panfrost/include/panfrost-job.h
src/panfrost/pandecode/decode.c
src/panfrost/util/pan_ir.h

index 4f8e14b105e0c7a0bc3928d7638af3e259b86f11..4c047c76a8467cd9cbd4b5e9ad56502c5efe817b 100644 (file)
@@ -90,6 +90,31 @@ pan_format_from_glsl(const struct glsl_type *type)
                 MALI_NR_CHANNELS(4);
 }
 
+static enum bifrost_shader_type
+bifrost_blend_type_from_nir(nir_alu_type nir_type)
+{
+        switch(nir_type) {
+        case 0: /* Render target not in use */
+                return 0;
+        case nir_type_float16:
+                return BIFROST_BLEND_F16;
+        case nir_type_float32:
+                return BIFROST_BLEND_F32;
+        case nir_type_int32:
+                return BIFROST_BLEND_I32;
+        case nir_type_uint32:
+                return BIFROST_BLEND_U32;
+        case nir_type_int16:
+                return BIFROST_BLEND_I16;
+        case nir_type_uint16:
+                return BIFROST_BLEND_U16;
+        default:
+                DBG("Unsupported blend shader type for NIR alu type %d", nir_type);
+                assert(0);
+                return 0;
+        }
+}
+
 void
 panfrost_shader_compile(struct panfrost_context *ctx,
                         enum pipe_shader_ir ir_type,
@@ -197,6 +222,10 @@ panfrost_shader_compile(struct panfrost_context *ctx,
         state->uniform_cutoff = program.uniform_cutoff;
         state->work_reg_count = program.work_register_count;
 
+        if (dev->quirks & IS_BIFROST)
+                for (unsigned i = 0; i < BIFROST_MAX_RENDER_TARGET_COUNT; i++)
+                        state->blend_types[i] = bifrost_blend_type_from_nir(program.blend_types[i]);
+
         unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
         unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
         unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
index dfc5174bef5894fbde961f1051f4b27dc54983f2..439a09ca54ceffccab2315e27d3abe74c6610b58 100644 (file)
@@ -615,7 +615,7 @@ panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
 static void
 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
                                 struct mali_shader_meta *fragmeta,
-                                struct midgard_blend_rt *rts)
+                                void *rts)
 {
         const struct panfrost_device *dev = pan_device(ctx->base.screen);
 
@@ -679,22 +679,56 @@ panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
         /* Additional blend descriptor tacked on for jobs using MFBD */
 
         for (unsigned i = 0; i < rt_count; ++i) {
-                rts[i].flags = 0x200;
+                if (dev->quirks & IS_BIFROST) {
+                        struct bifrost_blend_rt *brts = rts;
+                        struct panfrost_shader_state *fs;
+                        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
+
+                        brts[i].flags = 0x200;
+                        if (blend[i].is_shader) {
+                                /* The blend shader's address needs to be at
+                                 * the same top 32 bit as the fragment shader.
+                                 * TODO: Ensure that's always the case.
+                                 */
+                                assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
+                                       (fs->bo->gpu & (0xffffffffull << 32)));
+                                brts[i].shader = blend[i].shader.gpu;
+                                brts[i].unk2 = 0x0;
+                        } else {
+                                enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
+                                const struct util_format_description *format_desc;
+                                format_desc = util_format_description(format);
+
+                                brts[i].equation = *blend[i].equation.equation;
+
+                                /* TODO: this is a bit more complicated */
+                                brts[i].constant = blend[i].equation.constant;
+
+                                brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
+                                brts[i].unk2 = 0x19;
+
+                                brts[i].shader_type = fs->blend_types[i];
+                        }
+                } else {
+                        struct midgard_blend_rt *mrts = rts;
 
-                bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
-                               (ctx->pipe_framebuffer.cbufs[i]) &&
-                               util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
+                        mrts[i].flags = 0x200;
 
-                SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
-                SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
-                SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
-                SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+                        bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
+                                       (ctx->pipe_framebuffer.cbufs[i]) &&
+                                       util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
 
-                if (blend[i].is_shader) {
-                        rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
-                } else {
-                        rts[i].blend.equation = *blend[i].equation.equation;
-                        rts[i].blend.constant = blend[i].equation.constant;
+                        SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
+                        SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
+                        SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb);
+                        SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+
+                        if (blend[i].is_shader) {
+                                mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
+                        } else {
+                                mrts[i].blend.equation = *blend[i].equation.equation;
+                                mrts[i].blend.constant = blend[i].equation.constant;
+                        }
                 }
         }
 }
@@ -702,7 +736,7 @@ panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
 static void
 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
                                struct mali_shader_meta *fragmeta,
-                               struct midgard_blend_rt *rts)
+                               void *rts)
 {
         const struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct panfrost_shader_state *fs;
@@ -779,20 +813,31 @@ panfrost_emit_shader_meta(struct panfrost_batch *batch,
                 struct panfrost_device *dev = pan_device(ctx->base.screen);
                 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
                 size_t desc_size = sizeof(meta);
-                struct midgard_blend_rt rts[4];
+                void *rts = NULL;
                 struct panfrost_transfer xfer;
+                unsigned rt_size;
 
-                assert(rt_count <= ARRAY_SIZE(rts));
+                if (dev->quirks & MIDGARD_SFBD)
+                        rt_size = 0;
+                else if (dev->quirks & IS_BIFROST)
+                        rt_size = sizeof(struct bifrost_blend_rt);
+                else
+                        rt_size = sizeof(struct midgard_blend_rt);
 
-                panfrost_frag_shader_meta_init(ctx, &meta, rts);
+                desc_size += rt_size * rt_count;
 
-                if (!(dev->quirks & MIDGARD_SFBD))
-                        desc_size += sizeof(*rts) * rt_count;
+                if (rt_size)
+                        rts = rzalloc_size(ctx, rt_size * rt_count);
+
+                panfrost_frag_shader_meta_init(ctx, &meta, rts);
 
                 xfer = panfrost_allocate_transient(batch, desc_size);
 
                 memcpy(xfer.cpu, &meta, sizeof(meta));
-                memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
+                memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
+
+                if (rt_size)
+                        ralloc_free(rts);
 
                 shader_ptr = xfer.gpu;
         } else {
index d43c202a8551020780e166d36f4779323b57d584..c0a6d5fa1d6b8f140fb3fcdcf5d8d6067e2c568a 100644 (file)
@@ -195,6 +195,8 @@ struct panfrost_shader_state {
         unsigned stack_size;
         unsigned shared_size;
 
+        /* For Bifrost - output type for each RT */
+        enum bifrost_shader_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT];
 
         unsigned int varying_count;
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
index 1586bdcbe2cdb7f22030ae92a368516cbce6c25a..ca4aa427b49c010ed6697a7002a20ffbd8db3195 100644 (file)
@@ -155,8 +155,9 @@ bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr)
                 .vector_channels = 4
         };
 
-        assert(blend.blend_location < 8);
+        assert(blend.blend_location < BIFROST_MAX_RENDER_TARGET_COUNT);
         assert(ctx->blend_types);
+        assert(blend.src_types[0]);
         ctx->blend_types[blend.blend_location] = blend.src_types[0];
 
         bi_emit(ctx, blend);
index 873c4d227d2f43df2fc695a4985b3601fe92a7a9..408f815b6f8c0cee24db6ef80e0db3ed0d3dec76 100644 (file)
@@ -275,3 +275,20 @@ panfrost_invert_swizzle(const unsigned char *in, unsigned char *out)
                 out[idx] = PIPE_SWIZZLE_X + c;
         }
 }
+
+enum mali_format
+panfrost_format_to_bifrost_blend(const struct util_format_description *desc)
+{
+        enum mali_format format = panfrost_find_format(desc);
+
+        switch (format) {
+        case MALI_RGBA4_UNORM:
+                return MALI_RGBA4;
+        case MALI_RGBA8_UNORM:
+                return MALI_RGBA8_2;
+        case MALI_RGB10_A2_UNORM:
+                return MALI_RGB10_A2_2;
+        default:
+                return format;
+        }
+}
index d049ada9fdce708ecf358a3671fbd0f4e89e7e88..5bcda798ac3f3cf1899a6826262691cdab091a6d 100644 (file)
@@ -146,4 +146,7 @@ panfrost_get_default_swizzle(unsigned components)
         }
 }
 
+enum mali_format
+panfrost_format_to_bifrost_blend(const struct util_format_description *desc);
+
 #endif
index ae70cb6b6562a63bc5915f5347af10bd88034d3c..924c4e455188cf62362d9321185e0a7d356e7812 100644 (file)
@@ -466,6 +466,8 @@ enum bifrost_shader_type {
         BIFROST_BLEND_U16 = 5,
 };
 
+#define BIFROST_MAX_RENDER_TARGET_COUNT 8
+
 struct bifrost_blend_rt {
         /* This is likely an analogue of the flags on
          * midgard_blend_rt */
@@ -482,51 +484,53 @@ struct bifrost_blend_rt {
          * constant_hi = int(f / 255)
          * constant_lo = 65535*f - (65535/255) * constant_hi
          */
-
         u16 constant;
 
         struct mali_blend_equation equation;
+
         /*
          * - 0x19 normally
          * - 0x3 when this slot is unused (everything else is 0 except the index)
          * - 0x11 when this is the fourth slot (and it's used)
-+       * - 0 when there is a blend shader
+         * - 0 when there is a blend shader
          */
         u16 unk2;
+
         /* increments from 0 to 3 */
         u16 index;
 
-       union {
-               struct {
-                       /* So far, I've only seen:
-                        * - R001 for 1-component formats
-                        * - RG01 for 2-component formats
-                        * - RGB1 for 3-component formats
-                        * - RGBA for 4-component formats
-                        */
-                       u32 swizzle : 12;
-                       enum mali_format format : 8;
-
-                       /* Type of the shader output variable. Note, this can
-                         * be different from the format.
-                         * enum bifrost_shader_type
-                        */
-                       u32 shader_type : 3;
-                       u32 zero : 9;
-               };
-
-               /* Only the low 32 bits of the blend shader are stored, the
-                * high 32 bits are implicitly the same as the original shader.
-                * According to the kernel driver, the program counter for
-                * shaders is actually only 24 bits, so shaders cannot cross
-                * the 2^24-byte boundary, and neither can the blend shader.
-                * The blob handles this by allocating a 2^24 byte pool for
-                * shaders, and making sure that any blend shaders are stored
-                * in the same pool as the original shader. The kernel will
-                * make sure this allocation is aligned to 2^24 bytes.
-                */
-               u32 shader;
-       };
+        union {
+                struct {
+                        /* So far, I've only seen:
+                         * - R001 for 1-component formats
+                         * - RG01 for 2-component formats
+                         * - RGB1 for 3-component formats
+                         * - RGBA for 4-component formats
+                         */
+                        u32 swizzle : 12;
+                        enum mali_format format : 8;
+
+                        /* Type of the shader output variable. Note, this can
+                          * be different from the format.
+                          * enum bifrost_shader_type
+                         */
+                        u32 zero1 : 4;
+                        u32 shader_type : 3;
+                        u32 zero2 : 5;
+                };
+
+                /* Only the low 32 bits of the blend shader are stored, the
+                 * high 32 bits are implicitly the same as the original shader.
+                 * According to the kernel driver, the program counter for
+                 * shaders is actually only 24 bits, so shaders cannot cross
+                 * the 2^24-byte boundary, and neither can the blend shader.
+                 * The blob handles this by allocating a 2^24 byte pool for
+                 * shaders, and making sure that any blend shaders are stored
+                 * in the same pool as the original shader. The kernel will
+                 * make sure this allocation is aligned to 2^24 bytes.
+                 */
+                u32 shader;
+        };
 } __attribute__((packed));
 
 /* Descriptor for the shader. Following this is at least one, up to four blend
index 79ab9d953f8e3ca85be9a98ab1de084c7bed07dd..faa01cd751794016d041953e7a32af374209ada9 100644 (file)
@@ -1606,8 +1606,51 @@ pandecode_bifrost_blend(void *descs, int job_no, int rt_no)
 
         /* TODO figure out blend shader enable bit */
         pandecode_blend_equation(&b->equation);
+
         pandecode_prop("unk2 = 0x%" PRIx16, b->unk2);
         pandecode_prop("index = 0x%" PRIx16, b->index);
+
+        pandecode_log(".format = ");
+        pandecode_format_short(b->format, false);
+        pandecode_swizzle(b->swizzle, b->format);
+        pandecode_log_cont(",\n");
+
+        pandecode_prop("swizzle = 0x%" PRIx32, b->swizzle);
+        pandecode_prop("format = 0x%" PRIx32, b->format);
+
+        if (b->zero1) {
+                pandecode_msg("XXX: pandecode_bifrost_blend zero1 tripped\n");
+                pandecode_prop("zero1 = 0x%" PRIx32, b->zero1);
+        }
+
+        pandecode_log(".shader_type = ");
+        switch(b->shader_type) {
+        case BIFROST_BLEND_F16:
+                pandecode_log_cont("BIFROST_BLEND_F16");
+                break;
+        case BIFROST_BLEND_F32:
+                pandecode_log_cont("BIFROST_BLEND_F32");
+                break;
+        case BIFROST_BLEND_I32:
+                pandecode_log_cont("BIFROST_BLEND_I32");
+                break;
+        case BIFROST_BLEND_U32:
+                pandecode_log_cont("BIFROST_BLEND_U32");
+                break;
+        case BIFROST_BLEND_I16:
+                pandecode_log_cont("BIFROST_BLEND_I16");
+                break;
+        case BIFROST_BLEND_U16:
+                pandecode_log_cont("BIFROST_BLEND_U16");
+                break;
+        }
+        pandecode_log_cont(",\n");
+
+        if (b->zero2) {
+                pandecode_msg("XXX: pandecode_bifrost_blend zero2 tripped\n");
+                pandecode_prop("zero2 = 0x%" PRIx32, b->zero2);
+        }
+
         pandecode_prop("shader = 0x%" PRIx32, b->shader);
 
         pandecode_indent--;
index 6f1b60deacfbeadf98ebb9db9e5860d6b0298d77..c57a8e01c34bc67adcaeac8abc19ef92d3777f79 100644 (file)
@@ -87,7 +87,7 @@ typedef struct {
         int uniform_cutoff;
 
         /* For Bifrost - output type for each RT */
-        nir_alu_type blend_types[8];
+        nir_alu_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT];
 
         /* Prepended before uniforms, mapping to SYSVAL_ names for the
          * sysval */