From 3c98c452f012d20bcca3038af88bcbe7278d9c68 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Fri, 24 Apr 2020 08:40:51 +0200 Subject: [PATCH] panfrost: Emit blend descriptors on Bifrost Signed-off-by: Tomeu Vizoso Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 29 +++++++ src/gallium/drivers/panfrost/pan_cmdstream.c | 87 +++++++++++++++----- src/gallium/drivers/panfrost/pan_context.h | 2 + src/panfrost/bifrost/bifrost_compile.c | 3 +- src/panfrost/encoder/pan_format.c | 17 ++++ src/panfrost/encoder/pan_texture.h | 3 + src/panfrost/include/panfrost-job.h | 70 ++++++++-------- src/panfrost/pandecode/decode.c | 43 ++++++++++ src/panfrost/util/pan_ir.h | 2 +- 9 files changed, 200 insertions(+), 56 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 4f8e14b105e..4c047c76a84 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -90,6 +90,31 @@ pan_format_from_glsl(const struct glsl_type *type) MALI_NR_CHANNELS(4); } +static enum bifrost_shader_type +bifrost_blend_type_from_nir(nir_alu_type nir_type) +{ + switch(nir_type) { + case 0: /* Render target not in use */ + return 0; + case nir_type_float16: + return BIFROST_BLEND_F16; + case nir_type_float32: + return BIFROST_BLEND_F32; + case nir_type_int32: + return BIFROST_BLEND_I32; + case nir_type_uint32: + return BIFROST_BLEND_U32; + case nir_type_int16: + return BIFROST_BLEND_I16; + case nir_type_uint16: + return BIFROST_BLEND_U16; + default: + DBG("Unsupported blend shader type for NIR alu type %d", nir_type); + assert(0); + return 0; + } +} + void panfrost_shader_compile(struct panfrost_context *ctx, enum pipe_shader_ir ir_type, @@ -197,6 +222,10 @@ panfrost_shader_compile(struct panfrost_context *ctx, state->uniform_cutoff = program.uniform_cutoff; state->work_reg_count = program.work_register_count; + if (dev->quirks & IS_BIFROST) + for (unsigned i = 0; i < BIFROST_MAX_RENDER_TARGET_COUNT; i++) + state->blend_types[i] = bifrost_blend_type_from_nir(program.blend_types[i]); + unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1); unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2); unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4); diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index dfc5174bef5..439a09ca54c 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -615,7 +615,7 @@ panfrost_frag_meta_zsa_update(struct panfrost_context *ctx, static void panfrost_frag_meta_blend_update(struct panfrost_context *ctx, struct mali_shader_meta *fragmeta, - struct midgard_blend_rt *rts) + void *rts) { const struct panfrost_device *dev = pan_device(ctx->base.screen); @@ -679,22 +679,56 @@ panfrost_frag_meta_blend_update(struct panfrost_context *ctx, /* Additional blend descriptor tacked on for jobs using MFBD */ for (unsigned i = 0; i < rt_count; ++i) { - rts[i].flags = 0x200; + if (dev->quirks & IS_BIFROST) { + struct bifrost_blend_rt *brts = rts; + struct panfrost_shader_state *fs; + fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); + + brts[i].flags = 0x200; + if (blend[i].is_shader) { + /* The blend shader's address needs to be at + * the same top 32 bit as the fragment shader. + * TODO: Ensure that's always the case. + */ + assert((blend[i].shader.gpu & (0xffffffffull << 32)) == + (fs->bo->gpu & (0xffffffffull << 32))); + brts[i].shader = blend[i].shader.gpu; + brts[i].unk2 = 0x0; + } else { + enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format; + const struct util_format_description *format_desc; + format_desc = util_format_description(format); + + brts[i].equation = *blend[i].equation.equation; + + /* TODO: this is a bit more complicated */ + brts[i].constant = blend[i].equation.constant; + + brts[i].format = panfrost_format_to_bifrost_blend(format_desc); + brts[i].unk2 = 0x19; + + brts[i].shader_type = fs->blend_types[i]; + } + } else { + struct midgard_blend_rt *mrts = rts; - bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) && - (ctx->pipe_framebuffer.cbufs[i]) && - util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format); + mrts[i].flags = 0x200; - SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader); - SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending); - SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); - SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); + bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) && + (ctx->pipe_framebuffer.cbufs[i]) && + util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format); - if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; - } else { - rts[i].blend.equation = *blend[i].equation.equation; - rts[i].blend.constant = blend[i].equation.constant; + SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader); + SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending); + SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb); + SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); + + if (blend[i].is_shader) { + mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; + } else { + mrts[i].blend.equation = *blend[i].equation.equation; + mrts[i].blend.constant = blend[i].equation.constant; + } } } } @@ -702,7 +736,7 @@ panfrost_frag_meta_blend_update(struct panfrost_context *ctx, static void panfrost_frag_shader_meta_init(struct panfrost_context *ctx, struct mali_shader_meta *fragmeta, - struct midgard_blend_rt *rts) + void *rts) { const struct panfrost_device *dev = pan_device(ctx->base.screen); struct panfrost_shader_state *fs; @@ -779,20 +813,31 @@ panfrost_emit_shader_meta(struct panfrost_batch *batch, struct panfrost_device *dev = pan_device(ctx->base.screen); unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); size_t desc_size = sizeof(meta); - struct midgard_blend_rt rts[4]; + void *rts = NULL; struct panfrost_transfer xfer; + unsigned rt_size; - assert(rt_count <= ARRAY_SIZE(rts)); + if (dev->quirks & MIDGARD_SFBD) + rt_size = 0; + else if (dev->quirks & IS_BIFROST) + rt_size = sizeof(struct bifrost_blend_rt); + else + rt_size = sizeof(struct midgard_blend_rt); - panfrost_frag_shader_meta_init(ctx, &meta, rts); + desc_size += rt_size * rt_count; - if (!(dev->quirks & MIDGARD_SFBD)) - desc_size += sizeof(*rts) * rt_count; + if (rt_size) + rts = rzalloc_size(ctx, rt_size * rt_count); + + panfrost_frag_shader_meta_init(ctx, &meta, rts); xfer = panfrost_allocate_transient(batch, desc_size); memcpy(xfer.cpu, &meta, sizeof(meta)); - memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count); + memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count); + + if (rt_size) + ralloc_free(rts); shader_ptr = xfer.gpu; } else { diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index d43c202a855..c0a6d5fa1d6 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -195,6 +195,8 @@ struct panfrost_shader_state { unsigned stack_size; unsigned shared_size; + /* For Bifrost - output type for each RT */ + enum bifrost_shader_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT]; unsigned int varying_count; struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS]; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 1586bdcbe2c..ca4aa427b49 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -155,8 +155,9 @@ bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr) .vector_channels = 4 }; - assert(blend.blend_location < 8); + assert(blend.blend_location < BIFROST_MAX_RENDER_TARGET_COUNT); assert(ctx->blend_types); + assert(blend.src_types[0]); ctx->blend_types[blend.blend_location] = blend.src_types[0]; bi_emit(ctx, blend); diff --git a/src/panfrost/encoder/pan_format.c b/src/panfrost/encoder/pan_format.c index 873c4d227d2..408f815b6f8 100644 --- a/src/panfrost/encoder/pan_format.c +++ b/src/panfrost/encoder/pan_format.c @@ -275,3 +275,20 @@ panfrost_invert_swizzle(const unsigned char *in, unsigned char *out) out[idx] = PIPE_SWIZZLE_X + c; } } + +enum mali_format +panfrost_format_to_bifrost_blend(const struct util_format_description *desc) +{ + enum mali_format format = panfrost_find_format(desc); + + switch (format) { + case MALI_RGBA4_UNORM: + return MALI_RGBA4; + case MALI_RGBA8_UNORM: + return MALI_RGBA8_2; + case MALI_RGB10_A2_UNORM: + return MALI_RGB10_A2_2; + default: + return format; + } +} diff --git a/src/panfrost/encoder/pan_texture.h b/src/panfrost/encoder/pan_texture.h index d049ada9fdc..5bcda798ac3 100644 --- a/src/panfrost/encoder/pan_texture.h +++ b/src/panfrost/encoder/pan_texture.h @@ -146,4 +146,7 @@ panfrost_get_default_swizzle(unsigned components) } } +enum mali_format +panfrost_format_to_bifrost_blend(const struct util_format_description *desc); + #endif diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index ae70cb6b656..924c4e45518 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -466,6 +466,8 @@ enum bifrost_shader_type { BIFROST_BLEND_U16 = 5, }; +#define BIFROST_MAX_RENDER_TARGET_COUNT 8 + struct bifrost_blend_rt { /* This is likely an analogue of the flags on * midgard_blend_rt */ @@ -482,51 +484,53 @@ struct bifrost_blend_rt { * constant_hi = int(f / 255) * constant_lo = 65535*f - (65535/255) * constant_hi */ - u16 constant; struct mali_blend_equation equation; + /* * - 0x19 normally * - 0x3 when this slot is unused (everything else is 0 except the index) * - 0x11 when this is the fourth slot (and it's used) -+ * - 0 when there is a blend shader + * - 0 when there is a blend shader */ u16 unk2; + /* increments from 0 to 3 */ u16 index; - union { - struct { - /* So far, I've only seen: - * - R001 for 1-component formats - * - RG01 for 2-component formats - * - RGB1 for 3-component formats - * - RGBA for 4-component formats - */ - u32 swizzle : 12; - enum mali_format format : 8; - - /* Type of the shader output variable. Note, this can - * be different from the format. - * enum bifrost_shader_type - */ - u32 shader_type : 3; - u32 zero : 9; - }; - - /* Only the low 32 bits of the blend shader are stored, the - * high 32 bits are implicitly the same as the original shader. - * According to the kernel driver, the program counter for - * shaders is actually only 24 bits, so shaders cannot cross - * the 2^24-byte boundary, and neither can the blend shader. - * The blob handles this by allocating a 2^24 byte pool for - * shaders, and making sure that any blend shaders are stored - * in the same pool as the original shader. The kernel will - * make sure this allocation is aligned to 2^24 bytes. - */ - u32 shader; - }; + union { + struct { + /* So far, I've only seen: + * - R001 for 1-component formats + * - RG01 for 2-component formats + * - RGB1 for 3-component formats + * - RGBA for 4-component formats + */ + u32 swizzle : 12; + enum mali_format format : 8; + + /* Type of the shader output variable. Note, this can + * be different from the format. + * enum bifrost_shader_type + */ + u32 zero1 : 4; + u32 shader_type : 3; + u32 zero2 : 5; + }; + + /* Only the low 32 bits of the blend shader are stored, the + * high 32 bits are implicitly the same as the original shader. + * According to the kernel driver, the program counter for + * shaders is actually only 24 bits, so shaders cannot cross + * the 2^24-byte boundary, and neither can the blend shader. + * The blob handles this by allocating a 2^24 byte pool for + * shaders, and making sure that any blend shaders are stored + * in the same pool as the original shader. The kernel will + * make sure this allocation is aligned to 2^24 bytes. + */ + u32 shader; + }; } __attribute__((packed)); /* Descriptor for the shader. Following this is at least one, up to four blend diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c index 79ab9d953f8..faa01cd7517 100644 --- a/src/panfrost/pandecode/decode.c +++ b/src/panfrost/pandecode/decode.c @@ -1606,8 +1606,51 @@ pandecode_bifrost_blend(void *descs, int job_no, int rt_no) /* TODO figure out blend shader enable bit */ pandecode_blend_equation(&b->equation); + pandecode_prop("unk2 = 0x%" PRIx16, b->unk2); pandecode_prop("index = 0x%" PRIx16, b->index); + + pandecode_log(".format = "); + pandecode_format_short(b->format, false); + pandecode_swizzle(b->swizzle, b->format); + pandecode_log_cont(",\n"); + + pandecode_prop("swizzle = 0x%" PRIx32, b->swizzle); + pandecode_prop("format = 0x%" PRIx32, b->format); + + if (b->zero1) { + pandecode_msg("XXX: pandecode_bifrost_blend zero1 tripped\n"); + pandecode_prop("zero1 = 0x%" PRIx32, b->zero1); + } + + pandecode_log(".shader_type = "); + switch(b->shader_type) { + case BIFROST_BLEND_F16: + pandecode_log_cont("BIFROST_BLEND_F16"); + break; + case BIFROST_BLEND_F32: + pandecode_log_cont("BIFROST_BLEND_F32"); + break; + case BIFROST_BLEND_I32: + pandecode_log_cont("BIFROST_BLEND_I32"); + break; + case BIFROST_BLEND_U32: + pandecode_log_cont("BIFROST_BLEND_U32"); + break; + case BIFROST_BLEND_I16: + pandecode_log_cont("BIFROST_BLEND_I16"); + break; + case BIFROST_BLEND_U16: + pandecode_log_cont("BIFROST_BLEND_U16"); + break; + } + pandecode_log_cont(",\n"); + + if (b->zero2) { + pandecode_msg("XXX: pandecode_bifrost_blend zero2 tripped\n"); + pandecode_prop("zero2 = 0x%" PRIx32, b->zero2); + } + pandecode_prop("shader = 0x%" PRIx32, b->shader); pandecode_indent--; diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 6f1b60deacf..c57a8e01c34 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -87,7 +87,7 @@ typedef struct { int uniform_cutoff; /* For Bifrost - output type for each RT */ - nir_alu_type blend_types[8]; + nir_alu_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT]; /* Prepended before uniforms, mapping to SYSVAL_ names for the * sysval */ -- 2.30.2