MALI_NR_CHANNELS(4);
}
+static enum bifrost_shader_type
+bifrost_blend_type_from_nir(nir_alu_type nir_type)
+{
+ switch (nir_type) {
+ case 0: /* Render target not in use */
+ return 0;
+ case nir_type_float16:
+ return BIFROST_BLEND_F16;
+ case nir_type_float32:
+ return BIFROST_BLEND_F32;
+ case nir_type_int32:
+ return BIFROST_BLEND_I32;
+ case nir_type_uint32:
+ return BIFROST_BLEND_U32;
+ case nir_type_int16:
+ return BIFROST_BLEND_I16;
+ case nir_type_uint16:
+ return BIFROST_BLEND_U16;
+ default:
+ DBG("Unsupported blend shader type for NIR alu type %d", nir_type);
+ assert(0);
+ return 0;
+ }
+}
+
void
panfrost_shader_compile(struct panfrost_context *ctx,
enum pipe_shader_ir ir_type,
state->uniform_cutoff = program.uniform_cutoff;
state->work_reg_count = program.work_register_count;
+ if (dev->quirks & IS_BIFROST)
+ for (unsigned i = 0; i < BIFROST_MAX_RENDER_TARGET_COUNT; i++)
+ state->blend_types[i] = bifrost_blend_type_from_nir(program.blend_types[i]);
+
unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
static void
panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta,
- struct midgard_blend_rt *rts)
+ void *rts)
{
const struct panfrost_device *dev = pan_device(ctx->base.screen);
/* Additional blend descriptor tacked on for jobs using MFBD */
for (unsigned i = 0; i < rt_count; ++i) {
- rts[i].flags = 0x200;
+ if (dev->quirks & IS_BIFROST) {
+ struct bifrost_blend_rt *brts = rts;
+ struct panfrost_shader_state *fs;
+ fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
+
+ brts[i].flags = 0x200;
+ if (blend[i].is_shader) {
+ /* The blend shader's address needs to have
+ * the same top 32 bits as the fragment shader's.
+ * TODO: Ensure that's always the case.
+ */
+ assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
+ (fs->bo->gpu & (0xffffffffull << 32)));
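+ /* Only the low 32 bits fit in the descriptor; the hardware takes
+ * the high bits from the fragment shader pointer. */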
+ brts[i].shader = blend[i].shader.gpu;
+ brts[i].unk2 = 0x0;
+ } else {
+ enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
+ const struct util_format_description *format_desc;
+ format_desc = util_format_description(format);
+
+ brts[i].equation = *blend[i].equation.equation;
+
+ /* TODO: this is a bit more complicated */
+ brts[i].constant = blend[i].equation.constant;
+
+ brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
+ brts[i].unk2 = 0x19;
+
+ brts[i].shader_type = fs->blend_types[i];
+ }
+ } else {
+ struct midgard_blend_rt *mrts = rts;
- bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
- (ctx->pipe_framebuffer.cbufs[i]) &&
- util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
+ mrts[i].flags = 0x200;
- SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
- SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
- SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
- SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+ bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
+ (ctx->pipe_framebuffer.cbufs[i]) &&
+ util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
- if (blend[i].is_shader) {
- rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
- } else {
- rts[i].blend.equation = *blend[i].equation.equation;
- rts[i].blend.constant = blend[i].equation.constant;
+ SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
+ SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
+ SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb);
+ SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+
+ if (blend[i].is_shader) {
+ mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
+ } else {
+ mrts[i].blend.equation = *blend[i].equation.equation;
+ mrts[i].blend.constant = blend[i].equation.constant;
+ }
}
}
}
static void
panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
struct mali_shader_meta *fragmeta,
- struct midgard_blend_rt *rts)
+ void *rts)
{
const struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_shader_state *fs;
struct panfrost_device *dev = pan_device(ctx->base.screen);
unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
size_t desc_size = sizeof(meta);
- struct midgard_blend_rt rts[4];
+ void *rts = NULL;
struct panfrost_transfer xfer;
+ unsigned rt_size;
- assert(rt_count <= ARRAY_SIZE(rts));
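+ /* Blend descriptors are tacked on after the shader meta for MFBD
+ * (Midgard or Bifrost flavour); SFBD has none. */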
+ if (dev->quirks & MIDGARD_SFBD)
+ rt_size = 0;
+ else if (dev->quirks & IS_BIFROST)
+ rt_size = sizeof(struct bifrost_blend_rt);
+ else
+ rt_size = sizeof(struct midgard_blend_rt);
- panfrost_frag_shader_meta_init(ctx, &meta, rts);
+ desc_size += rt_size * rt_count;
- if (!(dev->quirks & MIDGARD_SFBD))
- desc_size += sizeof(*rts) * rt_count;
+ if (rt_size)
+ rts = rzalloc_size(ctx, rt_size * rt_count);
+
+ panfrost_frag_shader_meta_init(ctx, &meta, rts);
xfer = panfrost_allocate_transient(batch, desc_size);
memcpy(xfer.cpu, &meta, sizeof(meta));
- memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
+ if (rt_size) {
+ memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
+ ralloc_free(rts);
+ }
shader_ptr = xfer.gpu;
} else {
unsigned stack_size;
unsigned shared_size;
+ /* For Bifrost - output type for each RT */
+ enum bifrost_shader_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT];
unsigned int varying_count;
struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
.vector_channels = 4
};
- assert(blend.blend_location < 8);
+ assert(blend.blend_location < BIFROST_MAX_RENDER_TARGET_COUNT);
assert(ctx->blend_types);
+ assert(blend.src_types[0]);
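+ /* Record this render target's output type; the driver reads it back
+ * to fill bifrost_blend_rt::shader_type. */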
ctx->blend_types[blend.blend_location] = blend.src_types[0];
bi_emit(ctx, blend);
out[idx] = PIPE_SWIZZLE_X + c;
}
}
+
+enum mali_format
+panfrost_format_to_bifrost_blend(const struct util_format_description *desc)
+{
+ enum mali_format format = panfrost_find_format(desc);
+
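+ /* A few formats use a different enumerant when referenced from a
+ * Bifrost blend descriptor; remap those and pass everything else
+ * through. */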
+ switch (format) {
+ case MALI_RGBA4_UNORM:
+ return MALI_RGBA4;
+ case MALI_RGBA8_UNORM:
+ return MALI_RGBA8_2;
+ case MALI_RGB10_A2_UNORM:
+ return MALI_RGB10_A2_2;
+ default:
+ return format;
+ }
+}
}
}
+enum mali_format
+panfrost_format_to_bifrost_blend(const struct util_format_description *desc);
+
#endif
BIFROST_BLEND_U16 = 5,
};
+#define BIFROST_MAX_RENDER_TARGET_COUNT 8
+
struct bifrost_blend_rt {
/* This is likely an analogue of the flags on
* midgard_blend_rt */
* constant_hi = int(f / 255)
* constant_lo = 65535*f - (65535/255) * constant_hi
*/
-
u16 constant;
struct mali_blend_equation equation;
+
/*
* - 0x19 normally
* - 0x3 when this slot is unused (everything else is 0 except the index)
* - 0x11 when this is the fourth slot (and it's used)
-+ * - 0 when there is a blend shader
+ * - 0 when there is a blend shader
*/
u16 unk2;
+
/* increments from 0 to 3 */
u16 index;
- union {
- struct {
- /* So far, I've only seen:
- * - R001 for 1-component formats
- * - RG01 for 2-component formats
- * - RGB1 for 3-component formats
- * - RGBA for 4-component formats
- */
- u32 swizzle : 12;
- enum mali_format format : 8;
-
- /* Type of the shader output variable. Note, this can
- * be different from the format.
- * enum bifrost_shader_type
- */
- u32 shader_type : 3;
- u32 zero : 9;
- };
-
- /* Only the low 32 bits of the blend shader are stored, the
- * high 32 bits are implicitly the same as the original shader.
- * According to the kernel driver, the program counter for
- * shaders is actually only 24 bits, so shaders cannot cross
- * the 2^24-byte boundary, and neither can the blend shader.
- * The blob handles this by allocating a 2^24 byte pool for
- * shaders, and making sure that any blend shaders are stored
- * in the same pool as the original shader. The kernel will
- * make sure this allocation is aligned to 2^24 bytes.
- */
- u32 shader;
- };
+ union {
+ struct {
+ /* So far, I've only seen:
+ * - R001 for 1-component formats
+ * - RG01 for 2-component formats
+ * - RGB1 for 3-component formats
+ * - RGBA for 4-component formats
+ */
+ u32 swizzle : 12;
+ enum mali_format format : 8;
+
+ u32 zero1 : 4;
+
+ /* Type of the shader output variable. Note, this can
+ * be different from the format.
+ * enum bifrost_shader_type
+ */
+ u32 shader_type : 3;
+ u32 zero2 : 5;
+ };
+
+ /* Only the low 32 bits of the blend shader are stored, the
+ * high 32 bits are implicitly the same as the original shader.
+ * According to the kernel driver, the program counter for
+ * shaders is actually only 24 bits, so shaders cannot cross
+ * the 2^24-byte boundary, and neither can the blend shader.
+ * The blob handles this by allocating a 2^24 byte pool for
+ * shaders, and making sure that any blend shaders are stored
+ * in the same pool as the original shader. The kernel will
+ * make sure this allocation is aligned to 2^24 bytes.
+ */
+ u32 shader;
+ };
} __attribute__((packed));
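For orientation, here is a minimal sketch (not part of the patch) of how the non-shader half of the union above packs into its 32-bit word, assuming the usual LSB-first bitfield layout on little-endian targets; the helper name is invented:

#include <stdint.h>

/* Hypothetical helper mirroring the bitfield layout above: swizzle in
 * bits 0-11, format in bits 12-19, shader_type in bits 24-26, with
 * zero1/zero2 left clear. Illustrative only, not driver code. */
static inline uint32_t
pack_bifrost_blend_word(uint32_t swizzle, uint32_t format, uint32_t shader_type)
{
        return (swizzle & 0xfff) |
               ((format & 0xff) << 12) |
               ((shader_type & 0x7) << 24);
}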
/* Descriptor for the shader. Following this is at least one, up to four blend
/* TODO figure out blend shader enable bit */
pandecode_blend_equation(&b->equation);
+
pandecode_prop("unk2 = 0x%" PRIx16, b->unk2);
pandecode_prop("index = 0x%" PRIx16, b->index);
+
+ pandecode_log(".format = ");
+ pandecode_format_short(b->format, false);
+ pandecode_swizzle(b->swizzle, b->format);
+ pandecode_log_cont(",\n");
+
+ pandecode_prop("swizzle = 0x%" PRIx32, b->swizzle);
+ pandecode_prop("format = 0x%" PRIx32, b->format);
+
+ if (b->zero1) {
+ pandecode_msg("XXX: pandecode_bifrost_blend zero1 tripped\n");
+ pandecode_prop("zero1 = 0x%" PRIx32, b->zero1);
+ }
+
+ pandecode_log(".shader_type = ");
+ switch (b->shader_type) {
+ case BIFROST_BLEND_F16:
+ pandecode_log_cont("BIFROST_BLEND_F16");
+ break;
+ case BIFROST_BLEND_F32:
+ pandecode_log_cont("BIFROST_BLEND_F32");
+ break;
+ case BIFROST_BLEND_I32:
+ pandecode_log_cont("BIFROST_BLEND_I32");
+ break;
+ case BIFROST_BLEND_U32:
+ pandecode_log_cont("BIFROST_BLEND_U32");
+ break;
+ case BIFROST_BLEND_I16:
+ pandecode_log_cont("BIFROST_BLEND_I16");
+ break;
+ case BIFROST_BLEND_U16:
+ pandecode_log_cont("BIFROST_BLEND_U16");
+ break;
+ default:
+ pandecode_log_cont("unknown 0x%x", b->shader_type);
+ break;
+ }
+ pandecode_log_cont(",\n");
+
+ if (b->zero2) {
+ pandecode_msg("XXX: pandecode_bifrost_blend zero2 tripped\n");
+ pandecode_prop("zero2 = 0x%" PRIx32, b->zero2);
+ }
+
pandecode_prop("shader = 0x%" PRIx32, b->shader);
pandecode_indent--;
int uniform_cutoff;
/* For Bifrost - output type for each RT */
- nir_alu_type blend_types[8];
+ nir_alu_type blend_types[BIFROST_MAX_RENDER_TARGET_COUNT];
/* Prepended before uniforms, mapping to SYSVAL_ names for the
* sysval */