#include "tgsi/tgsi_dump.h"
void
-panfrost_shader_compile(
- struct panfrost_context *ctx,
- struct mali_shader_meta *meta,
- enum pipe_shader_ir ir_type,
- const void *ir,
- gl_shader_stage stage,
- struct panfrost_shader_state *state,
- uint64_t *outputs_written)
+panfrost_shader_compile(struct panfrost_context *ctx,
+ enum pipe_shader_ir ir_type,
+ const void *ir,
+ gl_shader_stage stage,
+ struct panfrost_shader_state *state,
+ uint64_t *outputs_written)
{
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
uint8_t *dst;
if (size) {
state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
memcpy(state->bo->cpu, dst, size);
- meta->shader = state->bo->gpu | program.first_tag;
state->first_tag = program.first_tag;
} else {
/* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */
- meta->shader = 0x0 | 1;
state->first_tag = 1;
}
state->sysval_count = program.sysval_count;
memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
- meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
- meta->midgard1.work_count = program.work_register_count;
-
bool vertex_id = s->info.system_values_read & (1 << SYSTEM_VALUE_VERTEX_ID);
bool instance_id = s->info.system_values_read & (1 << SYSTEM_VALUE_INSTANCE_ID);
state->uniform_cutoff = program.uniform_cutoff;
state->work_reg_count = program.work_register_count;
- meta->attribute_count = state->attribute_count;
- meta->varying_count = state->varying_count;
- meta->midgard1.flags_hi = 8; /* XXX */
-
unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
void *cso)
{
struct panfrost_context *ctx = pan_context(pipe);
- struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
ctx->blend = pblend;
if (!blend)
return;
-
- if (screen->quirks & MIDGARD_SFBD) {
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
- }
}
static void
tp->postfix.occlusion_counter = 0;
}
+static void
+panfrost_shader_meta_init(struct panfrost_context *ctx,
+ enum pipe_shader_type st,
+ struct mali_shader_meta *meta)
+{
+ struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
+
+ memset(meta, 0, sizeof(*meta));
+ meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
+ meta->midgard1.uniform_count = MIN2(ss->uniform_count,
+ ss->uniform_cutoff);
+ meta->midgard1.work_count = ss->work_reg_count;
+ meta->attribute_count = ss->attribute_count;
+ meta->varying_count = ss->varying_count;
+ meta->midgard1.flags_hi = 0x8; /* XXX */
+ meta->midgard1.flags_lo = 0x220;
+ meta->texture_count = ctx->sampler_view_count[st];
+ meta->sampler_count = ctx->sampler_count[st];
+ meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
+}
+
+unsigned
+panfrost_translate_compare_func(enum pipe_compare_func in)
+{
+ switch (in) {
+ case PIPE_FUNC_NEVER:
+ return MALI_FUNC_NEVER;
+
+ case PIPE_FUNC_LESS:
+ return MALI_FUNC_LESS;
+
+ case PIPE_FUNC_EQUAL:
+ return MALI_FUNC_EQUAL;
+
+ case PIPE_FUNC_LEQUAL:
+ return MALI_FUNC_LEQUAL;
+
+ case PIPE_FUNC_GREATER:
+ return MALI_FUNC_GREATER;
+
+ case PIPE_FUNC_NOTEQUAL:
+ return MALI_FUNC_NOTEQUAL;
+
+ case PIPE_FUNC_GEQUAL:
+ return MALI_FUNC_GEQUAL;
+
+ case PIPE_FUNC_ALWAYS:
+ return MALI_FUNC_ALWAYS;
+
+ default:
+ unreachable("Invalid func");
+ }
+}
+
+static unsigned
+panfrost_translate_stencil_op(enum pipe_stencil_op in)
+{
+ switch (in) {
+ case PIPE_STENCIL_OP_KEEP:
+ return MALI_STENCIL_KEEP;
+
+ case PIPE_STENCIL_OP_ZERO:
+ return MALI_STENCIL_ZERO;
+
+ case PIPE_STENCIL_OP_REPLACE:
+ return MALI_STENCIL_REPLACE;
+
+ case PIPE_STENCIL_OP_INCR:
+ return MALI_STENCIL_INCR;
+
+ case PIPE_STENCIL_OP_DECR:
+ return MALI_STENCIL_DECR;
+
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return MALI_STENCIL_INCR_WRAP;
+
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return MALI_STENCIL_DECR_WRAP;
+
+ case PIPE_STENCIL_OP_INVERT:
+ return MALI_STENCIL_INVERT;
+
+ default:
+ unreachable("Invalid stencil op");
+ }
+}
+
+static void
+panfrost_make_stencil_state(const struct pipe_stencil_state *in,
+ struct mali_stencil_test *out)
+{
+ out->ref = 0; /* Gallium gets it from elsewhere */
+
+ out->mask = in->valuemask;
+ out->func = panfrost_translate_compare_func(in->func);
+ out->sfail = panfrost_translate_stencil_op(in->fail_op);
+ out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
+ out->dppass = panfrost_translate_stencil_op(in->zpass_op);
+}
+
+static void
+panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
+ struct mali_shader_meta *fragmeta)
+{
+ if (!ctx->rasterizer) {
+ SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
+ SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
+ fragmeta->depth_units = 0.0f;
+ fragmeta->depth_factor = 0.0f;
+ SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
+ SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
+ return;
+ }
+
+ bool msaa = ctx->rasterizer->base.multisample;
+
+ /* TODO: Sample size */
+ SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
+ SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
+ fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
+ fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
+
+ /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
+
+ SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
+ ctx->rasterizer->base.offset_tri);
+ SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
+ ctx->rasterizer->base.offset_tri);
+}
+
+static void
+panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
+ struct mali_shader_meta *fragmeta)
+{
+ const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
+ int zfunc = PIPE_FUNC_ALWAYS;
+
+ if (!zsa) {
+ struct pipe_stencil_state default_stencil = {
+ .enabled = 0,
+ .func = PIPE_FUNC_ALWAYS,
+ .fail_op = MALI_STENCIL_KEEP,
+ .zfail_op = MALI_STENCIL_KEEP,
+ .zpass_op = MALI_STENCIL_KEEP,
+ .writemask = 0xFF,
+ .valuemask = 0xFF
+ };
+
+ panfrost_make_stencil_state(&default_stencil,
+ &fragmeta->stencil_front);
+ fragmeta->stencil_mask_front = default_stencil.writemask;
+ fragmeta->stencil_back = fragmeta->stencil_front;
+ fragmeta->stencil_mask_back = default_stencil.writemask;
+ SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
+ SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
+ } else {
+ SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
+ zsa->stencil[0].enabled);
+ panfrost_make_stencil_state(&zsa->stencil[0],
+ &fragmeta->stencil_front);
+ fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
+ fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
+
+ /* If back-stencil is not enabled, use the front values */
+
+ if (zsa->stencil[1].enabled) {
+ panfrost_make_stencil_state(&zsa->stencil[1],
+ &fragmeta->stencil_back);
+ fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
+ fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
+ } else {
+ fragmeta->stencil_back = fragmeta->stencil_front;
+ fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
+ fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
+ }
+
+ if (zsa->depth.enabled)
+ zfunc = zsa->depth.func;
+
+ /* Depth state (TODO: Refactor) */
+
+ SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
+ zsa->depth.writemask);
+ }
+
+ fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
+ fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
+}
+
+static void
+panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
+ struct mali_shader_meta *fragmeta,
+ struct midgard_blend_rt *rts)
+{
+ const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+
+ SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
+ (screen->quirks & MIDGARD_SFBD) && ctx->blend &&
+ !ctx->blend->base.dither);
+
+ /* Get blending setup */
+ unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
+
+ struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
+ unsigned shader_offset = 0;
+ struct panfrost_bo *shader_bo = NULL;
+
+ for (unsigned c = 0; c < rt_count; ++c)
+ blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
+ &shader_offset);
+
+ /* If there is a blend shader, work registers are shared. XXX: opt */
+
+ for (unsigned c = 0; c < rt_count; ++c) {
+ if (blend[c].is_shader)
+ fragmeta->midgard1.work_count = 16;
+ }
+
+ /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
+ * copied to the blend_meta appended (by convention), but this is the
+ * field actually read by the hardware. (Or maybe both are read...?).
+ * Specify the last RTi with a blend shader. */
+
+ fragmeta->blend.shader = 0;
+
+ for (signed rt = (rt_count - 1); rt >= 0; --rt) {
+ if (!blend[rt].is_shader)
+ continue;
+
+ fragmeta->blend.shader = blend[rt].shader.gpu |
+ blend[rt].shader.first_tag;
+ break;
+ }
+
+ if (screen->quirks & MIDGARD_SFBD) {
+ /* When only a single render target platform is used, the blend
+ * information is inside the shader meta itself. We additionally
+ * need to signal CAN_DISCARD for nontrivial blend modes (so
+ * we're able to read back the destination buffer) */
+
+ SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
+ blend[0].is_shader);
+
+ if (!blend[0].is_shader) {
+ fragmeta->blend.equation = *blend[0].equation.equation;
+ fragmeta->blend.constant = blend[0].equation.constant;
+ }
+
+ SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
+ !blend[0].no_blending);
+ return;
+ }
+
+ /* Additional blend descriptor tacked on for jobs using MFBD */
+
+ for (unsigned i = 0; i < rt_count; ++i) {
+ rts[i].flags = 0x200;
+
+ bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
+ (ctx->pipe_framebuffer.cbufs[i]) &&
+ util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
+
+ SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
+ SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
+ SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
+ SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
+
+ if (blend[i].is_shader) {
+ rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
+ } else {
+ rts[i].blend.equation = *blend[i].equation.equation;
+ rts[i].blend.constant = blend[i].equation.constant;
+ }
+ }
+}
+
+static void
+panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
+ struct mali_shader_meta *fragmeta,
+ struct midgard_blend_rt *rts)
+{
+ const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+ struct panfrost_shader_state *fs;
+
+ fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
+
+ fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
+ fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
+ fragmeta->unknown2_4 = 0x4e0;
+
+ /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
+ * is required (independent of 32-bit/64-bit descriptors), or why it's
+ * not used on later GPU revisions. Otherwise, all shader jobs fault on
+ * these earlier chips (perhaps this is a chicken bit of some kind).
+ * More investigation is needed. */
+
+ SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD);
+
+ /* Depending on whether it's legal to in the given shader, we try to
+ * enable early-z testing (or forward-pixel kill?) */
+
+ SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
+ !fs->can_discard && !fs->writes_depth);
+
+ /* Add the writes Z/S flags if needed. */
+ SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
+ SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
+
+ /* Any time texturing is used, derivatives are implicitly calculated,
+ * so we need to enable helper invocations */
+
+ SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
+ fs->helper_invocations);
+
+ /* CAN_DISCARD should be set if the fragment shader possibly contains a
+ * 'discard' instruction. It is likely this is related to optimizations
+ * related to forward-pixel kill, as per "Mali Performance 3: Is
+ * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
+
+ SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
+ SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
+
+ panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
+ panfrost_frag_meta_zsa_update(ctx, fragmeta);
+ panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
+}
+
void
panfrost_emit_shader_meta(struct panfrost_batch *batch,
enum pipe_shader_type st,
return;
}
+ struct mali_shader_meta meta;
+
+ panfrost_shader_meta_init(ctx, st, &meta);
+
/* Add the shader BO to the batch. */
panfrost_batch_add_bo(batch, ss->bo,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
panfrost_bo_access_for_stage(st));
- vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
- sizeof(*ss->tripipe));
+ mali_ptr shader_ptr;
+
+ if (st == PIPE_SHADER_FRAGMENT) {
+ struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+ unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
+ size_t desc_size = sizeof(meta);
+ struct midgard_blend_rt rts[4];
+ struct panfrost_transfer xfer;
+
+ assert(rt_count <= ARRAY_SIZE(rts));
+
+ panfrost_frag_shader_meta_init(ctx, &meta, rts);
+
+ if (!(screen->quirks & MIDGARD_SFBD))
+ desc_size += sizeof(*rts) * rt_count;
+
+ xfer = panfrost_allocate_transient(batch, desc_size);
+
+ memcpy(xfer.cpu, &meta, sizeof(meta));
+ memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
+
+ shader_ptr = xfer.gpu;
+ } else {
+ shader_ptr = panfrost_upload_transient(batch, &meta,
+ sizeof(meta));
+ }
+
+ vtp->postfix.shader = shader_ptr;
}
static void
#include "pan_job.h"
+unsigned
+panfrost_translate_compare_func(enum pipe_compare_func in);
+
void
panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
struct midgard_payload_vertex_tiler *vt);
so->variant_count = 1;
so->active_variant = 0;
- /* calloc, instead of malloc - to zero unused fields */
- v->tripipe = CALLOC_STRUCT(mali_shader_meta);
-
if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
struct blob_reader reader;
const struct pipe_binary_program_header *hdr = cso->prog;
so->cbase.ir_type = PIPE_SHADER_IR_NIR;
}
- panfrost_shader_compile(ctx, v->tripipe,
- so->cbase.ir_type, so->cbase.prog,
- MESA_SHADER_COMPUTE, v, NULL);
+ panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
+ MESA_SHADER_COMPUTE, v, NULL);
return so;
}
if (info->input)
pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
- panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
panfrost_emit_shader_meta(batch, PIPE_SHADER_COMPUTE, payload);
panfrost_emit_const_buf(batch, PIPE_SHADER_COMPUTE, payload);
panfrost_emit_shared_memory(batch, info, payload);
}
}
-static unsigned
-panfrost_translate_compare_func(enum pipe_compare_func in)
-{
- switch (in) {
- case PIPE_FUNC_NEVER:
- return MALI_FUNC_NEVER;
-
- case PIPE_FUNC_LESS:
- return MALI_FUNC_LESS;
-
- case PIPE_FUNC_EQUAL:
- return MALI_FUNC_EQUAL;
-
- case PIPE_FUNC_LEQUAL:
- return MALI_FUNC_LEQUAL;
-
- case PIPE_FUNC_GREATER:
- return MALI_FUNC_GREATER;
-
- case PIPE_FUNC_NOTEQUAL:
- return MALI_FUNC_NOTEQUAL;
-
- case PIPE_FUNC_GEQUAL:
- return MALI_FUNC_GEQUAL;
-
- case PIPE_FUNC_ALWAYS:
- return MALI_FUNC_ALWAYS;
-
- default:
- unreachable("Invalid func");
- }
-}
-
-static unsigned
-panfrost_translate_stencil_op(enum pipe_stencil_op in)
-{
- switch (in) {
- case PIPE_STENCIL_OP_KEEP:
- return MALI_STENCIL_KEEP;
-
- case PIPE_STENCIL_OP_ZERO:
- return MALI_STENCIL_ZERO;
-
- case PIPE_STENCIL_OP_REPLACE:
- return MALI_STENCIL_REPLACE;
-
- case PIPE_STENCIL_OP_INCR:
- return MALI_STENCIL_INCR;
-
- case PIPE_STENCIL_OP_DECR:
- return MALI_STENCIL_DECR;
-
- case PIPE_STENCIL_OP_INCR_WRAP:
- return MALI_STENCIL_INCR_WRAP;
-
- case PIPE_STENCIL_OP_DECR_WRAP:
- return MALI_STENCIL_DECR_WRAP;
-
- case PIPE_STENCIL_OP_INVERT:
- return MALI_STENCIL_INVERT;
-
- default:
- unreachable("Invalid stencil op");
- }
-}
-
-static void
-panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
-{
- out->ref = 0; /* Gallium gets it from elsewhere */
-
- out->mask = in->valuemask;
- out->func = panfrost_translate_compare_func(in->func);
- out->sfail = panfrost_translate_stencil_op(in->fail_op);
- out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
- out->dppass = panfrost_translate_stencil_op(in->zpass_op);
-}
-
-static void
-panfrost_default_shader_backend(struct panfrost_context *ctx)
-{
- struct panfrost_screen *screen = pan_screen(ctx->base.screen);
- struct mali_shader_meta shader = {
- .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
-
- .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
- .unknown2_4 = MALI_NO_MSAA | 0x4e0,
- };
-
- /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this is
- * required (independent of 32-bit/64-bit descriptors), or why it's not
- * used on later GPU revisions. Otherwise, all shader jobs fault on
- * these earlier chips (perhaps this is a chicken bit of some kind).
- * More investigation is needed. */
-
- if (screen->quirks & MIDGARD_SFBD)
- shader.unknown2_4 |= 0x10;
-
- struct pipe_stencil_state default_stencil = {
- .enabled = 0,
- .func = PIPE_FUNC_ALWAYS,
- .fail_op = MALI_STENCIL_KEEP,
- .zfail_op = MALI_STENCIL_KEEP,
- .zpass_op = MALI_STENCIL_KEEP,
- .writemask = 0xFF,
- .valuemask = 0xFF
- };
-
- panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
- shader.stencil_mask_front = default_stencil.writemask;
-
- panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
- shader.stencil_mask_back = default_stencil.writemask;
-
- if (default_stencil.enabled)
- shader.unknown2_4 |= MALI_STENCIL_TEST;
-
- memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
-}
-
bool
panfrost_writes_point_size(struct panfrost_context *ctx)
{
return 32 - __builtin_clz(mask);
}
-/* Fixes up a shader state with current state */
-
-void
-panfrost_patch_shader_state(struct panfrost_context *ctx,
- enum pipe_shader_type stage)
-{
- struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, stage);
-
- if (!ss)
- return;
-
- ss->tripipe->texture_count = ctx->sampler_view_count[stage];
- ss->tripipe->sampler_count = ctx->sampler_count[stage];
-
- ss->tripipe->midgard1.flags_lo = 0x220;
-
- unsigned ubo_count = panfrost_ubo_count(ctx, stage);
- ss->tripipe->midgard1.uniform_buffer_count = ubo_count;
-}
-
/* Go through dirty flags and actualise them in the cmdstream. */
static void
panfrost_emit_for_draw(struct panfrost_context *ctx)
{
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
- struct panfrost_screen *screen = pan_screen(ctx->base.screen);
panfrost_batch_add_fbo_bos(batch);
unsigned total_count = ctx->padded_count * ctx->instance_count;
panfrost_emit_varying_descriptor(ctx, total_count);
- if (ctx->rasterizer) {
- bool msaa = ctx->rasterizer->base.multisample;
-
- /* TODO: Sample size */
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
- }
-
panfrost_batch_set_requirements(batch);
panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
panfrost_vt_update_occlusion_query(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
- panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX,
&ctx->payloads[PIPE_SHADER_VERTEX]);
-
- if (ctx->shader[PIPE_SHADER_FRAGMENT]) {
- struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
-
- panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
-
-#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
-
- COPY(shader);
- COPY(attribute_count);
- COPY(varying_count);
- COPY(texture_count);
- COPY(sampler_count);
- COPY(midgard1.uniform_count);
- COPY(midgard1.uniform_buffer_count);
- COPY(midgard1.work_count);
- COPY(midgard1.flags_lo);
- COPY(midgard1.flags_hi);
-
-#undef COPY
-
- /* Get blending setup */
- unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
-
- struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
- unsigned shader_offset = 0;
- struct panfrost_bo *shader_bo = NULL;
-
- for (unsigned c = 0; c < rt_count; ++c) {
- blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset);
- }
-
- /* If there is a blend shader, work registers are shared. XXX: opt */
-
- for (unsigned c = 0; c < rt_count; ++c) {
- if (blend[c].is_shader)
- ctx->fragment_shader_core.midgard1.work_count = 16;
- }
-
- /* Depending on whether it's legal to in the given shader, we
- * try to enable early-z testing (or forward-pixel kill?) */
-
- SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z,
- !variant->can_discard && !variant->writes_depth);
-
- /* Add the writes Z/S flags if needed. */
- SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo,
- MALI_WRITES_Z, variant->writes_depth);
- SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi,
- MALI_WRITES_S, variant->writes_stencil);
-
- /* Any time texturing is used, derivatives are implicitly
- * calculated, so we need to enable helper invocations */
-
- SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations);
-
- /* Assign the stencil refs late */
-
- unsigned front_ref = ctx->stencil_ref.ref_value[0];
- unsigned back_ref = ctx->stencil_ref.ref_value[1];
- bool back_enab = ctx->depth_stencil->stencil[1].enabled;
-
- ctx->fragment_shader_core.stencil_front.ref = front_ref;
- ctx->fragment_shader_core.stencil_back.ref = back_enab ? back_ref : front_ref;
-
- /* CAN_DISCARD should be set if the fragment shader possibly
- * contains a 'discard' instruction. It is likely this is
- * related to optimizations related to forward-pixel kill, as
- * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
- * thing?" by Peter Harris
- */
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard);
- SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard);
-
- /* Even on MFBD, the shader descriptor gets blend shaders. It's
- * *also* copied to the blend_meta appended (by convention),
- * but this is the field actually read by the hardware. (Or
- * maybe both are read...?). Specify the last RTi with a blend
- * shader. */
-
- ctx->fragment_shader_core.blend.shader = 0;
-
- for (signed rt = (rt_count - 1); rt >= 0; --rt) {
- if (blend[rt].is_shader) {
- ctx->fragment_shader_core.blend.shader =
- blend[rt].shader.gpu | blend[rt].shader.first_tag;
- break;
- }
- }
-
- if (screen->quirks & MIDGARD_SFBD) {
- /* When only a single render target platform is used, the blend
- * information is inside the shader meta itself. We
- * additionally need to signal CAN_DISCARD for nontrivial blend
- * modes (so we're able to read back the destination buffer) */
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader);
-
- if (!blend[0].is_shader) {
- ctx->fragment_shader_core.blend.equation =
- *blend[0].equation.equation;
- ctx->fragment_shader_core.blend.constant =
- blend[0].equation.constant;
- }
-
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending);
- }
-
- size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
- struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
- memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
-
- ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu;
-
- if (!(screen->quirks & MIDGARD_SFBD)) {
- /* Additional blend descriptor tacked on for jobs using MFBD */
-
- struct midgard_blend_rt rts[4];
-
- for (unsigned i = 0; i < rt_count; ++i) {
- rts[i].flags = 0x200;
-
- bool is_srgb =
- (ctx->pipe_framebuffer.nr_cbufs > i) &&
- (ctx->pipe_framebuffer.cbufs[i]) &&
- util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
-
- SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
- SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
- SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
- SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
-
- if (blend[i].is_shader) {
- rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
- } else {
- rts[i].blend.equation = *blend[i].equation.equation;
- rts[i].blend.constant = blend[i].equation.constant;
- }
- }
-
- memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count);
- }
- }
+ panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
+ &ctx->payloads[PIPE_SHADER_FRAGMENT]);
/* We stage to transient, so always dirty.. */
if (ctx->vertex)
if (!hwcso)
return;
- ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
- ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
-
/* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */
assert(ctx->rasterizer->base.offset_clamp == 0.0);
- /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
-
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_A, ctx->rasterizer->base.offset_tri);
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_B, ctx->rasterizer->base.offset_tri);
-
/* Point sprites are emulated */
struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) {
struct panfrost_context *ctx = pan_context(pctx);
- struct mali_shader_meta meta;
struct panfrost_shader_state state;
uint64_t outputs_written;
- panfrost_shader_compile(ctx, &meta,
- PIPE_SHADER_IR_NIR,
- so->base.ir.nir,
- tgsi_processor_to_shader_stage(stage), &state,
- &outputs_written);
+ panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
+ so->base.ir.nir,
+ tgsi_processor_to_shader_stage(stage),
+ &state, &outputs_written);
}
return so;
PIPE_SPRITE_COORD_UPPER_LEFT;
}
}
-
- variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta));
-
}
/* Select this variant */
if (!shader_state->compiled) {
uint64_t outputs_written = 0;
- panfrost_shader_compile(ctx, shader_state->tripipe,
- variants->base.type,
- variants->base.type == PIPE_SHADER_IR_NIR ?
- variants->base.ir.nir :
- variants->base.tokens,
- tgsi_processor_to_shader_stage(type), shader_state,
+ panfrost_shader_compile(ctx, variants->base.type,
+ variants->base.type == PIPE_SHADER_IR_NIR ?
+ variants->base.ir.nir :
+ variants->base.tokens,
+ tgsi_processor_to_shader_stage(type),
+ shader_state,
&outputs_written);
shader_state->compiled = true;
ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
}
- /* Stencil state */
- SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled);
-
- panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
- ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
-
- /* If back-stencil is not enabled, use the front values */
- bool back_enab = ctx->depth_stencil->stencil[1].enabled;
- unsigned back_index = back_enab ? 1 : 0;
-
- panfrost_make_stencil_state(&depth_stencil->stencil[back_index], &ctx->fragment_shader_core.stencil_back);
- ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask;
-
- /* Depth state (TODO: Refactor) */
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_WRITEMASK,
- depth_stencil->depth.writemask);
-
- int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
-
- ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
- ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
-
/* Bounds test not implemented */
assert(!depth_stencil->depth.bounds_test);
}
panfrost_batch_init(ctx);
panfrost_emit_vertex_payload(ctx);
panfrost_invalidate_frame(ctx);
- panfrost_default_shader_backend(ctx);
return gallium;
}
/* Each draw has corresponding vertex and tiler payloads */
struct midgard_payload_vertex_tiler payloads[PIPE_SHADER_TYPES];
- /* The fragment shader binary itself is pointed here (for the tripipe) but
- * also everything else in the shader core, including blending, the
- * stencil/depth tests, etc. Refer to the presentations. */
-
- struct mali_shader_meta fragment_shader_core;
-
unsigned vertex_count;
unsigned instance_count;
enum pipe_prim_type active_prim;
struct panfrost_shader_state {
/* Compiled, mapped descriptor, ready for the hardware */
bool compiled;
- struct mali_shader_meta *tripipe;
/* Non-descript information */
int uniform_count;
bool
panfrost_writes_point_size(struct panfrost_context *ctx);
-void
-panfrost_patch_shader_state(struct panfrost_context *ctx,
- enum pipe_shader_type stage);
-
struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);
panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws);
void
-panfrost_shader_compile(
- struct panfrost_context *ctx,
- struct mali_shader_meta *meta,
- enum pipe_shader_ir ir_type,
- const void *ir,
- gl_shader_stage stage,
- struct panfrost_shader_state *state,
- uint64_t *outputs_written);
+panfrost_shader_compile(struct panfrost_context *ctx,
+ enum pipe_shader_ir ir_type,
+ const void *ir,
+ gl_shader_stage stage,
+ struct panfrost_shader_state *state,
+ uint64_t *outputs_written);
unsigned
panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage);