#include "pan_bo.h"
#include "pan_context.h"
#include "pan_format.h"
+#include "panfrost-quirks.h"
#include "util/macros.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_memory.h"
#include "util/u_vbuf.h"
#include "util/half_float.h"
#include "util/u_helpers.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"
#include "indices/u_primconvert.h"
static struct midgard_tiler_descriptor
panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
- struct midgard_tiler_descriptor t = {};
+ struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
+ bool hierarchy = !(screen->quirks & MIDGARD_NO_HIER_TILING);
+ struct midgard_tiler_descriptor t = {0};
unsigned height = batch->key.height;
unsigned width = batch->key.width;
t.hierarchy_mask =
- panfrost_choose_hierarchy_mask(width, height, vertex_count);
+ panfrost_choose_hierarchy_mask(width, height, vertex_count, hierarchy);
/* Compute the polygon header size and use that to offset the body */
unsigned header_size = panfrost_tiler_header_size(
- width, height, t.hierarchy_mask);
+ width, height, t.hierarchy_mask, hierarchy);
t.polygon_list_size = panfrost_tiler_full_size(
- width, height, t.hierarchy_mask);
+ width, height, t.hierarchy_mask, hierarchy);
/* Sanity check */
- if (t.hierarchy_mask) {
+ if (vertex_count) {
struct panfrost_bo *tiler_heap;
tiler_heap = panfrost_batch_get_tiler_heap(batch);
struct panfrost_bo *tiler_dummy;
tiler_dummy = panfrost_batch_get_tiler_dummy(batch);
+ header_size = MALI_TILER_MINIMUM_HEADER_SIZE;
/* The tiler is disabled, so don't allow the tiler heap */
t.heap_start = tiler_dummy->gpu;
t.polygon_list = tiler_dummy->gpu;
/* Disable the tiler */
- t.hierarchy_mask |= MALI_TILER_DISABLED;
+ if (hierarchy)
+ t.hierarchy_mask |= MALI_TILER_DISABLED;
+ else {
+ t.hierarchy_mask = MALI_TILER_USER;
+ t.polygon_list_size = MALI_TILER_MINIMUM_HEADER_SIZE + 4;
+
+ /* We don't have a WRITE_VALUE job, so write the polygon list manually */
+ uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->cpu + header_size);
+ polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
+ }
}
t.polygon_list_body =
.width = MALI_POSITIVE(width),
.height = MALI_POSITIVE(height),
.unknown2 = 0x1f,
- .format = 0x30000000,
+ .format = {
+ .unk3 = 0x3,
+ },
.clear_flags = 0x1000,
- .unknown_address_0 = panfrost_batch_get_scratchpad(batch)->gpu,
+ .scratchpad = panfrost_batch_get_scratchpad(batch)->gpu,
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
};
unsigned height = batch->key.height;
struct bifrost_framebuffer framebuffer = {
- .unk0 = 0x1e5, /* 1e4 if no spill */
+ .stack_shift = 0x5,
+ .unk0 = 0x1e,
.width1 = MALI_POSITIVE(width),
.height1 = MALI_POSITIVE(height),
.width2 = MALI_POSITIVE(width),
* the existing batch targeting this FBO has draws. We could probably
* avoid that by replacing plain clears by quad-draws with a specific
* color/depth/stencil value, thus avoiding the generation of extra
- * fragment/set_value jobs.
+ * fragment jobs.
*/
struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx);
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
if (!batch->framebuffer)
- batch->framebuffer = screen->require_sfbd ?
+ batch->framebuffer = (screen->quirks & MIDGARD_SFBD) ?
panfrost_attach_vt_sfbd(batch) :
panfrost_attach_vt_mfbd(batch);
static void
panfrost_default_shader_backend(struct panfrost_context *ctx)
{
+ struct panfrost_screen *screen = pan_screen(ctx->base.screen);
struct mali_shader_meta shader = {
.alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
.unknown2_4 = MALI_NO_MSAA | 0x4e0,
};
- /* unknown2_4 has 0x10 bit set on T6XX. We don't know why this is
+ /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this is
* required (independent of 32-bit/64-bit descriptors), or why it's not
* used on later GPU revisions. Otherwise, all shader jobs fault on
* these earlier chips (perhaps this is a chicken bit of some kind).
* More investigation is needed. */
- if (ctx->is_t6xx) {
+ if (screen->quirks & MIDGARD_SFBD)
shader.unknown2_4 |= 0x10;
- }
struct pipe_stencil_state default_stencil = {
.enabled = 0,
uniform->u[2] = sb.buffer_size;
}
+/* Upload the LOD parameters of one bound sampler as a sysval uniform.
+ *
+ * Reads ctx->samplers[st][sampler_index]->base and stores its min_lod,
+ * max_lod and lod_bias into uniform->f[0], f[1] and f[2] respectively.
+ * When the sampler's min_mip_filter is PIPE_TEX_MIPFILTER_NONE, f[1] is
+ * overridden with f[0] + 1/256 instead of max_lod.
+ *
+ * ctx:           context whose samplers table is read
+ * st:            shader stage, used as the first index into ctx->samplers
+ * sampler_index: index of the sampler within that stage
+ * uniform:       destination; only f[0], f[1] and f[2] are written here
+ */ static void
+panfrost_upload_sampler_sysval(
+ struct panfrost_context *ctx,
+ enum pipe_shader_type st,
+ unsigned sampler_index,
+ struct sysval_uniform *uniform)
+{
+ struct pipe_sampler_state *sampl =
+ &ctx->samplers[st][sampler_index]->base; /* NOTE(review): dereferenced
+ * without a NULL check; presumably a sampler is always bound at this
+ * index when the sysval is requested -- confirm against callers. */
+
+ uniform->f[0] = sampl->min_lod; /* f[0] <- min_lod */
+ uniform->f[1] = sampl->max_lod; /* f[1] <- max_lod (may be overridden below) */
+ uniform->f[2] = sampl->lod_bias; /* f[2] <- lod_bias */
+
+ /* Even without any errata, Midgard represents "no mipmapping" as
+ * fixing the LOD with the clamps; keep behaviour consistent. c.f.
+ * panfrost_create_sampler_state which also explains our choice of
+ * epsilon value (again to keep behaviour consistent) */
+
+ if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
+ uniform->f[1] = uniform->f[0] + (1.0/256.0); /* upper clamp = lower clamp + epsilon */
+}
+
static void panfrost_upload_num_work_groups_sysval(struct panfrost_context *ctx,
struct sysval_uniform *uniform)
{
case PAN_SYSVAL_NUM_WORK_GROUPS:
panfrost_upload_num_work_groups_sysval(ctx, &uniforms[i]);
break;
-
+ case PAN_SYSVAL_SAMPLER:
+ panfrost_upload_sampler_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
+ &uniforms[i]);
+ break;
default:
assert(0);
}
return 32 - __builtin_clz(mask);
}
-/* Fixes up a shader state with current state, returning a GPU address to the
- * patched shader */
+/* Fixes up a shader state with current state */
-static mali_ptr
-panfrost_patch_shader_state(
- struct panfrost_context *ctx,
- struct panfrost_shader_state *ss,
- enum pipe_shader_type stage,
- bool should_upload)
+static void
+panfrost_patch_shader_state(struct panfrost_context *ctx,
+ enum pipe_shader_type stage)
{
+ struct panfrost_shader_variants *all = ctx->shader[stage];
+
+ if (!all) {
+ ctx->payloads[stage].postfix.shader = 0;
+ return;
+ }
+
+ struct panfrost_shader_state *ss = &all->variants[all->active_variant];
+
ss->tripipe->texture_count = ctx->sampler_view_count[stage];
ss->tripipe->sampler_count = ctx->sampler_count[stage];
PAN_BO_ACCESS_READ |
panfrost_bo_access_for_stage(stage));
- /* We can't reuse over frames; that's not safe. The descriptor must be
- * transient uploaded */
-
- if (should_upload) {
- return panfrost_upload_transient(batch, ss->tripipe,
- sizeof(struct mali_shader_meta));
- }
-
- /* If we don't need an upload, don't bother */
- return 0;
-
-}
-
-static void
-panfrost_patch_shader_state_compute(
- struct panfrost_context *ctx,
- enum pipe_shader_type stage,
- bool should_upload)
-{
- struct panfrost_shader_variants *all = ctx->shader[stage];
-
- if (!all) {
- ctx->payloads[stage].postfix._shader_upper = 0;
- return;
- }
-
- struct panfrost_shader_state *s = &all->variants[all->active_variant];
-
- ctx->payloads[stage].postfix._shader_upper =
- panfrost_patch_shader_state(ctx, s, stage, should_upload) >> 4;
+ ctx->payloads[stage].postfix.shader = panfrost_upload_transient(batch,
+ ss->tripipe,
+ sizeof(struct mali_shader_meta));
}
/* Go through dirty flags and actualise them in the cmdstream. */
ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.occlusion_counter = ctx->occlusion_query->bo->gpu;
}
- panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_VERTEX, true);
- panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_COMPUTE, true);
+ panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
+ panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
/* Check if we need to link the gl_PointSize varying */
assert(ctx->shader[PIPE_SHADER_FRAGMENT]);
struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
- panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
+ panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
ctx->fragment_shader_core.midgard1.work_count = 16;
}
- /* Set late due to depending on render state */
- unsigned flags = ctx->fragment_shader_core.midgard1.flags;
-
/* Depending on whether it's legal in the given shader, we
* try to enable early-z testing (or forward-pixel kill?) */
- if (!variant->can_discard)
- flags |= MALI_EARLY_Z;
+ SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_EARLY_Z, !variant->can_discard);
/* Any time texturing is used, derivatives are implicitly
* calculated, so we need to enable helper invocations */
- if (variant->helper_invocations)
- flags |= MALI_HELPER_INVOCATIONS;
-
- ctx->fragment_shader_core.midgard1.flags = flags;
+ SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_HELPER_INVOCATIONS, variant->helper_invocations);
/* Assign the stencil refs late */
* thing?" by Peter Harris
*/
- if (variant->can_discard) {
- ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
- ctx->fragment_shader_core.midgard1.flags |= 0x400;
- }
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard);
+ SET_BIT(ctx->fragment_shader_core.midgard1.flags, 0x400, variant->can_discard);
/* Even on MFBD, the shader descriptor gets blend shaders. It's
* *also* copied to the blend_meta appended (by convention),
ctx->fragment_shader_core.blend.shader = 0;
}
- if (screen->require_sfbd) {
+ if (screen->quirks & MIDGARD_SFBD) {
/* When only a single render target platform is used, the blend
* information is inside the shader meta itself. We
* additionally need to signal CAN_DISCARD for nontrivial blend
* modes (so we're able to read back the destination buffer) */
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader);
+
if (!blend[0].is_shader) {
ctx->fragment_shader_core.blend.equation =
*blend[0].equation.equation;
blend[0].equation.constant;
}
- if (!blend[0].no_blending) {
- ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
- }
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending);
}
size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
- ctx->payloads[PIPE_SHADER_FRAGMENT].postfix._shader_upper = (transfer.gpu) >> 4;
+ ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu;
- if (!screen->require_sfbd) {
+ if (!(screen->quirks & MIDGARD_SFBD)) {
/* Additional blend descriptor tacked on for jobs using MFBD */
struct midgard_blend_rt rts[4];
for (unsigned i = 0; i < rt_count; ++i) {
- unsigned blend_count = 0x200;
-
- if (blend[i].is_shader) {
- /* For a blend shader, the bottom nibble corresponds to
- * the number of work registers used, which signals the
- * -existence- of a blend shader */
-
- assert(blend[i].shader.work_count >= 2);
- blend_count |= MIN2(blend[i].shader.work_count, 3);
- } else {
- /* Otherwise, the bottom bit simply specifies if
- * blending (anything other than REPLACE) is enabled */
-
- if (!blend[i].no_blending)
- blend_count |= 0x1;
- }
-
+ rts[i].flags = 0x200;
bool is_srgb =
(ctx->pipe_framebuffer.nr_cbufs > i) &&
(ctx->pipe_framebuffer.cbufs[i]) &&
util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
- rts[i].flags = blend_count;
-
- if (is_srgb)
- rts[i].flags |= MALI_BLEND_SRGB;
-
- if (!ctx->blend->base.dither)
- rts[i].flags |= MALI_BLEND_NO_DITHER;
+ SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
+ SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
+ SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
+ SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
/* TODO: sRGB in blend shaders is currently
* unimplemented. Contact me (Alyssa) if you're
if (info->primitive_restart)
draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
- /* For higher amounts of vertices (greater than what fits in a 16-bit
- * short), the other value is needed, otherwise there will be bizarre
- * rendering artefacts. It's not clear what these values mean yet. This
- * change is also needed for instancing and sometimes points (perhaps
- * related to dynamically setting gl_PointSize) */
+ /* These don't make much sense */
- bool is_points = mode == PIPE_PRIM_POINTS;
- bool many_verts = ctx->vertex_count > 0xFFFF;
- bool instanced = ctx->instance_count > 1;
+ draw_flags |= 0x3000;
- draw_flags |= (is_points || many_verts || instanced) ? 0x3000 : 0x18000;
-
- /* This doesn't make much sense */
if (mode == PIPE_PRIM_LINE_STRIP) {
draw_flags |= 0x800;
}
ctx->rasterizer = hwcso;
ctx->dirty |= PAN_DIRTY_RASTERIZER;
- ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units;
+ ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
/* Guaranteed with the core GL call, so don't expose ARB_polygon_offset */
},
.min_lod = FIXED_16(cso->min_lod),
.max_lod = FIXED_16(cso->max_lod),
+ .lod_bias = FIXED_16(cso->lod_bias),
.seamless_cube_map = cso->seamless_cube_map,
};
uint64_t outputs_written)
{
uint64_t so_outputs = 0;
- uint8_t reverse_map[64] = {};
+ uint8_t reverse_map[64] = {0};
unsigned slot = 0;
while (outputs_written)
struct pipe_sampler_view **views)
{
struct panfrost_context *ctx = pan_context(pctx);
+ unsigned new_nr = 0;
+ unsigned i;
assert(start_slot == 0);
- unsigned new_nr = 0;
- for (unsigned i = 0; i < num_views; ++i) {
+ for (i = 0; i < num_views; ++i) {
if (views[i])
new_nr = i + 1;
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
+ views[i]);
}
+ for (; i < ctx->sampler_view_count[shader]; i++) {
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
+ NULL);
+ }
ctx->sampler_view_count[shader] = new_nr;
- memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));
ctx->dirty |= PAN_DIRTY_TEXTURES;
}
ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask;
/* Depth state (TODO: Refactor) */
- SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);
+ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_WRITEMASK,
+ depth_stencil->depth.writemask);
int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
if (panfrost->blitter_wallpaper)
util_blitter_destroy(panfrost->blitter_wallpaper);
+ util_unreference_framebuffer_state(&panfrost->pipe_framebuffer);
+ u_upload_destroy(pipe->stream_uploader);
+
ralloc_free(pipe);
}
struct panfrost_screen *pscreen = pan_screen(screen);
struct pipe_context *gallium = (struct pipe_context *) ctx;
- ctx->is_t6xx = pscreen->gpu_id < 0x0700; /* Literally, "earlier than T700" */
-
gallium->screen = screen;
gallium->destroy = panfrost_destroy;