/*
* © Copyright 2018 Alyssa Rosenzweig
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
#include "util/half_float.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
+#include "util/u_prim.h"
#include "util/u_prim_restart.h"
#include "indices/u_primconvert.h"
#include "tgsi/tgsi_parse.h"
#include "pan_blending.h"
#include "pan_blend_shaders.h"
#include "pan_util.h"
-#include "pan_tiler.h"
-
-/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
-//#define DRY_RUN
/* Framebuffer descriptor */
unsigned vertex_count)
{
struct midgard_tiler_descriptor t = {};
+ struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
t.hierarchy_mask =
panfrost_choose_hierarchy_mask(width, height, vertex_count);
unsigned header_size = panfrost_tiler_header_size(
width, height, t.hierarchy_mask);
- unsigned body_size = panfrost_tiler_body_size(
+ t.polygon_list_size = panfrost_tiler_full_size(
width, height, t.hierarchy_mask);
/* Sanity check */
if (t.hierarchy_mask) {
- assert(ctx->tiler_polygon_list.bo->size >= (header_size + body_size));
+ t.polygon_list = panfrost_job_get_polygon_list(batch,
+ header_size + t.polygon_list_size);
- /* Specify allocated tiler structures */
- t.polygon_list = ctx->tiler_polygon_list.bo->gpu;
/* Allow the entire tiler heap */
t.heap_start = ctx->tiler_heap.bo->gpu;
/* Use a dummy polygon list */
t.polygon_list = ctx->tiler_dummy.bo->gpu;
- /* Also, set a "tiler disabled?" flag? */
- t.hierarchy_mask |= 0x1000;
+ /* Disable the tiler */
+ t.hierarchy_mask |= MALI_TILER_DISABLED;
}
t.polygon_list_body =
t.polygon_list + header_size;
- t.polygon_list_size =
- header_size + body_size;
-
return t;
}
static mali_ptr
panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
- return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd)) | MALI_MFBD;
+ struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(ctx, ~0);
+
+ return panfrost_upload_transient(ctx, &mfbd, sizeof(mfbd)) | MALI_MFBD;
}
static mali_ptr
panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
{
- return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
+ struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(ctx, ~0);
+
+ return panfrost_upload_transient(ctx, &sfbd, sizeof(sfbd)) | MALI_SFBD;
}
static void
-panfrost_attach_vt_framebuffer(struct panfrost_context *ctx, bool skippable)
+panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
{
/* Skip the attach if we can */
- if (skippable && ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer) {
+ if (ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer) {
assert(ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
return;
}
static void
panfrost_invalidate_frame(struct panfrost_context *ctx)
{
- struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-
- if (screen->require_sfbd)
- ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx, ~0);
- else
- ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx, ~0);
-
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->payloads[i].postfix.framebuffer = 0;
/* XXX */
ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
+
+ /* TODO: When does this need to be handled? */
+ ctx->active_queries = true;
}
/* In practice, every field of these payloads should be configurable
case PIPE_TEX_WRAP_REPEAT:
return MALI_WRAP_REPEAT;
+ /* TODO: lower GL_CLAMP? */
+ case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return MALI_WRAP_CLAMP_TO_EDGE;
return transfer;
}
-static mali_ptr
-panfrost_emit_varyings(
- struct panfrost_context *ctx,
- union mali_attr *slot,
- unsigned stride,
- unsigned count)
-{
- /* Fill out the descriptor */
- slot->stride = stride;
- slot->size = stride * count;
- slot->shift = slot->extra_flags = 0;
-
- struct panfrost_transfer transfer =
- panfrost_allocate_transient(ctx, slot->size);
-
- slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
-
- return transfer.gpu;
-}
-
-static void
-panfrost_emit_point_coord(union mali_attr *slot)
-{
- slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
- slot->stride = slot->size = slot->shift = slot->extra_flags = 0;
-}
-
-static void
-panfrost_emit_front_face(union mali_attr *slot)
-{
- slot->elements = MALI_VARYING_FRONT_FACING | MALI_ATTR_INTERNAL;
-}
-
-static void
-panfrost_emit_varying_descriptor(
- struct panfrost_context *ctx,
- unsigned vertex_count)
-{
- /* Load the shaders */
-
- struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
- struct panfrost_shader_state *fs = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
- unsigned int num_gen_varyings = 0;
-
- /* Allocate the varying descriptor */
-
- size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
- size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
-
- struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
- vs_size + fs_size);
-
- /*
- * Assign ->src_offset now that we know about all the general purpose
- * varyings that will be used by the fragment and vertex shaders.
- */
- for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
- /*
- * General purpose varyings have ->index set to 0, skip other
- * entries.
- */
- if (vs->varyings[i].index)
- continue;
-
- vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
- }
-
- for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
- unsigned j;
-
- /* If we have a point sprite replacement, handle that here. We
- * have to translate location first. TODO: Flip y in shader.
- * We're already keying ... just time crunch .. */
-
- unsigned loc = fs->varyings_loc[i];
- unsigned pnt_loc =
- (loc >= VARYING_SLOT_VAR0) ? (loc - VARYING_SLOT_VAR0) :
- (loc == VARYING_SLOT_PNTC) ? 8 :
- ~0;
-
- if (~pnt_loc && fs->point_sprite_mask & (1 << pnt_loc)) {
- /* gl_PointCoord index by convention */
- fs->varyings[i].index = 3;
- fs->reads_point_coord = true;
-
- /* Swizzle out the z/w to 0/1 */
- fs->varyings[i].format = MALI_RG16F;
- fs->varyings[i].swizzle =
- panfrost_get_default_swizzle(2);
-
- continue;
- }
-
- if (fs->varyings[i].index)
- continue;
-
- /*
- * Re-use the VS general purpose varying pos if it exists,
- * create a new one otherwise.
- */
- for (j = 0; j < vs->tripipe->varying_count; j++) {
- if (fs->varyings_loc[i] == vs->varyings_loc[j])
- break;
- }
-
- if (j < vs->tripipe->varying_count)
- fs->varyings[i].src_offset = vs->varyings[j].src_offset;
- else
- fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
- }
-
- memcpy(trans.cpu, vs->varyings, vs_size);
- memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
-
- ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
- ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size;
-
- /* Buffer indices must be in this order per our convention */
- union mali_attr varyings[PIPE_MAX_ATTRIBS];
- unsigned idx = 0;
-
- panfrost_emit_varyings(ctx, &varyings[idx++], num_gen_varyings * 16,
- vertex_count);
-
- /* fp32 vec4 gl_Position */
- ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
- panfrost_emit_varyings(ctx, &varyings[idx++],
- sizeof(float) * 4, vertex_count);
-
-
- if (vs->writes_point_size || fs->reads_point_coord) {
- /* fp16 vec1 gl_PointSize */
- ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
- panfrost_emit_varyings(ctx, &varyings[idx++],
- 2, vertex_count);
- } else if (fs->reads_face) {
- /* Dummy to advance index */
- ++idx;
- }
-
- if (fs->reads_point_coord) {
- /* Special descriptor */
- panfrost_emit_point_coord(&varyings[idx++]);
- } else if (fs->reads_face) {
- ++idx;
- }
-
- if (fs->reads_face) {
- panfrost_emit_front_face(&varyings[idx++]);
- }
-
- mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
- ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
- ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;
-}
-
mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
{
* QED.
*/
- unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].draw_start;
+ unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
for (unsigned i = 0; i < so->num_elements; ++i) {
unsigned vbi = so->pipe[i].vertex_buffer_index;
float f[4];
int32_t i[4];
uint32_t u[4];
+ uint64_t du[2];
};
};
uniform->i[dim] = tex->texture->array_size;
}
+static void panfrost_upload_ssbo_sysval(
+ struct panfrost_context *ctx,
+ enum pipe_shader_type st,
+ unsigned ssbo_id,
+ struct sysval_uniform *uniform)
+{
+ assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
+ struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
+
+ /* Compute address */
+ struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
+ struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
+
+ panfrost_job_add_bo(batch, bo);
+
+ /* Upload address and size as sysval */
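+ * (du[0] aliases u[0]/u[1] in the sysval union, so the 64-bit address
+ * fills the first two words and u[2] carries the size) */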
+ uniform->du[0] = bo->gpu + sb.buffer_offset;
+ uniform->u[2] = sb.buffer_size;
+}
+
+static void panfrost_upload_num_work_groups_sysval(struct panfrost_context *ctx,
+ struct sysval_uniform *uniform)
+{
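+ /* gl_NumWorkGroups is simply the launch grid dimensions */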
+ uniform->u[0] = ctx->compute_grid->grid[0];
+ uniform->u[1] = ctx->compute_grid->grid[1];
+ uniform->u[2] = ctx->compute_grid->grid[2];
+}
+
static void panfrost_upload_sysvals(struct panfrost_context *ctx, void *buf,
struct panfrost_shader_state *ss,
enum pipe_shader_type st)
panfrost_upload_txs_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
&uniforms[i]);
break;
+ case PAN_SYSVAL_SSBO:
+ panfrost_upload_ssbo_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
+ &uniforms[i]);
+ break;
+ case PAN_SYSVAL_NUM_WORK_GROUPS:
+ panfrost_upload_num_work_groups_sysval(ctx, &uniforms[i]);
+ break;
+
default:
assert(0);
}
struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
- panfrost_attach_vt_framebuffer(ctx, true);
+ panfrost_attach_vt_framebuffer(ctx);
if (with_vertex_data) {
panfrost_emit_vertex_data(job);
panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
+ panfrost_job_add_bo(job, variant->bo);
+
#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
COPY(shader);
#undef COPY
/* Get blending setup */
- struct panfrost_blend_final blend =
- panfrost_get_blend_for_context(ctx, 0);
+ unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
+
+ struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
- /* If there is a blend shader, work registers are shared */
+ for (unsigned c = 0; c < rt_count; ++c)
+ blend[c] = panfrost_get_blend_for_context(ctx, c);
- if (blend.is_shader)
- ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16;
+ /* If there is a blend shader, work registers are shared. XXX: opt */
+
+ for (unsigned c = 0; c < rt_count; ++c) {
+ if (blend[c].is_shader)
+ ctx->fragment_shader_core.midgard1.work_count = 16;
+ }
/* Set late due to depending on render state */
unsigned flags = ctx->fragment_shader_core.midgard1.flags;
ctx->fragment_shader_core.midgard1.flags |= 0x400;
}
- /* Check if we're using the default blend descriptor (fast path) */
-
- bool no_blending =
- !blend.is_shader &&
- (blend.equation.equation->rgb_mode == 0x122) &&
- (blend.equation.equation->alpha_mode == 0x122) &&
- (blend.equation.equation->color_mask == 0xf);
-
/* Even on MFBD, the shader descriptor gets blend shaders. It's
* *also* copied to the blend_meta appended (by convention),
* but this is the field actually read by the hardware. (Or
* maybe both are read...?) */
- if (blend.is_shader) {
+ if (blend[0].is_shader) {
ctx->fragment_shader_core.blend.shader =
- blend.shader.gpu;
+ blend[0].shader.bo->gpu | blend[0].shader.first_tag;
} else {
ctx->fragment_shader_core.blend.shader = 0;
}
* additionally need to signal CAN_DISCARD for nontrivial blend
* modes (so we're able to read back the destination buffer) */
- if (!blend.is_shader) {
+ if (!blend[0].is_shader) {
ctx->fragment_shader_core.blend.equation =
- *blend.equation.equation;
+ *blend[0].equation.equation;
ctx->fragment_shader_core.blend.constant =
- blend.equation.constant;
+ blend[0].equation.constant;
}
- if (!no_blending) {
+ if (!blend[0].no_blending) {
ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
}
}
- size_t size = sizeof(struct mali_shader_meta) + sizeof(struct midgard_blend_rt);
+ size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
if (!screen->require_sfbd) {
/* Additional blend descriptor tacked on for jobs using MFBD */
- unsigned blend_count = 0x200;
+ struct midgard_blend_rt rts[4];
- if (blend.is_shader) {
- /* For a blend shader, the bottom nibble corresponds to
- * the number of work registers used, which signals the
- * -existence- of a blend shader */
+ for (unsigned i = 0; i < rt_count; ++i) {
+ unsigned blend_count = 0x200;
- assert(blend.shader.work_count >= 2);
- blend_count |= MIN2(blend.shader.work_count, 3);
- } else {
- /* Otherwise, the bottom bit simply specifies if
- * blending (anything other than REPLACE) is enabled */
+ if (blend[i].is_shader) {
+ /* For a blend shader, the bottom nibble corresponds to
+ * the number of work registers used, which signals the
+ * -existence- of a blend shader */
+ assert(blend[i].shader.work_count >= 2);
+ blend_count |= MIN2(blend[i].shader.work_count, 3);
+ } else {
+ /* Otherwise, the bottom bit simply specifies if
+ * blending (anything other than REPLACE) is enabled */
- if (!no_blending)
- blend_count |= 0x1;
- }
+ if (!blend[i].no_blending)
+ blend_count |= 0x1;
+ }
- struct midgard_blend_rt rts[4];
- for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
bool is_srgb =
(ctx->pipe_framebuffer.nr_cbufs > i) &&
(ctx->pipe_framebuffer.cbufs[i]) &&
* native Midgard ops for helping here, but
* they're not well-understood yet. */
- assert(!(is_srgb && blend.is_shader));
+ assert(!(is_srgb && blend[i].is_shader));
- if (blend.is_shader) {
- rts[i].blend.shader = blend.shader.gpu;
+ if (blend[i].is_shader) {
+ rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag;
} else {
- rts[i].blend.equation = *blend.equation.equation;
- rts[i].blend.constant = blend.equation.constant;
+ rts[i].blend.equation = *blend[i].equation.equation;
+ rts[i].blend.constant = blend[i].equation.constant;
}
}
- memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * 1);
+ memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count);
}
}
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = pan_screen(gallium->screen);
-#ifndef DRY_RUN
-
panfrost_job_submit(ctx, job);
/* If visual, we can stall a frame */
/* If readback, flush now (hurts the pipelined performance) */
if (flush_immediate)
panfrost_drm_force_flush_fragment(ctx, fence);
-#endif
}
static void
struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);
ctx->wallpaper_batch = batch;
- panfrost_blit_wallpaper(ctx);
+
+ /* Clamp the rendering area to the damage extent. The
+ * EGL_KHR_partial_update spec states that trying to render outside of
+ * the damage region is "undefined behavior", so we should be safe.
+ */
+ unsigned damage_width = (rsrc->damage.extent.maxx - rsrc->damage.extent.minx);
+ unsigned damage_height = (rsrc->damage.extent.maxy - rsrc->damage.extent.miny);
+
+ if (damage_width && damage_height) {
+ panfrost_job_intersection_scissor(batch, rsrc->damage.extent.minx,
+ rsrc->damage.extent.miny,
+ rsrc->damage.extent.maxx,
+ rsrc->damage.extent.maxy);
+ }
+
+ /* FIXME: Looks like aligning on a tile is not enough, but
+ * aligning on twice the tile size seems to work. We don't
+ * know exactly what happens here but this deserves extra
+ * investigation to figure it out.
+ */
+ batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
+ batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
+ batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
+ rsrc->base.width0);
+ batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
+ rsrc->base.height0);
+
+ struct pipe_scissor_state damage;
+ struct pipe_box rects[4];
+
+ /* Clamp the damage box to the rendering area. */
+ damage.minx = MAX2(batch->minx, rsrc->damage.biggest_rect.x);
+ damage.miny = MAX2(batch->miny, rsrc->damage.biggest_rect.y);
+ damage.maxx = MIN2(batch->maxx,
+ rsrc->damage.biggest_rect.x +
+ rsrc->damage.biggest_rect.width);
+ damage.maxy = MIN2(batch->maxy,
+ rsrc->damage.biggest_rect.y +
+ rsrc->damage.biggest_rect.height);
+
+ /* One damage rectangle means we can end up with at most 4 reload
+ * regions:
+ * 1: left region, only exists if damage.x > 0
+ * 2: right region, only exists if damage.x + damage.width < fb->width
+ * 3: top region, only exists if damage.y > 0. The intersections with
+ * the left and right regions are dropped
+ * 4: bottom region, only exists if damage.y + damage.height < fb->height.
+ * The intersections with the left and right regions are dropped
+ *
+ * ____________________________
+ * | | 3 | |
+ * | |___________| |
+ * | | damage | |
+ * | 1 | rect | 2 |
+ * | |___________| |
+ * | | 4 | |
+ * |_______|___________|______|
+ */
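+ /* rects[0..3] map to regions 1-4 above: the left and right strips span
+ * the full render height, the top and bottom strips only the horizontal
+ * span of the damage rect */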
+ u_box_2d(batch->minx, batch->miny, damage.minx - batch->minx,
+ batch->maxy - batch->miny, &rects[0]);
+ u_box_2d(damage.maxx, batch->miny, batch->maxx - damage.maxx,
+ batch->maxy - batch->miny, &rects[1]);
+ u_box_2d(damage.minx, batch->miny, damage.maxx - damage.minx,
+ damage.miny - batch->miny, &rects[2]);
+ u_box_2d(damage.minx, damage.maxy, damage.maxx - damage.minx,
+ batch->maxy - damage.maxy, &rects[3]);
+
+ for (unsigned i = 0; i < 4; i++) {
+ /* Width and height are always >= 0 even though they are declared as
+ * signed integers: the u_box_2d() helper takes unsigned args and
+ * panfrost_set_damage_region() takes care of clamping negative
+ * values.
+ */
+ if (!rects[i].width || !rects[i].height)
+ continue;
+
+ /* Blit the wallpaper in */
+ panfrost_blit_wallpaper(ctx, &rects[i]);
+ }
ctx->wallpaper_batch = NULL;
}
return (ss->minx == ss->maxx) || (ss->miny == ss->maxy);
}
+/* Count generated primitives (when there are no geom/tess shaders) for
+ * transform feedback */
+
+static void
+panfrost_statistics_record(
+ struct panfrost_context *ctx,
+ const struct pipe_draw_info *info)
+{
+ if (!ctx->active_queries)
+ return;
+
+ uint32_t prims = u_prims_for_vertices(info->mode, info->count);
+ ctx->prims_generated += prims;
+
+ if (ctx->streamout.num_targets <= 0)
+ return;
+
+ ctx->tf_prims_generated += prims;
+}
+
static void
panfrost_draw_vbo(
struct pipe_context *pipe,
if (panfrost_scissor_culls_everything(ctx))
return;
- ctx->payloads[PIPE_SHADER_VERTEX].draw_start = info->start;
- ctx->payloads[PIPE_SHADER_FRAGMENT].draw_start = info->start;
+ ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
+ ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
int mode = info->mode;
ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
- ctx->vertex_count = info->count;
+ /* Take into account a negative bias */
+ ctx->vertex_count = info->count + abs(info->index_bias);
ctx->instance_count = info->instance_count;
+ ctx->active_prim = info->mode;
/* For non-indexed draws, they're the same */
unsigned vertex_count = ctx->vertex_count;
draw_flags |= 0x800;
}
+ panfrost_statistics_record(ctx, info);
+
if (info->index_size) {
/* Calculate the min/max index used so we can figure out how
* many times to invoke the vertex shader */
/* Use the corresponding values */
vertex_count = max_index - min_index + 1;
- ctx->payloads[PIPE_SHADER_VERTEX].draw_start = min_index;
- ctx->payloads[PIPE_SHADER_FRAGMENT].draw_start = min_index;
+ ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
+ ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.negative_start = -min_index;
+ ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
//assert(!info->restart_index); /* TODO: Research */
- assert(!info->index_bias);
draw_flags |= panfrost_translate_index_size(info->index_size);
ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
/* Index count == vertex count, if no indexing is applied, as
* if it is internally indexed in the expected order */
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.negative_start = 0;
+ ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
/* Reverse index state */
/* Fire off the draw itself */
panfrost_queue_draw(ctx);
+
+ /* Increment transform feedback offsets */
+
+ for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
+ unsigned output_count = u_stream_outputs_for_vertices(
+ ctx->active_prim, ctx->vertex_count);
+
+ ctx->streamout.offsets[i] += output_count;
+ }
}
/* CSO state */
DBG("Deleting TGSI shader leaks duplicated tokens\n");
}
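+ /* Release the BOs holding each compiled variant */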
+ for (unsigned i = 0; i < cso->variant_count; ++i) {
+ struct panfrost_shader_state *shader_state = &cso->variants[i];
+ panfrost_bo_unreference(pctx->screen, shader_state->bo);
+ shader_state->bo = NULL;
+ }
+
free(so);
}
return true;
}
+/**
+ * Fix an uncompiled shader's stream output info, and produce a bitmask
+ * of which VARYING_SLOT_* are captured for stream output.
+ *
+ * Core Gallium stores output->register_index as a "slot" number, where
+ * slots are assigned consecutively to all outputs in info->outputs_written.
+ * This naive packing of outputs doesn't work for us - we too have slots,
+ * but the layout is defined by the VUE map, which we won't have until we
+ * compile a specific shader variant. So, we remap these and simply store
+ * VARYING_SLOT_* in our copy's output->register_index fields.
+ *
+ * We then produce a bitmask of outputs which are used for SO.
+ *
+ * Implementation from iris.
+ */
+
+static uint64_t
+update_so_info(struct pipe_stream_output_info *so_info,
+ uint64_t outputs_written)
+{
+ uint64_t so_outputs = 0;
+ uint8_t reverse_map[64] = {};
+ unsigned slot = 0;
+
+ while (outputs_written)
+ reverse_map[slot++] = u_bit_scan64(&outputs_written);
+
+ for (unsigned i = 0; i < so_info->num_outputs; i++) {
+ struct pipe_stream_output *output = &so_info->output[i];
+
+ /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
+ output->register_index = reverse_map[output->register_index];
+
+ so_outputs |= 1ull << output->register_index;
+ }
+
+ return so_outputs;
+}
+
static void
panfrost_bind_shader_state(
struct pipe_context *pctx,
}
}
- variants->variants[variant].tripipe = malloc(sizeof(struct mali_shader_meta));
+ variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta));
}
/* We finally have a variant, so compile it */
if (!shader_state->compiled) {
+ uint64_t outputs_written = 0;
+
panfrost_shader_compile(ctx, shader_state->tripipe,
variants->base.type,
variants->base.type == PIPE_SHADER_IR_NIR ?
variants->base.ir.nir :
variants->base.tokens,
- tgsi_processor_to_shader_stage(type), shader_state);
+ tgsi_processor_to_shader_stage(type), shader_state,
+ &outputs_written);
shader_state->compiled = true;
+
+ /* Fixup the stream out information, since what Gallium returns
+ * normally is mildly insane */
+
+ shader_state->stream_output = variants->base.stream_output;
+ shader_state->so_mask =
+ update_so_info(&shader_state->stream_output, outputs_written);
}
}
.swizzle = panfrost_translate_swizzle_4(user_swizzle)
};
- texture_descriptor.nr_mipmap_levels = last_level - first_level;
+ texture_descriptor.levels = last_level - first_level;
so->hw = texture_descriptor;
bool is_scanout = panfrost_is_scanout(ctx);
bool has_draws = job->last_job.gpu;
- if (!ctx->wallpaper_batch && (!is_scanout || has_draws)) {
- panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+ /* Bail out early when the current and new states are the same. */
+ if (util_framebuffer_state_equal(&ctx->pipe_framebuffer, fb))
+ return;
+
+ /* The wallpaper logic sets a new FB state before doing the blit and
+ * restores the old one when it's done. Those FB states are reported to
+ * be different because the surfaces they point to are different, but
+ * those surfaces actually point to the same cbufs/zbufs. In that
+ * case we definitely don't want new FB descs to be emitted/attached
+ * since the job is expected to be flushed just after the blit is done,
+ * so let's just copy the new state and return here.
+ */
+ if (ctx->wallpaper_batch) {
+ util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
+ return;
}
+ if (!is_scanout || has_draws)
+ panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+ else
+ assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer &&
+ !ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
+
+ /* Invalidate the FBO job cache since we've just been assigned a new
+ * FB state.
+ */
+ ctx->job = NULL;
+
util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
/* Given that we're rendering, we'd love to have compression */
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
panfrost_hint_afbc(screen, &ctx->pipe_framebuffer);
-
- if (screen->require_sfbd)
- ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx, ~0);
- else
- ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx, ~0);
-
- panfrost_attach_vt_framebuffer(ctx, false);
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
+ ctx->payloads[i].postfix.framebuffer = 0;
}
static void *
panfrost_set_active_query_state(struct pipe_context *pipe,
bool enable)
{
- //struct panfrost_context *panfrost = pan_context(pipe);
+ struct panfrost_context *ctx = pan_context(pipe);
+ ctx->active_queries = enable;
}
static void
util_blitter_destroy(panfrost->blitter_wallpaper);
panfrost_drm_free_slab(screen, &panfrost->scratchpad);
- panfrost_drm_free_slab(screen, &panfrost->shaders);
panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
- panfrost_drm_free_slab(screen, &panfrost->tiler_polygon_list);
panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);
ralloc_free(pipe);
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
/* Allocate a word for the query results to be stored */
query->transfer = panfrost_allocate_transient(ctx, sizeof(unsigned));
-
ctx->occlusion_query = query;
+ break;
+ /* Geometry statistics are computed in the driver. XXX: geom/tess
+ * shaders.. */
+
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ query->start = ctx->prims_generated;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ query->start = ctx->tf_prims_generated;
break;
- }
default:
- DBG("Skipping query %d\n", query->type);
+ fprintf(stderr, "Skipping query %d\n", query->type);
break;
}
panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct panfrost_context *ctx = pan_context(pipe);
- ctx->occlusion_query = NULL;
+ struct panfrost_query *query = (struct panfrost_query *) q;
+
+ switch (query->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ ctx->occlusion_query = NULL;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ query->end = ctx->prims_generated;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ query->end = ctx->tf_prims_generated;
+ break;
+ }
+
return true;
}
bool wait,
union pipe_query_result *vresult)
{
- /* STUB */
struct panfrost_query *query = (struct panfrost_query *) q;
- /* We need to flush out the jobs to actually run the counter, TODO
- * check wait, TODO wallpaper after if needed */
-
- panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ /* Flush first */
+ panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+
/* Read back the query results */
unsigned *result = (unsigned *) query->transfer.cpu;
unsigned passed = *result;
}
break;
- }
+
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
+ vresult->u64 = query->end - query->start;
+ break;
+
default:
DBG("Skipped query get %d\n", query->type);
break;
struct pipe_stream_output_target **targets,
const unsigned *offsets)
{
- /* STUB */
+ struct panfrost_context *ctx = pan_context(pctx);
+ struct panfrost_streamout *so = &ctx->streamout;
+
+ assert(num_targets <= ARRAY_SIZE(so->targets));
+
+ for (unsigned i = 0; i < num_targets; i++) {
+ if (offsets[i] != -1)
+ so->offsets[i] = offsets[i];
+
+ pipe_so_target_reference(&so->targets[i], targets[i]);
+ }
+
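+ /* Release any targets left over from a previous call beyond the new count */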
+ for (unsigned i = num_targets; i < so->num_targets; i++)
+ pipe_so_target_reference(&so->targets[i], NULL);
+
+ so->num_targets = num_targets;
}
static void
struct panfrost_screen *screen = pan_screen(gallium->screen);
panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
- panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
- panfrost_drm_allocate_slab(screen, &ctx->tiler_polygon_list, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
}