vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
struct vc4_context *vc4 = vc4_context(pctx);
- bool old_msaa = vc4->msaa;
- int old_tile_width = vc4->tile_width;
- int old_tile_height = vc4->tile_height;
+ struct vc4_job *job = vc4->job;
+ bool old_msaa = job->msaa;
+ int old_tile_width = job->tile_width;
+ int old_tile_height = job->tile_height;
bool msaa = (info->src.resource->nr_samples > 1 ||
info->dst.resource->nr_samples > 1);
int tile_width = msaa ? 32 : 64;
struct pipe_surface *src_surf =
vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
- pipe_surface_reference(&vc4->color_read, src_surf);
+ pipe_surface_reference(&job->color_read, src_surf);
+ /* Pick the job's write surface from the destination's sample count:
+  * nr_samples > 1 goes to msaa_color_write, otherwise color_write.  This
+  * is the same selection the framebuffer flush path makes for cbuf.
+  */
if (dst_surf->texture->nr_samples > 1)
- pipe_surface_reference(&vc4->color_write, dst_surf);
+ pipe_surface_reference(&job->msaa_color_write, dst_surf);
else
- pipe_surface_reference(&vc4->msaa_color_write, dst_surf);
+ pipe_surface_reference(&job->color_write, dst_surf);
- vc4->draw_min_x = info->dst.box.x;
- vc4->draw_min_y = info->dst.box.y;
- vc4->draw_max_x = info->dst.box.x + info->dst.box.width;
- vc4->draw_max_y = info->dst.box.y + info->dst.box.height;
- vc4->draw_width = dst_surf->width;
- vc4->draw_height = dst_surf->height;
+ job->draw_min_x = info->dst.box.x;
+ job->draw_min_y = info->dst.box.y;
+ job->draw_max_x = info->dst.box.x + info->dst.box.width;
+ job->draw_max_y = info->dst.box.y + info->dst.box.height;
+ job->draw_width = dst_surf->width;
+ job->draw_height = dst_surf->height;
- vc4->tile_width = tile_width;
- vc4->tile_height = tile_height;
- vc4->msaa = msaa;
- vc4->needs_flush = true;
+ job->tile_width = tile_width;
+ job->tile_height = tile_height;
+ job->msaa = msaa;
+ job->needs_flush = true;
- vc4_job_submit(vc4);
+ vc4_job_submit(vc4, job);
- vc4->msaa = old_msaa;
- vc4->tile_width = old_tile_width;
- vc4->tile_height = old_tile_height;
+ job->msaa = old_msaa;
+ job->tile_width = old_tile_width;
+ job->tile_height = old_tile_height;
pipe_surface_reference(&dst_surf, NULL);
pipe_surface_reference(&src_surf, NULL);
#include "vc4_context.h"
void
-vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
+vc4_init_cl(void *mem_ctx, struct vc4_cl *cl)
{
- cl->base = ralloc_size(vc4, 1);
+ cl->base = ralloc_size(mem_ctx, 1);
cl->next = cl->base;
cl->size = 0;
}
}
uint32_t
-vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
+vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo)
{
uint32_t hindex;
- uint32_t *current_handles = vc4->bo_handles.base;
+ uint32_t *current_handles = job->bo_handles.base;
- for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) {
+ for (hindex = 0; hindex < cl_offset(&job->bo_handles) / 4; hindex++) {
if (current_handles[hindex] == bo->handle)
return hindex;
}
struct vc4_cl_out *out;
- out = cl_start(&vc4->bo_handles);
+ out = cl_start(&job->bo_handles);
cl_u32(&out, bo->handle);
- cl_end(&vc4->bo_handles, out);
+ cl_end(&job->bo_handles, out);
- out = cl_start(&vc4->bo_pointers);
+ out = cl_start(&job->bo_pointers);
cl_ptr(&out, vc4_bo_reference(bo));
- cl_end(&vc4->bo_pointers, out);
+ cl_end(&job->bo_pointers, out);
return hindex;
}
#include "kernel/vc4_packet.h"
struct vc4_bo;
+struct vc4_job;
/**
* Undefined structure, used for typechecking that you're passing the pointers
#endif
};
-void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
+void vc4_init_cl(void *mem_ctx, struct vc4_cl *cl);
void vc4_reset_cl(struct vc4_cl *cl);
void vc4_dump_cl(void *cl, uint32_t size, bool is_render);
-uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
+uint32_t vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo);
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };
}
static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
+cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
cl_advance(&cl->reloc_next, 4);
#ifdef DEBUG
}
static inline void
-cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
struct vc4_cl_out **cl_out,
struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
cl_advance(&cl->reloc_next, 4);
#ifdef DEBUG
struct vc4_context *vc4 = vc4_context(pctx);
struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
+ struct vc4_job *job = vc4->job;
- if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
+ if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
if (cbuf->texture->nr_samples > 1) {
- pipe_surface_reference(&vc4->msaa_color_write, cbuf);
+ pipe_surface_reference(&job->msaa_color_write, cbuf);
} else {
- pipe_surface_reference(&vc4->color_write, cbuf);
+ pipe_surface_reference(&job->color_write, cbuf);
}
- if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
- pipe_surface_reference(&vc4->color_read, cbuf);
+ if (!(job->cleared & PIPE_CLEAR_COLOR0)) {
+ pipe_surface_reference(&job->color_read, cbuf);
}
}
- if (vc4->framebuffer.zsbuf &&
- (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ if (zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
if (zsbuf->texture->nr_samples > 1) {
- pipe_surface_reference(&vc4->msaa_zs_write, zsbuf);
+ pipe_surface_reference(&job->msaa_zs_write, zsbuf);
} else {
- pipe_surface_reference(&vc4->zs_write, zsbuf);
+ pipe_surface_reference(&job->zs_write, zsbuf);
}
- if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
- pipe_surface_reference(&vc4->zs_read, zsbuf);
+ if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+ pipe_surface_reference(&job->zs_read, zsbuf);
}
}
- vc4_job_submit(vc4);
+ vc4_job_submit(vc4, job);
+
+ /* We have no hardware context saved between our draw calls, so we
+ * need to flag the next draw as needing all state emitted. Emitting
+ * all state at the start of our draws is also what ensures that we
+ * return to the state we need after a previous tile has finished.
+ */
+ vc4->dirty = ~0;
}
static void
bool include_reads)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
- if (!vc4->needs_flush)
+ if (!job->needs_flush)
return false;
/* Walk all the referenced BOs in the drawing command list to see if
* they match.
*/
if (include_reads) {
- struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
+ struct vc4_bo **referenced_bos = job->bo_pointers.base;
+ for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
if (referenced_bos[i] == bo) {
return true;
}
struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
if (zsurf && zsurf->texture == prsc)
- vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+ vc4->job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
}
static void
vc4_query_init(pctx);
vc4_resource_context_init(pctx);
- vc4_job_init(vc4);
+ vc4->job = rzalloc(vc4, struct vc4_job);
+ vc4_job_init(vc4->job);
vc4->fd = screen->fd;
unsigned num_elements;
};
-struct vc4_context {
- struct pipe_context base;
-
- int fd;
- struct vc4_screen *screen;
-
+/**
+ * A complete bin/render job.
+ *
+ * This is all of the state necessary to submit a bin/render to the kernel.
+ * We want to be able to have multiple in progress at a time, so that we don't
+ * need to flush an existing CL just to switch to rendering to a new render
+ * target (which would mean reading back from the old render target when
+ * starting to render to it again).
+ */
+struct vc4_job {
struct vc4_cl bcl;
struct vc4_cl shader_rec;
struct vc4_cl uniforms;
bool msaa;
/** @} */
- struct slab_mempool transfer_pool;
- struct blitter_context *blitter;
-
- /** bitfield of VC4_DIRTY_* */
- uint32_t dirty;
/* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
* first rendering.
*/
* the current job.
*/
uint32_t draw_calls_queued;
+};
- /** Maximum index buffer valid for the current shader_rec. */
- uint32_t max_index;
- /** Last index bias baked into the current shader_rec. */
- uint32_t last_index_bias;
+struct vc4_context {
+ struct pipe_context base;
+
+ int fd;
+ struct vc4_screen *screen;
+
+ /** The render job for the currently bound FBO. */
+ struct vc4_job *job;
+
+ struct slab_mempool transfer_pool;
+ struct blitter_context *blitter;
+
+ /** bitfield of VC4_DIRTY_* */
+ uint32_t dirty;
struct primconvert_context *primconvert;
uint8_t prim_mode;
+ /** Maximum index buffer valid for the current shader_rec. */
+ uint32_t max_index;
+ /** Last index bias baked into the current shader_rec. */
+ uint32_t last_index_bias;
+
/** Seqno of the last CL flush's job. */
uint64_t last_emit_seqno;
struct vc4_texture_stateobj *texstate);
void vc4_flush(struct pipe_context *pctx);
-void vc4_job_init(struct vc4_context *vc4);
-void vc4_job_submit(struct vc4_context *vc4);
-void vc4_job_reset(struct vc4_context *vc4);
+void vc4_job_init(struct vc4_job *job);
+void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
+void vc4_job_reset(struct vc4_job *job);
bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
bool include_reads);
void vc4_emit_state(struct pipe_context *pctx);
#include "vc4_resource.h"
static void
-vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count)
+vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
{
/* The SW-5891 workaround may cause us to emit multiple shader recs
* and draw packets.
/* Binner gets our packet state -- vc4_emit.c contents,
* and the primitive itself.
*/
- cl_ensure_space(&vc4->bcl,
+ cl_ensure_space(&job->bcl,
256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE +
VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws);
* sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
* vattr stride).
*/
- cl_ensure_space(&vc4->shader_rec,
+ cl_ensure_space(&job->shader_rec,
(12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws);
/* Uniforms are covered by vc4_write_uniforms(). */
/* There could be up to 16 textures per stage, plus misc other
* pointers.
*/
- cl_ensure_space(&vc4->bo_handles, (2 * 16 + 20) * sizeof(uint32_t));
- cl_ensure_space(&vc4->bo_pointers,
+ cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t));
+ cl_ensure_space(&job->bo_pointers,
(2 * 16 + 20) * sizeof(struct vc4_bo *));
}
static void
vc4_start_draw(struct vc4_context *vc4, int vert_count)
{
- if (vc4->needs_flush)
+ struct vc4_job *job = vc4->job;
+
+ if (job->needs_flush)
return;
- vc4_get_draw_cl_space(vc4, 0);
+ vc4_get_draw_cl_space(job, 0);
- struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&bcl, vc4->draw_tiles_x);
- cl_u8(&bcl, vc4->draw_tiles_y);
+ cl_u8(&bcl, job->draw_tiles_x);
+ cl_u8(&bcl, job->draw_tiles_y);
/* Other flags are filled by kernel. */
- cl_u8(&bcl, vc4->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
+ cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
- vc4->needs_flush = true;
- vc4->draw_calls_queued++;
- vc4->draw_width = vc4->framebuffer.width;
- vc4->draw_height = vc4->framebuffer.height;
+ job->needs_flush = true;
+ job->draw_calls_queued++;
+ job->draw_width = vc4->framebuffer.width;
+ job->draw_height = vc4->framebuffer.height;
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
}
static void
}
static void
-vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info,
+vc4_emit_gl_shader_state(struct vc4_context *vc4,
+ const struct pipe_draw_info *info,
uint32_t extra_index_bias)
{
+ struct vc4_job *job = vc4->job;
/* VC4_DIRTY_VTXSTATE */
struct vc4_vertex_stateobj *vtx = vc4->vtx;
/* VC4_DIRTY_VTXBUF */
uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
/* Emit the shader record. */
struct vc4_cl_out *shader_rec =
- cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+ cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit);
/* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
cl_u16(&shader_rec,
VC4_SHADER_FLAG_ENABLE_CLIPPING |
/* VC4_DIRTY_COMPILED_FS */
cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
- cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+ cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
/* VC4_DIRTY_COMPILED_VS */
cl_u16(&shader_rec, 0); /* vs num uniforms */
cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+ cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
/* VC4_DIRTY_COMPILED_CS */
cl_u16(&shader_rec, 0); /* cs num uniforms */
cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+ cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
uint32_t max_index = 0xffff;
uint32_t elem_size =
util_format_get_blocksize(elem->src_format);
- cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+ cl_reloc(job, &job->shader_rec, &shader_rec, rsc->bo, offset);
cl_u8(&shader_rec, elem_size - 1);
cl_u8(&shader_rec, vb->stride);
cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
if (vtx->num_elements == 0) {
assert(num_elements_emit == 1);
struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
- cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+ cl_reloc(job, &job->shader_rec, &shader_rec, bo, 0);
cl_u8(&shader_rec, 16 - 1); /* element size */
cl_u8(&shader_rec, 0); /* stride */
cl_u8(&shader_rec, 0); /* VS VPM offset */
cl_u8(&shader_rec, 0); /* CS VPM offset */
vc4_bo_unreference(&bo);
}
- cl_end(&vc4->shader_rec, shader_rec);
+ cl_end(&job->shader_rec, shader_rec);
- struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
/* the actual draw call. */
cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
assert(vtx->num_elements <= 8);
* attributes. This field also contains the offset into shader_rec.
*/
cl_u32(&bcl, num_elements_emit & 0x7);
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
vc4_write_uniforms(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
vc4->last_index_bias = info->index_bias + extra_index_bias;
vc4->max_index = max_index;
- vc4->shader_rec_count++;
+ job->shader_rec_count++;
}
/**
vc4_hw_2116_workaround(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
- if (vc4->draw_calls_queued == 0x1ef0) {
+ if (job->draw_calls_queued == 0x1ef0) {
perf_debug("Flushing batch due to HW-2116 workaround "
"(too many draw calls per scene\n");
vc4_flush(pctx);
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
if (info->mode >= PIPE_PRIM_QUADS) {
util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
vc4_hw_2116_workaround(pctx);
- vc4_get_draw_cl_space(vc4, info->count);
+ vc4_get_draw_cl_space(job, info->count);
if (vc4->prim_mode != info->mode) {
vc4->prim_mode = info->mode;
vc4_start_draw(vc4, info->count);
vc4_update_compiled_shaders(vc4, info->mode);
- uint32_t start_draw_calls_queued = vc4->draw_calls_queued;
+ uint32_t start_draw_calls_queued = job->draw_calls_queued;
vc4_emit_state(pctx);
if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
- struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (info->indexed) {
uint32_t offset = vc4->indexbuf.offset;
uint32_t index_size = vc4->indexbuf.index_size;
}
struct vc4_resource *rsc = vc4_resource(prsc);
- cl_start_reloc(&vc4->bcl, &bcl, 1);
+ cl_start_reloc(&job->bcl, &bcl, 1);
cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
cl_u8(&bcl,
info->mode |
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
cl_u32(&bcl, info->count);
- cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+ cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
cl_u32(&bcl, vc4->max_index);
if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
* plus whatever remainder.
*/
if (extra_index_bias) {
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
vc4_emit_gl_shader_state(vc4, info,
extra_index_bias);
- bcl = cl_start(&vc4->bcl);
+ bcl = cl_start(&job->bcl);
}
if (start + count > max_verts) {
start = 0;
}
}
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
/* No flushes of the job should have happened between when we started
* emitting state for our draw and when we just emitted our draw's
* primitives.
*/
- assert(start_draw_calls_queued == vc4->draw_calls_queued);
+ assert(start_draw_calls_queued == job->draw_calls_queued);
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
- vc4->resolve |= PIPE_CLEAR_DEPTH;
+ job->resolve |= PIPE_CLEAR_DEPTH;
}
if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
- vc4->resolve |= PIPE_CLEAR_STENCIL;
- vc4->resolve |= PIPE_CLEAR_COLOR0;
+ job->resolve |= PIPE_CLEAR_STENCIL;
+ job->resolve |= PIPE_CLEAR_COLOR0;
if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
vc4_flush(pctx);
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
/* We can't flag new buffers for clearing once we've queued draws. We
* could avoid this by using the 3d engine to clear.
*/
- if (vc4->draw_calls_queued) {
+ if (job->draw_calls_queued) {
perf_debug("Flushing rendering to process new clear.\n");
vc4_flush(pctx);
}
}
if (buffers & PIPE_CLEAR_COLOR0) {
- vc4->clear_color[0] = vc4->clear_color[1] =
+ job->clear_color[0] = job->clear_color[1] =
pack_rgba(vc4->framebuffer.cbufs[0]->format,
color->f);
}
/* Though the depth buffer is stored with Z in the high 24,
* for this field we just need to store it in the low 24.
*/
- vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
- vc4->clear_stencil = stencil;
+ job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
+ job->clear_stencil = stencil;
}
- vc4->draw_min_x = 0;
- vc4->draw_min_y = 0;
- vc4->draw_max_x = vc4->framebuffer.width;
- vc4->draw_max_y = vc4->framebuffer.height;
- vc4->cleared |= buffers;
- vc4->resolve |= buffers;
+ job->draw_min_x = 0;
+ job->draw_min_y = 0;
+ job->draw_max_x = vc4->framebuffer.width;
+ job->draw_max_y = vc4->framebuffer.height;
+ job->cleared |= buffers;
+ job->resolve |= buffers;
vc4_start_draw(vc4, 0);
}
vc4_emit_state(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
- struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |
VC4_DIRTY_RASTERIZER)) {
float *vpscale = vc4->viewport.scale;
if (!vc4->rasterizer->base.scissor) {
minx = MAX2(vp_minx, 0);
miny = MAX2(vp_miny, 0);
- maxx = MIN2(vp_maxx, vc4->draw_width);
- maxy = MIN2(vp_maxy, vc4->draw_height);
+ maxx = MIN2(vp_maxx, job->draw_width);
+ maxy = MIN2(vp_maxy, job->draw_height);
} else {
minx = MAX2(vp_minx, vc4->scissor.minx);
miny = MAX2(vp_miny, vc4->scissor.miny);
cl_u16(&bcl, maxx - minx);
cl_u16(&bcl, maxy - miny);
- vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
- vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
- vc4->draw_max_x = MAX2(vc4->draw_max_x, maxx);
- vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy);
+ job->draw_min_x = MIN2(job->draw_min_x, minx);
+ job->draw_min_y = MIN2(job->draw_min_y, miny);
+ job->draw_max_x = MAX2(job->draw_max_x, maxx);
+ job->draw_max_y = MAX2(job->draw_max_y, maxy);
}
if (vc4->dirty & (VC4_DIRTY_RASTERIZER |
* was seeing bad rendering on glxgears -samples 4 even in
* that case.
*/
- if (vc4->msaa || vc4->prog.fs->disable_early_z)
+ if (job->msaa || vc4->prog.fs->disable_early_z)
ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z;
cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
vc4->prog.fs->color_inputs : 0);
}
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
}
#include "vc4_context.h"
void
-vc4_job_init(struct vc4_context *vc4)
+vc4_job_init(struct vc4_job *job)
{
- vc4_init_cl(vc4, &vc4->bcl);
- vc4_init_cl(vc4, &vc4->shader_rec);
- vc4_init_cl(vc4, &vc4->uniforms);
- vc4_init_cl(vc4, &vc4->bo_handles);
- vc4_init_cl(vc4, &vc4->bo_pointers);
- vc4_job_reset(vc4);
+ vc4_init_cl(job, &job->bcl);
+ vc4_init_cl(job, &job->shader_rec);
+ vc4_init_cl(job, &job->uniforms);
+ vc4_init_cl(job, &job->bo_handles);
+ vc4_init_cl(job, &job->bo_pointers);
+ vc4_job_reset(job);
}
void
-vc4_job_reset(struct vc4_context *vc4)
+vc4_job_reset(struct vc4_job *job)
{
- struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
+ struct vc4_bo **referenced_bos = job->bo_pointers.base;
+ for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
vc4_bo_unreference(&referenced_bos[i]);
}
- vc4_reset_cl(&vc4->bcl);
- vc4_reset_cl(&vc4->shader_rec);
- vc4_reset_cl(&vc4->uniforms);
- vc4_reset_cl(&vc4->bo_handles);
- vc4_reset_cl(&vc4->bo_pointers);
- vc4->shader_rec_count = 0;
-
- vc4->needs_flush = false;
- vc4->draw_calls_queued = 0;
-
- /* We have no hardware context saved between our draw calls, so we
- * need to flag the next draw as needing all state emitted. Emitting
- * all state at the start of our draws is also what ensures that we
- * return to the state we need after a previous tile has finished.
- */
- vc4->dirty = ~0;
- vc4->resolve = 0;
- vc4->cleared = 0;
-
- vc4->draw_min_x = ~0;
- vc4->draw_min_y = ~0;
- vc4->draw_max_x = 0;
- vc4->draw_max_y = 0;
-
- pipe_surface_reference(&vc4->color_write, NULL);
- pipe_surface_reference(&vc4->color_read, NULL);
- pipe_surface_reference(&vc4->msaa_color_write, NULL);
- pipe_surface_reference(&vc4->zs_write, NULL);
- pipe_surface_reference(&vc4->zs_read, NULL);
- pipe_surface_reference(&vc4->msaa_zs_write, NULL);
+ vc4_reset_cl(&job->bcl);
+ vc4_reset_cl(&job->shader_rec);
+ vc4_reset_cl(&job->uniforms);
+ vc4_reset_cl(&job->bo_handles);
+ vc4_reset_cl(&job->bo_pointers);
+ job->shader_rec_count = 0;
+
+ job->needs_flush = false;
+ job->draw_calls_queued = 0;
+
+ job->resolve = 0;
+ job->cleared = 0;
+
+ job->draw_min_x = ~0;
+ job->draw_min_y = ~0;
+ job->draw_max_x = 0;
+ job->draw_max_y = 0;
+
+ pipe_surface_reference(&job->color_write, NULL);
+ pipe_surface_reference(&job->color_read, NULL);
+ pipe_surface_reference(&job->msaa_color_write, NULL);
+ pipe_surface_reference(&job->zs_write, NULL);
+ pipe_surface_reference(&job->zs_read, NULL);
+ pipe_surface_reference(&job->msaa_zs_write, NULL);
}
static void
-vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
+vc4_submit_setup_rcl_surface(struct vc4_job *job,
struct drm_vc4_submit_rcl_surface *submit_surf,
struct pipe_surface *psurf,
bool is_depth, bool is_write)
}
struct vc4_resource *rsc = vc4_resource(psurf->texture);
- submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
submit_surf->offset = surf->offset;
if (psurf->texture->nr_samples <= 1) {
}
static void
-vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
+vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
struct drm_vc4_submit_rcl_surface *submit_surf,
struct pipe_surface *psurf)
{
}
struct vc4_resource *rsc = vc4_resource(psurf->texture);
- submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
submit_surf->offset = surf->offset;
if (psurf->texture->nr_samples <= 1) {
}
static void
-vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4,
+vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
struct drm_vc4_submit_rcl_surface *submit_surf,
struct pipe_surface *psurf)
{
}
struct vc4_resource *rsc = vc4_resource(psurf->texture);
- submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+ submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
submit_surf->offset = surf->offset;
submit_surf->bits = 0;
rsc->writes++;
* Submits the job to the kernel and then reinitializes it.
*/
void
-vc4_job_submit(struct vc4_context *vc4)
+vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
{
- if (!vc4->needs_flush)
+ if (!job->needs_flush)
return;
/* The RCL setup would choke if the draw bounds cause no drawing, so
* just drop the drawing if that's the case.
*/
- if (vc4->draw_max_x <= vc4->draw_min_x ||
- vc4->draw_max_y <= vc4->draw_min_y) {
- vc4_job_reset(vc4);
+ if (job->draw_max_x <= job->draw_min_x ||
+ job->draw_max_y <= job->draw_min_y) {
+ vc4_job_reset(job);
return;
}
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
- vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
+ vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
}
- if (cl_offset(&vc4->bcl) > 0) {
+ if (cl_offset(&job->bcl) > 0) {
/* Increment the semaphore indicating that binning is done and
* unblocking the render thread. Note that this doesn't act
* until the FLUSH completes.
*/
- cl_ensure_space(&vc4->bcl, 8);
- struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ cl_ensure_space(&job->bcl, 8);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
/* The FLUSH caps all of our bin lists with a
* VC4_PACKET_RETURN.
*/
cl_u8(&bcl, VC4_PACKET_FLUSH);
- cl_end(&vc4->bcl, bcl);
+ cl_end(&job->bcl, bcl);
}
struct drm_vc4_submit_cl submit;
memset(&submit, 0, sizeof(submit));
- cl_ensure_space(&vc4->bo_handles, 6 * sizeof(uint32_t));
- cl_ensure_space(&vc4->bo_pointers, 6 * sizeof(struct vc4_bo *));
+ cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
+ cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
- vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
- vc4->color_read, false, false);
- vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write,
- vc4->color_write);
- vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
- vc4->zs_read, true, false);
- vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
- vc4->zs_write, true, true);
+ vc4_submit_setup_rcl_surface(job, &submit.color_read,
+ job->color_read, false, false);
+ vc4_submit_setup_rcl_render_config_surface(job, &submit.color_write,
+ job->color_write);
+ vc4_submit_setup_rcl_surface(job, &submit.zs_read,
+ job->zs_read, true, false);
+ vc4_submit_setup_rcl_surface(job, &submit.zs_write,
+ job->zs_write, true, true);
- vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_color_write,
- vc4->msaa_color_write);
- vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_zs_write,
- vc4->msaa_zs_write);
+ vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_color_write,
+ job->msaa_color_write);
+ vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
+ job->msaa_zs_write);
- if (vc4->msaa) {
+ if (job->msaa) {
/* This bit controls how many pixels the general
* (i.e. subsampled) loads/stores are iterating over
* (multisample loads replicate out to the other samples).
submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
}
- submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
- submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
- submit.bin_cl = (uintptr_t)vc4->bcl.base;
- submit.bin_cl_size = cl_offset(&vc4->bcl);
- submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
- submit.shader_rec_size = cl_offset(&vc4->shader_rec);
- submit.shader_rec_count = vc4->shader_rec_count;
- submit.uniforms = (uintptr_t)vc4->uniforms.base;
- submit.uniforms_size = cl_offset(&vc4->uniforms);
-
- assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
- submit.min_x_tile = vc4->draw_min_x / vc4->tile_width;
- submit.min_y_tile = vc4->draw_min_y / vc4->tile_height;
- submit.max_x_tile = (vc4->draw_max_x - 1) / vc4->tile_width;
- submit.max_y_tile = (vc4->draw_max_y - 1) / vc4->tile_height;
- submit.width = vc4->draw_width;
- submit.height = vc4->draw_height;
- if (vc4->cleared) {
+ submit.bo_handles = (uintptr_t)job->bo_handles.base;
+ submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
+ submit.bin_cl = (uintptr_t)job->bcl.base;
+ submit.bin_cl_size = cl_offset(&job->bcl);
+ submit.shader_rec = (uintptr_t)job->shader_rec.base;
+ submit.shader_rec_size = cl_offset(&job->shader_rec);
+ submit.shader_rec_count = job->shader_rec_count;
+ submit.uniforms = (uintptr_t)job->uniforms.base;
+ submit.uniforms_size = cl_offset(&job->uniforms);
+
+ assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
+ submit.min_x_tile = job->draw_min_x / job->tile_width;
+ submit.min_y_tile = job->draw_min_y / job->tile_height;
+ submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
+ submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
+ submit.width = job->draw_width;
+ submit.height = job->draw_height;
+ if (job->cleared) {
submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
- submit.clear_color[0] = vc4->clear_color[0];
- submit.clear_color[1] = vc4->clear_color[1];
- submit.clear_z = vc4->clear_depth;
- submit.clear_s = vc4->clear_stencil;
+ submit.clear_color[0] = job->clear_color[0];
+ submit.clear_color[1] = job->clear_color[1];
+ submit.clear_z = job->clear_depth;
+ submit.clear_s = job->clear_stencil;
}
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
}
}
- vc4_job_reset(vc4);
+ /* Reset the job that was actually submitted (the function argument),
+  * as the empty-draw-bounds early-out above already does, rather than
+  * whatever vc4->job happens to point at.
+  */
+ vc4_job_reset(job);
}
static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
+ struct vc4_job *job = vc4->job;
struct vc4_fs_key local_key;
struct vc4_fs_key *key = &local_key;
} else {
key->logicop_func = PIPE_LOGICOP_COPY;
}
- if (vc4->msaa) {
+ if (job->msaa) {
key->msaa = vc4->rasterizer->base.multisample;
key->sample_coverage = (vc4->rasterizer->base.multisample &&
vc4->sample_mask != (1 << VC4_MAX_SAMPLES) - 1);
{
struct drm_vc4_submit_cl *args = exec->args;
struct vc4_context *vc4 = dev->vc4;
- struct vc4_bo **bos = vc4->bo_pointers.base;
+ struct vc4_job *job = vc4->job;
+ struct vc4_bo **bos = job->bo_pointers.base;
exec->bo_count = args->bo_handle_count;
exec->bo = calloc(exec->bo_count, sizeof(void *));
const struct pipe_framebuffer_state *framebuffer)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_job *job = vc4->job;
struct pipe_framebuffer_state *cso = &vc4->framebuffer;
unsigned i;
struct vc4_resource *rsc =
vc4_resource(cso->cbufs[0]->texture);
if (!rsc->writes)
- vc4->cleared |= PIPE_CLEAR_COLOR0;
+ job->cleared |= PIPE_CLEAR_COLOR0;
}
if (cso->zsbuf) {
struct vc4_resource *rsc =
vc4_resource(cso->zsbuf->texture);
if (!rsc->writes)
- vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+ job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
}
/* Nonzero texture mipmap levels are laid out as if they were in
rsc->cpp);
}
- vc4->msaa = false;
+ job->msaa = false;
if (cso->cbufs[0])
- vc4->msaa = cso->cbufs[0]->texture->nr_samples > 1;
+ job->msaa = cso->cbufs[0]->texture->nr_samples > 1;
else if (cso->zsbuf)
- vc4->msaa = cso->zsbuf->texture->nr_samples > 1;
+ job->msaa = cso->zsbuf->texture->nr_samples > 1;
- if (vc4->msaa) {
- vc4->tile_width = 32;
- vc4->tile_height = 32;
+ if (job->msaa) {
+ job->tile_width = 32;
+ job->tile_height = 32;
} else {
- vc4->tile_width = 64;
- vc4->tile_height = 64;
+ job->tile_width = 64;
+ job->tile_height = 64;
}
- vc4->draw_tiles_x = DIV_ROUND_UP(cso->width, vc4->tile_width);
- vc4->draw_tiles_y = DIV_ROUND_UP(cso->height, vc4->tile_height);
+ job->draw_tiles_x = DIV_ROUND_UP(cso->width, job->tile_width);
+ job->draw_tiles_y = DIV_ROUND_UP(cso->height, job->tile_height);
vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
}
#include "vc4_qir.h"
static void
-write_texture_p0(struct vc4_context *vc4,
+write_texture_p0(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
vc4_sampler_view(texstate->textures[unit]);
struct vc4_resource *rsc = vc4_resource(sview->base.texture);
- cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, sview->texture_p0);
+ cl_reloc(job, &job->uniforms, uniforms, rsc->bo, sview->texture_p0);
}
static void
-write_texture_p1(struct vc4_context *vc4,
+write_texture_p1(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
}
static void
-write_texture_p2(struct vc4_context *vc4,
+write_texture_p2(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t data)
}
static void
-write_texture_first_level(struct vc4_context *vc4,
+write_texture_first_level(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t data)
}
static void
-write_texture_msaa_addr(struct vc4_context *vc4,
+write_texture_msaa_addr(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
struct pipe_sampler_view *texture = texstate->textures[unit];
struct vc4_resource *rsc = vc4_resource(texture->texture);
- cl_aligned_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, 0);
+ cl_aligned_reloc(job, &job->uniforms, uniforms, rsc->bo, 0);
}
}
static void
-write_texture_border_color(struct vc4_context *vc4,
+write_texture_border_color(struct vc4_job *job,
struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
struct vc4_texture_stateobj *texstate)
{
struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+ struct vc4_job *job = vc4->job;
const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
- cl_ensure_space(&vc4->uniforms, (uinfo->count +
+ cl_ensure_space(&job->uniforms, (uinfo->count +
uinfo->num_texture_samples) * 4);
struct vc4_cl_out *uniforms =
- cl_start_shader_reloc(&vc4->uniforms,
+ cl_start_shader_reloc(&job->uniforms,
uinfo->num_texture_samples);
for (int i = 0; i < uinfo->count; i++) {
break;
case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(vc4, &uniforms, texstate,
+ write_texture_p0(job, &uniforms, texstate,
uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(vc4, &uniforms, texstate,
+ write_texture_p1(job, &uniforms, texstate,
uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(vc4, &uniforms, texstate,
+ write_texture_p2(job, &uniforms, texstate,
uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_FIRST_LEVEL:
- write_texture_first_level(vc4, &uniforms, texstate,
+ write_texture_first_level(job, &uniforms, texstate,
uinfo->data[i]);
break;
case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
+ cl_aligned_reloc(job, &job->uniforms, &uniforms, ubo, 0);
break;
case QUNIFORM_TEXTURE_MSAA_ADDR:
- write_texture_msaa_addr(vc4, &uniforms,
+ write_texture_msaa_addr(job, &uniforms,
texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_BORDER_COLOR:
- write_texture_border_color(vc4, &uniforms,
+ write_texture_border_color(job, &uniforms,
texstate, uinfo->data[i]);
break;
#endif
}
- cl_end(&vc4->uniforms, uniforms);
+ cl_end(&job->uniforms, uniforms);
vc4_bo_unreference(&ubo);
}