X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_context.c;h=eeadea0b1db3db03396e4805fbfd2e0b115dff99;hb=e8959ba7afbda7a23805072efc15c6f11449103e;hp=7779c461c7660e69acf713284e7fe6b469ce4b1f;hpb=895c90410314103814ca4d2684f94463bd8f243f;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 7779c461c76..eeadea0b1db 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_blitter.h" +#include "util/u_upload_mgr.h" #include "indices/u_primconvert.h" #include "pipe/p_screen.h" @@ -36,307 +37,94 @@ #include "vc4_context.h" #include "vc4_resource.h" -/** - * Emits a no-op STORE_TILE_BUFFER_GENERAL. - * - * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of - * some sort before another load is triggered. - */ -static void -vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted) -{ - if (!*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); - cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR)); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - - *coords_emitted = false; -} - -/** - * Emits a PACKET_TILE_COORDINATES if one isn't already pending. - * - * The tile coordinates packet triggers a pending load if there is one, are - * used for clipping during rendering, and determine where loads/stores happen - * relative to their base address. - */ -static void -vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y, - bool *coords_emitted) -{ - if (*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, x); - cl_u8(&vc4->rcl, y); - - *coords_emitted = true; -} - -static void -vc4_setup_rcl(struct vc4_context *vc4) -{ - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; - struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf); - struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL; - - if (!csurf) - vc4->resolve &= ~PIPE_CLEAR_COLOR0; - if (!zsurf) - vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); - uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; - uint32_t width = vc4->framebuffer.width; - uint32_t height = vc4->framebuffer.height; - uint32_t xtiles = align(width, 64) / 64; - uint32_t ytiles = align(height, 64) / 64; - -#if 0 - fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", - vc4->resolve, - vc4->cleared, - resolve_uncleared); -#endif - - cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); - cl_u32(&vc4->rcl, vc4->clear_color[0]); - cl_u32(&vc4->rcl, vc4->clear_color[1]); - cl_u32(&vc4->rcl, vc4->clear_depth); - cl_u8(&vc4->rcl, vc4->clear_stencil); - - /* The rendering mode config determines the pointer that's used for - * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel - * could handle a no-relocation rendering mode config and deny those - * packets, but instead we just tell the kernel we're doing our color - * rendering to the Z buffer, and just don't emit any of those - * packets. - */ - struct vc4_surface *render_surf = csurf ? csurf : zsurf; - struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); - cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset); - cl_u16(&vc4->rcl, width); - cl_u16(&vc4->rcl, height); - cl_u16(&vc4->rcl, ((render_surf->tiling << - VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) | - (vc4_rt_format_is_565(render_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888) | - VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE)); - - /* The tile buffer normally gets cleared when the previous tile is - * stored. If the clear values changed between frames, then the tile - * buffer has stale clear values in it, so we have to do a store in - * None mode (no writes) so that we trigger the tile buffer clear. - * - * Excess clearing is only a performance cost, since per-tile contents - * will be loaded/stored in the loop below. - */ - if (vc4->cleared & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)) { - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, 0); - cl_u8(&vc4->rcl, 0); - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - } - - for (int y = 0; y < ytiles; y++) { - for (int x = 0; x < xtiles; x++) { - bool end_of_frame = (x == xtiles - 1 && - y == ytiles - 1); - bool coords_emitted = false; - - /* Note that the load doesn't actually occur until the - * tile coords packet is processed, and only one load - * may be outstanding at a time. - */ - if (resolve_uncleared & PIPE_CLEAR_COLOR) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_COLOR | - (csurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, - vc4_rt_format_is_565(csurf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888); - cl_reloc(vc4, &vc4->rcl, ctex->bo, - csurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_ZS | - (zsurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, 0); - cl_reloc(vc4, &vc4->rcl, ztex->bo, - zsurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - /* Clipping depends on tile coordinates having been - * emitted, so make sure it's happened even if - * everything was cleared to start. - */ - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); - cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc, - (y * xtiles + x) * 32); - - if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_ZS | - (zsurf->tiling << - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); - cl_u8(&vc4->rcl, - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); - cl_reloc(vc4, &vc4->rcl, ztex->bo, - zsurf->offset | - ((end_of_frame && - !(vc4->resolve & PIPE_CLEAR_COLOR0)) ? - VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); - - coords_emitted = false; - } - - if (vc4->resolve & PIPE_CLEAR_COLOR0) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - if (end_of_frame) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); - } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); - } - - coords_emitted = false; - } - - /* One of the bits needs to have been set that would - * have triggered an EOF. - */ - assert(vc4->resolve & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)); - /* Any coords emitted must also have been consumed by - * a store. - */ - assert(!coords_emitted); - } - } -} - void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; + struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; if (!vc4->needs_flush) return; - /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ - cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); - - cl_u8(&vc4->bcl, VC4_PACKET_NOP); - cl_u8(&vc4->bcl, VC4_PACKET_HALT); + /* The RCL setup would choke if the draw bounds cause no drawing, so + * just drop the drawing if that's the case. + */ + if (vc4->draw_max_x <= vc4->draw_min_x || + vc4->draw_max_y <= vc4->draw_min_y) { + vc4_job_reset(vc4); + return; + } - vc4_setup_rcl(vc4); + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act until the + * FLUSH completes. + */ + cl_ensure_space(&vc4->bcl, 8); + struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE); + /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ + cl_u8(&bcl, VC4_PACKET_FLUSH); + cl_end(&vc4->bcl, bcl); + + if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_write, + cbuf->texture->nr_samples > 1 ? + NULL : cbuf); + pipe_surface_reference(&vc4->msaa_color_write, + cbuf->texture->nr_samples > 1 ? + cbuf : NULL); + + if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_read, cbuf); + } else { + pipe_surface_reference(&vc4->color_read, NULL); + } - if (vc4_debug & VC4_DEBUG_CL) { - fprintf(stderr, "BCL:\n"); - vc4_dump_cl(&vc4->bcl, false); - fprintf(stderr, "RCL:\n"); - vc4_dump_cl(&vc4->rcl, true); + } else { + pipe_surface_reference(&vc4->color_write, NULL); + pipe_surface_reference(&vc4->color_read, NULL); + pipe_surface_reference(&vc4->msaa_color_write, NULL); } - struct drm_vc4_submit_cl submit; - memset(&submit, 0, sizeof(submit)); - - submit.bo_handles = vc4->bo_handles.base; - submit.bo_handle_count = (vc4->bo_handles.next - - vc4->bo_handles.base) / 4; - submit.bin_cl = vc4->bcl.base; - submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base; - submit.render_cl = vc4->rcl.base; - submit.render_cl_size = vc4->rcl.next - vc4->rcl.base; - submit.shader_rec = vc4->shader_rec.base; - submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base; - submit.shader_rec_count = vc4->shader_rec_count; - submit.uniforms = vc4->uniforms.base; - submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base; - - if (!(vc4_debug & VC4_DEBUG_NORAST)) { - int ret; - -#ifndef USE_VC4_SIMULATOR - ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); -#else - ret = vc4_simulator_flush(vc4, &submit); -#endif - if (ret) { - fprintf(stderr, "VC4 submit failed\n"); - abort(); + if (vc4->framebuffer.zsbuf && + (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_write, + zsbuf->texture->nr_samples > 1 ? + NULL : zsbuf); + pipe_surface_reference(&vc4->msaa_zs_write, + zsbuf->texture->nr_samples > 1 ? + zsbuf : NULL); + + if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_read, zsbuf); + } else { + pipe_surface_reference(&vc4->zs_read, NULL); } + } else { + pipe_surface_reference(&vc4->zs_write, NULL); + pipe_surface_reference(&vc4->zs_read, NULL); + pipe_surface_reference(&vc4->msaa_zs_write, NULL); } - vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->rcl); - vc4_reset_cl(&vc4->shader_rec); - vc4_reset_cl(&vc4->uniforms); - vc4_reset_cl(&vc4->bo_handles); - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < submit.bo_handle_count; i++) - vc4_bo_unreference(&referenced_bos[i]); - vc4_reset_cl(&vc4->bo_pointers); - vc4->shader_rec_count = 0; - - vc4->needs_flush = false; - vc4->draw_call_queued = false; - - /* We have no hardware context saved between our draw calls, so we - * need to flag the next draw as needing all state emitted. Emitting - * all state at the start of our draws is also what ensures that we - * return to the state we need after a previous tile has finished. - */ - vc4->dirty = ~0; - vc4->resolve = 0; - vc4->cleared = 0; + vc4_job_submit(vc4); } static void vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { + struct vc4_context *vc4 = vc4_context(pctx); + vc4_flush(pctx); + + if (fence) { + struct pipe_screen *screen = pctx->screen; + struct vc4_fence *f = vc4_fence_create(vc4->screen, + vc4->last_emit_seqno); + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } } /** @@ -344,23 +132,24 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, * * This helps avoid flushing the command buffers when unnecessary. */ -void -vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) +bool +vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, + bool include_reads) { struct vc4_context *vc4 = vc4_context(pctx); if (!vc4->needs_flush) - return; + return false; /* Walk all the referenced BOs in the drawing command list to see if * they match. */ - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < (vc4->bo_handles.next - - vc4->bo_handles.base) / 4; i++) { - if (referenced_bos[i] == bo) { - vc4_flush(pctx); - return; + if (include_reads) { + struct vc4_bo **referenced_bos = vc4->bo_pointers.base; + for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) { + if (referenced_bos[i] == bo) { + return true; + } } } @@ -371,8 +160,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) if (csurf) { struct vc4_resource *ctex = vc4_resource(csurf->base.texture); if (ctex->bo == bo) { - vc4_flush(pctx); - return; + return true; } } @@ -381,10 +169,21 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) struct vc4_resource *ztex = vc4_resource(zsurf->base.texture); if (ztex->bo == bo) { - vc4_flush(pctx); - return; + return true; } } + + return false; +} + +static void +vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_surface *zsurf = vc4->framebuffer.zsbuf; + + if (zsurf && zsurf->texture == prsc) + vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); } static void @@ -398,13 +197,24 @@ vc4_context_destroy(struct pipe_context *pctx) if (vc4->primconvert) util_primconvert_destroy(vc4->primconvert); + if (vc4->uploader) + u_upload_destroy(vc4->uploader); + util_slab_destroy(&vc4->transfer_pool); + pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); + pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL); + + pipe_surface_reference(&vc4->color_write, NULL); + pipe_surface_reference(&vc4->color_read, NULL); + + vc4_program_fini(pctx); + ralloc_free(vc4); } struct pipe_context * -vc4_context_create(struct pipe_screen *pscreen, void *priv) +vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct vc4_screen *screen = vc4_screen(pscreen); struct vc4_context *vc4; @@ -414,7 +224,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) vc4_debug &= ~VC4_DEBUG_SHADERDB; vc4 = rzalloc(NULL, struct vc4_context); - if (vc4 == NULL) + if (!vc4) return NULL; struct pipe_context *pctx = &vc4->base; @@ -424,6 +234,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) pctx->priv = priv; pctx->destroy = vc4_context_destroy; pctx->flush = vc4_pipe_flush; + pctx->invalidate_resource = vc4_invalidate_resource; vc4_draw_init(pctx); vc4_state_init(pctx); @@ -431,12 +242,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) vc4_query_init(pctx); vc4_resource_context_init(pctx); - vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->rcl); - vc4_init_cl(vc4, &vc4->shader_rec); - vc4_init_cl(vc4, &vc4->bo_handles); + vc4_job_init(vc4); - vc4->dirty = ~0; vc4->fd = screen->fd; util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer), @@ -450,8 +257,14 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) if (!vc4->primconvert) goto fail; + vc4->uploader = u_upload_create(pctx, 16 * 1024, + PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STREAM); + vc4_debug |= saved_shaderdb_flag; + vc4->sample_mask = (1 << VC4_MAX_SAMPLES) - 1; + return &vc4->base; fail: