*/
#include "util/u_format.h"
+#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include <errno.h>
+/* Check all context state for bindings of prsc and mark the
+ * corresponding state groups dirty, so that stale gpu addresses for
+ * a replaced backing bo get re-emitted.  Used from realloc_bo().
+ */
+static void
+fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
+{
+ int i;
+
+ /* Go through the entire state and see if the resource is bound
+ * anywhere. If it is, mark the relevant state as dirty. This is called on
+ * realloc_bo.
+ */
+
+ /* Constbufs (each loop bails out early once its dirty bit is set;
+ * NOTE(review): starts at i=1 -- presumably slot 0 cannot reference
+ * a pipe_resource here; confirm against the const-buf state code)
+ */
+ for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
+ if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
+ ctx->dirty |= FD_DIRTY_CONSTBUF;
+ if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
+ ctx->dirty |= FD_DIRTY_CONSTBUF;
+ }
+
+ /* VBOs */
+ for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
+ if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
+ ctx->dirty |= FD_DIRTY_VTXBUF;
+ }
+
+ /* Index buffer */
+ if (ctx->indexbuf.buffer == prsc)
+ ctx->dirty |= FD_DIRTY_INDEXBUF;
+
+ /* Textures (sampler views hold a ->texture reference) */
+ for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
+ if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
+ ctx->dirty |= FD_DIRTY_VERTTEX;
+ }
+ for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
+ if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
+ ctx->dirty |= FD_DIRTY_FRAGTEX;
+ }
+}
+
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
+ /* the old bo (and its contents) is discarded, so reset all of the
+ * tracking state that referred to it:
+ */
rsc->bo = fd_bo_new(screen->dev, size, flags);
rsc->timestamp = 0;
- rsc->dirty = false;
+ rsc->dirty = rsc->reading = rsc->writing = false;
+ list_delinit(&rsc->list);
+ util_range_set_empty(&rsc->valid_buffer_range);
+}
+
+/* Currently this is only used for flushing Z32_S8 texture transfers, but
+ * eventually it should handle everything.
+ *
+ * Unpacks the interleaved Z32F_S8X24 staging buffer of 'trans' back
+ * into the separate depth (Z32F) and stencil (S8) bos.  'box' is
+ * relative to the transfer's own box.  For X32_S8X24 (stencil-only
+ * view) only the stencil plane is written.
+ */
+static void
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ struct fd_resource *rsc = fd_resource(trans->base.resource);
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+ struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
+ enum pipe_format format = trans->base.resource->format;
+
+ /* slice->pitch is in pixels (see setup_slices()); depth texels are
+ * 4 bytes, stencil 1 byte:
+ */
+ float *depth = fd_bo_map(rsc->bo) + slice->offset +
+ (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
+ uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+ (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
+
+ assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+ format == PIPE_FORMAT_X32_S8X24_UINT);
+
+ if (format != PIPE_FORMAT_X32_S8X24_UINT)
+ util_format_z32_float_s8x24_uint_unpack_z_float(
+ depth, slice->pitch * 4,
+ trans->staging, trans->base.stride,
+ box->width, box->height);
+
+ util_format_z32_float_s8x24_uint_unpack_s_8uint(
+ stencil, sslice->pitch,
+ trans->staging, trans->base.stride,
+ box->width, box->height);
}
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
{
- struct fd_context *ctx = fd_context(pctx);
struct fd_resource *rsc = fd_resource(ptrans->resource);
+ struct fd_transfer *trans = fd_transfer(ptrans);
+ /* For buffers, record which sub-range now holds valid (written)
+ * data, so later write-maps of never-written ranges can skip
+ * synchronization (see transfer_map).
+ */
- if (rsc->dirty)
- fd_context_render(pctx);
+ if (ptrans->resource->target == PIPE_BUFFER)
+ util_range_add(&rsc->valid_buffer_range,
+ ptrans->box.x + box->x,
+ ptrans->box.x + box->x + box->width);
+ /* Z32F_S8 emulation: push staging data out to the real bos */
- if (rsc->timestamp) {
- fd_pipe_wait(ctx->screen->pipe, rsc->timestamp);
- rsc->timestamp = 0;
- }
+ if (trans->staging)
+ fd_resource_flush(trans, box);
}
static void
{
struct fd_context *ctx = fd_context(pctx);
struct fd_resource *rsc = fd_resource(ptrans->resource);
- if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ struct fd_transfer *trans = fd_transfer(ptrans);
+
+ /* Unless the caller promised explicit flushes, flush the whole
+ * mapped box of a Z32F_S8 staging transfer back to the real bos:
+ */
+ if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+ struct pipe_box box;
+ u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
+ fd_resource_flush(trans, &box);
+ }
+
+ if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
fd_bo_cpu_fini(rsc->bo);
+ if (rsc->stencil)
+ fd_bo_cpu_fini(rsc->stencil->bo);
+ }
+
+ /* NOTE(review): unlike flush_region() this is not guarded by a
+ * PIPE_BUFFER check -- harmless for textures, but inconsistent.
+ */
+ util_range_add(&rsc->valid_buffer_range,
+ ptrans->box.x,
+ ptrans->box.x + ptrans->box.width);
+
pipe_resource_reference(&ptrans->resource, NULL);
util_slab_free(&ctx->transfer_pool, ptrans);
+
+ /* FIXME(review): use-after-free -- 'trans' aliases 'ptrans', which
+ * was just returned to the slab above, so trans->staging is read
+ * from freed memory here.  free(trans->staging) should happen
+ * *before* util_slab_free() (the NULL-guard around free() is also
+ * redundant).
+ */
+ if (trans->staging)
+ free(trans->staging);
}
static void *
struct fd_context *ctx = fd_context(pctx);
struct fd_resource *rsc = fd_resource(prsc);
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+ struct fd_transfer *trans;
struct pipe_transfer *ptrans;
enum pipe_format format = prsc->format;
uint32_t op = 0;
+ uint32_t offset;
char *buf;
int ret = 0;
- DBG("prsc=%p, level=%u, usage=%x", prsc, level, usage);
+ DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
+ box->width, box->height, box->x, box->y);
ptrans = util_slab_alloc(&ctx->transfer_pool);
if (!ptrans)
return NULL;
/* util_slab_alloc() doesn't zero: */
- memset(ptrans, 0, sizeof(*ptrans));
+ trans = fd_transfer(ptrans);
+ memset(trans, 0, sizeof(*trans));
pipe_resource_reference(&ptrans->resource, prsc);
ptrans->level = level;
if (usage & PIPE_TRANSFER_WRITE)
op |= DRM_FREEDRENO_PREP_WRITE;
- /* some state trackers (at least XA) don't do this.. */
- if (!(usage & (PIPE_TRANSFER_FLUSH_EXPLICIT | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)))
- fd_resource_transfer_flush_region(pctx, ptrans, box);
-
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+ /* Nothing needs the old contents: orphan the bo(s) instead of
+ * stalling, and re-dirty any state that pointed at the old bo.
+ */
realloc_bo(rsc, fd_bo_size(rsc->bo));
+ if (rsc->stencil)
+ realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
+ fd_invalidate_resource(ctx, prsc);
+ } else if ((usage & PIPE_TRANSFER_WRITE) &&
+ prsc->target == PIPE_BUFFER &&
+ !util_ranges_intersect(&rsc->valid_buffer_range,
+ box->x, box->x + box->width)) {
+ /* We are trying to write to a previously uninitialized range. No need
+ * to wait.
+ */
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ /* If the GPU is writing to the resource, or if it is reading from the
+ * resource and we're trying to write to it, flush the renders.
+ */
+ if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
+ ((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading) ||
+ ((ptrans->usage & PIPE_TRANSFER_READ) && rsc->writing))
+ fd_context_render(pctx);
+
+ /* The GPU keeps track of how the various bo's are being used, and
+ * will wait if necessary for the proper operation to have
+ * completed.
+ */
ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
if (ret)
goto fail;
return NULL;
}
+ /* byte offset of the box origin within the mapped bo.  In
+ * layer_first layout (a4xx) each layer holds its whole miplevel
+ * chain (stride rsc->layer_size); otherwise all layers of a level
+ * are contiguous (stride slice->size0).
+ */
+ if (rsc->layer_first) {
+ offset = slice->offset +
+ box->y / util_format_get_blockheight(format) * ptrans->stride +
+ box->x / util_format_get_blockwidth(format) * rsc->cpp +
+ box->z * rsc->layer_size;
+ } else {
+ offset = slice->offset +
+ box->y / util_format_get_blockheight(format) * ptrans->stride +
+ box->x / util_format_get_blockwidth(format) * rsc->cpp +
+ box->z * slice->size0;
+ }
+
+ if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+ prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+ /* Z32F_S8 is emulated with split depth/stencil bos, so hand the
+ * state tracker a malloc'd staging buffer in the packed
+ * interleaved layout instead (8 bytes/pixel = cpp * 2):
+ */
+ trans->base.stride = trans->base.box.width * rsc->cpp * 2;
+ trans->staging = malloc(trans->base.stride * trans->base.box.height);
+ if (!trans->staging)
+ goto fail;
+
+ /* if we're not discarding the whole range (or resource), we must copy
+ * the real data in.
+ */
+ if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+ PIPE_TRANSFER_DISCARD_RANGE))) {
+ struct fd_resource_slice *sslice =
+ fd_resource_slice(rsc->stencil, level);
+ void *sbuf = fd_bo_map(rsc->stencil->bo);
+ if (!sbuf)
+ goto fail;
+
+ /* 'buf' is the depth bo mapping; pitch is in pixels, depth
+ * texels are 4 bytes:
+ */
+ float *depth = (float *)(buf + slice->offset +
+ box->y * slice->pitch * 4 + box->x * 4);
+ uint8_t *stencil = sbuf + sslice->offset +
+ box->y * sslice->pitch + box->x;
+
+ if (format != PIPE_FORMAT_X32_S8X24_UINT)
+ util_format_z32_float_s8x24_uint_pack_z_float(
+ trans->staging, trans->base.stride,
+ depth, slice->pitch * 4,
+ box->width, box->height);
+
+ util_format_z32_float_s8x24_uint_pack_s_8uint(
+ trans->staging, trans->base.stride,
+ stencil, sslice->pitch,
+ box->width, box->height);
+ }
+
+ buf = trans->staging;
+ offset = 0;
+ }
+
*pptrans = ptrans;
- return buf + slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * slice->size0;
+ return buf + offset;
fail:
fd_resource_transfer_unmap(pctx, ptrans);
struct fd_resource *rsc = fd_resource(prsc);
if (rsc->bo)
fd_bo_del(rsc->bo);
+ /* unlink from the pending-render list and free range tracking */
+ list_delinit(&rsc->list);
+ util_range_destroy(&rsc->valid_buffer_range);
FREE(rsc);
}
};
+/* Compute each miplevel's pitch/offset/size and return the total bo
+ * size in bytes.  'alignment' comes from slice_alignment(): per-layer
+ * alignment (4096 for 3d/array targets on a3xx, otherwise 1).
+ */
static uint32_t
-setup_slices(struct fd_resource *rsc)
+setup_slices(struct fd_resource *rsc, uint32_t alignment)
{
struct pipe_resource *prsc = &rsc->base.b;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
uint32_t depth = prsc->depth0;
+ /* in layer_first layout, the level (slice) contains just one
+ * layer (since in fact the layer contains the slices)
+ */
+ uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
for (level = 0; level <= prsc->last_level; level++) {
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+ /* NOTE(review): the aligned pitch is written back into 'width',
+ * so the next level is minified from the aligned value rather
+ * than the unaligned width -- confirm this is intended.
+ */
- slice->pitch = align(width, 32);
+ slice->pitch = width = align(width, 32);
slice->offset = size;
- slice->size0 = slice->pitch * height * rsc->cpp;
-
- size += slice->size0 * depth * prsc->array_size;
+ /* 1d array and 2d array textures must all have the same layer size
+ * for each miplevel on a3xx. 3d textures can have different layer
+ * sizes for high levels, but the hw auto-sizer is buggy (or at least
+ * different than what this code does), so as soon as the layer size
+ * range gets into range, we stop reducing it.
+ */
+ if (prsc->target == PIPE_TEXTURE_3D && (
+ level == 1 ||
+ (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
+ slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+ else if (level == 0 || rsc->layer_first || alignment == 1)
+ slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+ else
+ slice->size0 = rsc->slices[level - 1].size0;
+
+ size += slice->size0 * depth * layers_in_level;
width = u_minify(width, 1);
height = u_minify(height, 1);
return size;
}
-/* 2d array and 3d textures seem to want their layers aligned to
- * page boundaries
- */
+/* Per-layer alignment for setup_slices(), by texture target.
+ * NOTE(review): pscreen is currently unused here -- presumably kept so
+ * future gpu generations can choose differently; confirm.
+ */
static uint32_t
-setup_slices_array(struct fd_resource *rsc)
+slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
- struct pipe_resource *prsc = &rsc->base.b;
- uint32_t level, size = 0;
- uint32_t width = prsc->width0;
- uint32_t height = prsc->height0;
- uint32_t depth = prsc->depth0;
-
- for (level = 0; level <= prsc->last_level; level++) {
- struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
-
- slice->pitch = align(width, 32);
- slice->offset = size;
- slice->size0 = align(slice->pitch * height * rsc->cpp, 4096);
-
- size += slice->size0 * depth * prsc->array_size;
-
- width = u_minify(width, 1);
- height = u_minify(height, 1);
- depth = u_minify(depth, 1);
+ /* on a3xx, 2d array and 3d textures seem to want their
+ * layers aligned to page boundaries:
+ */
+ switch (tmpl->target) {
+ case PIPE_TEXTURE_3D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return 4096;
+ default:
+ return 1;
}
-
- return size;
}
/**
*prsc = *tmpl;
pipe_reference_init(&prsc->reference, 1);
+ list_inithead(&rsc->list);
prsc->screen = pscreen;
+ util_range_init(&rsc->valid_buffer_range);
+
rsc->base.vtbl = &fd_resource_vtbl;
+ /* Z32F_S8 is emulated with split depth/stencil resources, so the
+ * primary resource carries only the 4-byte depth texels:
+ */
- rsc->cpp = util_format_get_blocksize(tmpl->format);
+ if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
+ else
+ rsc->cpp = util_format_get_blocksize(tmpl->format);
assert(rsc->cpp);
- switch (tmpl->target) {
- case PIPE_TEXTURE_3D:
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D_ARRAY:
- size = setup_slices_array(rsc);
- break;
- default:
- size = setup_slices(rsc);
- break;
+ /* a4xx uses layer-first layout (each layer holds its whole miplevel
+ * chain), except for 3d textures:
+ */
+ if (is_a4xx(fd_screen(pscreen))) {
+ switch (tmpl->target) {
+ case PIPE_TEXTURE_3D:
+ /* TODO 3D_ARRAY? */
+ rsc->layer_first = false;
+ break;
+ default:
+ rsc->layer_first = true;
+ break;
+ }
+ }
+
+ size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
+
+ if (rsc->layer_first) {
+ rsc->layer_size = align(size, 4096);
+ size = rsc->layer_size * prsc->array_size;
}
realloc_bo(rsc, size);
if (!rsc->bo)
goto fail;
+ /* There is no native Z32F_S8 sampling or rendering format, so this must
+ * be emulated via two separate textures. The depth texture still keeps
+ * its Z32F_S8 format though, and we also keep a reference to a separate
+ * S8 texture.  (Created recursively via fd_resource_create().)
+ */
+ if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+ struct pipe_resource stencil = *tmpl;
+ stencil.format = PIPE_FORMAT_S8_UINT;
+ rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
+ if (!rsc->stencil)
+ goto fail;
+ }
+
return prsc;
fail:
fd_resource_destroy(pscreen, prsc);
*prsc = *tmpl;
pipe_reference_init(&prsc->reference, 1);
+ /* keep list/range state initialized for imported bos too, so
+ * destroy() and the transfer paths work unconditionally:
+ */
+ list_inithead(&rsc->list);
prsc->screen = pscreen;
+ util_range_init(&rsc->valid_buffer_range);
+
rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &slice->pitch);
if (!rsc->bo)
goto fail;
{
struct fd_resource *rsc = fd_resource(prsc);
+ /* the emulated Z32F_S8 stencil plane is a separate resource, so it
+ * must be checked for pending rendering as well: */
- if (rsc->dirty)
+ if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
fd_context_render(pctx);
}