u_upload_mgr sets PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE, so that util_range_add can skip the lock.
The time spent in tc_transfer_flush_region decreases from 0.8% to 0.2%
in torcs on radeonsi.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
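For reference, a minimal standalone sketch of the idea (not the actual Mesa code): a resource that is only ever touched from one thread carries a flag, and the range bookkeeping takes the mutex only when that flag is absent. The types and names below (struct resource, struct range, range_add, FLAG_SINGLE_THREAD_USE, min2/max2, pthread mutex) are simplified stand-ins for pipe_resource, util_range, util_range_add, PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE, MIN2/MAX2 and simple_mtx from the real headers.

    #include <pthread.h>
    #include <stdio.h>

    /* Simplified stand-in for PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE. */
    #define FLAG_SINGLE_THREAD_USE (1u << 4)

    struct resource { unsigned flags; };

    struct range {
       unsigned start, end;              /* valid range is [start, end) */
       pthread_mutex_t write_mutex;
    };

    static inline unsigned min2(unsigned a, unsigned b) { return a < b ? a : b; }
    static inline unsigned max2(unsigned a, unsigned b) { return a > b ? a : b; }

    /* Grow the valid range to cover [start, end); lock only when the
     * resource may be written from multiple threads. */
    static void range_add(struct resource *res, struct range *r,
                          unsigned start, unsigned end)
    {
       if (start < r->start || end > r->end) {
          if (res->flags & FLAG_SINGLE_THREAD_USE) {
             r->start = min2(start, r->start);
             r->end = max2(end, r->end);
          } else {
             pthread_mutex_lock(&r->write_mutex);
             r->start = min2(start, r->start);
             r->end = max2(end, r->end);
             pthread_mutex_unlock(&r->write_mutex);
          }
       }
    }

    int main(void)
    {
       /* A single-thread-use resource, e.g. one owned by an uploader. */
       struct resource res = { .flags = FLAG_SINGLE_THREAD_USE };
       struct range r = { .start = ~0u, .end = 0,
                          .write_mutex = PTHREAD_MUTEX_INITIALIZER };

       range_add(&res, &r, 16, 64);   /* no mutex taken: flag is set */
       printf("valid range: [%u, %u)\n", r.start, r.end);
       return 0;
    }

The sketch mirrors the fast path added to util_range_add below: the caller now passes the resource alongside the range so the function can test the flag and avoid the mutex for buffers that are guaranteed single-threaded, such as the upload buffers created by u_upload_mgr.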
#define U_RANGE_H
#include "os/os_thread.h"
-
+#include "pipe/p_state.h"
#include "util/u_math.h"
#include "util/simple_mtx.h"
/* This is like a union of two sets. */
static inline void
-util_range_add(struct util_range *range, unsigned start, unsigned end)
+util_range_add(struct pipe_resource *resource, struct util_range *range,
+ unsigned start, unsigned end)
{
if (start < range->start || end > range->end) {
- simple_mtx_lock(&range->write_mutex);
- range->start = MIN2(start, range->start);
- range->end = MAX2(end, range->end);
- simple_mtx_unlock(&range->write_mutex);
+ if (resource->flags & PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE) {
+ range->start = MIN2(start, range->start);
+ range->end = MAX2(end, range->end);
+ } else {
+ simple_mtx_lock(&range->write_mutex);
+ range->start = MIN2(start, range->start);
+ range->end = MAX2(end, range->end);
+ simple_mtx_unlock(&range->write_mutex);
+ }
}
}
struct threaded_resource *tres =
threaded_resource(images[i].resource);
- util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
+ util_range_add(&tres->b, &tres->valid_buffer_range,
+ images[i].u.buf.offset,
images[i].u.buf.offset + images[i].u.buf.size);
}
}
if (src->buffer) {
struct threaded_resource *tres = threaded_resource(src->buffer);
- util_range_add(&tres->valid_buffer_range, src->buffer_offset,
+ util_range_add(&tres->b, &tres->valid_buffer_range,
+ src->buffer_offset,
src->buffer_offset + src->buffer_size);
}
}
struct pipe_stream_output_target *view;
tc_sync(threaded_context(_pipe));
- util_range_add(&tres->valid_buffer_range, buffer_offset,
+ util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
view = pipe->create_stream_output_target(pipe, res, buffer_offset,
ttrans->staging, 0, &src_box);
}
- util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
+ util_range_add(&tres->b, tres->base_valid_buffer_range,
+ box->x, box->x + box->width);
}
static void
return;
}
- util_range_add(&tres->valid_buffer_range, offset, offset + size);
+ util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
/* The upload is small. Enqueue it. */
struct tc_buffer_subdata *p =
p->src_box = *src_box;
if (dst->target == PIPE_BUFFER)
- util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
+ util_range_add(&tdst->b, &tdst->valid_buffer_range,
+ dstx, dstx + src_box->width);
}
static void
memcpy(p->clear_value, clear_value, clear_value_size);
p->clear_value_size = clear_value_size;
- util_range_add(&tres->valid_buffer_range, offset, offset + size);
+ util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
}
struct tc_clear_texture {
buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
buffer.bind = upload->bind;
buffer.usage = upload->usage;
- buffer.flags = upload->flags;
+ buffer.flags = upload->flags | PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
struct fd_resource *rsc = fd_resource(ptrans->resource);
if (ptrans->resource->target == PIPE_BUFFER)
- util_range_add(&rsc->valid_buffer_range,
+ util_range_add(&rsc->base, &rsc->valid_buffer_range,
ptrans->box.x + box->x,
ptrans->box.x + box->x + box->width);
}
fd_bo_cpu_fini(rsc->bo);
}
- util_range_add(&rsc->valid_buffer_range,
+ util_range_add(&rsc->base, &rsc->valid_buffer_range,
ptrans->box.x,
ptrans->box.x + ptrans->box.width);
target->buffer_size = buffer_size;
assert(rsc->base.target == PIPE_BUFFER);
- util_range_add(&rsc->valid_buffer_range,
+ util_range_add(&rsc->base, &rsc->valid_buffer_range,
buffer_offset, buffer_offset + buffer_size);
return target;
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
if (dst_res->base.target == PIPE_BUFFER)
- util_range_add(&dst_res->valid_buffer_range, dst_x0, dst_x1);
+ util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dst_x0, dst_x1);
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
if (dst->target == PIPE_BUFFER)
- util_range_add(&dst_res->valid_buffer_range, dstx, dstx + src_box->width);
+ util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
struct blorp_address src_addr = {
}
if (p_res->target == PIPE_BUFFER)
- util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+ util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
iris_batch_maybe_flush(batch, 1500);
return NULL;
}
- util_range_add(&res->valid_buffer_range, 0, templ->width0);
+ util_range_add(&res->base, &res->valid_buffer_range, 0, templ->width0);
return &res->base;
}
box->x + box->width);
if (usage & PIPE_TRANSFER_WRITE)
- util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+ util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
/* Avoid using GPU copies for persistent/coherent buffers, as the idea
* there is to access them simultaneously on the CPU & GPU. This also
if (map->dest_had_defined_contents)
history_flush |= iris_flush_bits_for_history(res);
- util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+ util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
}
if (history_flush & ~PIPE_CONTROL_CS_STALL) {
&image_params[start_slot + i],
&res->surf, &view);
} else {
- util_range_add(&res->valid_buffer_range, img->u.buf.offset,
+ util_range_add(&res->base, &res->valid_buffer_range, img->u.buf.offset,
img->u.buf.offset + img->u.buf.size);
fill_buffer_surface_state(&screen->isl_dev, res, map,
res->bind_history |= PIPE_BIND_SHADER_BUFFER;
res->bind_stages |= 1 << stage;
- util_range_add(&res->valid_buffer_range, ssbo->buffer_offset,
+ util_range_add(&res->base, &res->valid_buffer_range, ssbo->buffer_offset,
ssbo->buffer_offset + ssbo->buffer_size);
} else {
pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
cso->base.buffer_size = buffer_size;
cso->base.context = ctx;
- util_range_add(&res->valid_buffer_range, buffer_offset,
+ util_range_add(&res->base, &res->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
upload_state(ctx->stream_uploader, &cso->offset, sizeof(uint32_t), 4);
if (tx->map)
nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
- util_range_add(&buf->valid_buffer_range,
+ util_range_add(&buf->base, &buf->valid_buffer_range,
tx->base.box.x + box->x,
tx->base.box.x + box->x + box->width);
}
if (tx->map)
nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
- util_range_add(&buf->valid_buffer_range,
+ util_range_add(&buf->base, &buf->valid_buffer_range,
tx->base.box.x, tx->base.box.x + tx->base.box.width);
}
&src->base, 0, &src_box);
}
- util_range_add(&dst->valid_buffer_range, dstx, dstx + size);
+ util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
}
buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;
util_range_init(&buffer->valid_buffer_range);
- util_range_add(&buffer->valid_buffer_range, 0, bytes);
+ util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);
return &buffer->base;
}
pipe_reference_init(&targ->pipe.reference, 1);
assert(buf->base.target == PIPE_BUFFER);
- util_range_add(&buf->valid_buffer_range, offset, offset + size);
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
return &targ->pipe;
}
return;
}
- util_range_add(&buf->valid_buffer_range, offset, offset + size);
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
assert(size % data_size == 0);
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->base, &res->valid_buffer_range,
nvc0->buffers[s][i].buffer_offset,
nvc0->buffers[s][i].buffer_offset +
nvc0->buffers[s][i].buffer_size);
result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
ready);
- util_range_add(&buf->valid_buffer_range, offset,
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset,
offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
nvc0_resource_validate(buf, NOUVEAU_BO_WR);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- util_range_add(&buf->valid_buffer_range, offset,
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset,
offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
nvc0_resource_validate(buf, NOUVEAU_BO_WR);
pipe_reference_init(&targ->pipe.reference, 1);
assert(buf->base.target == PIPE_BUFFER);
- util_range_add(&buf->valid_buffer_range, offset, offset + size);
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
return &targ->pipe;
}
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->base, &res->valid_buffer_range,
nvc0->buffers[s][i].buffer_offset,
nvc0->buffers[s][i].buffer_offset +
nvc0->buffers[s][i].buffer_size);
return;
}
- util_range_add(&buf->valid_buffer_range, offset, offset + size);
+ util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
assert(size % data_size == 0);
assert(view->resource->target == PIPE_BUFFER);
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->base, &res->valid_buffer_range,
view->u.buf.offset,
view->u.buf.offset + view->u.buf.size);
}
res->flags = (access & 3) << 8;
if (res->buf->base.target == PIPE_BUFFER &&
access & PIPE_IMAGE_ACCESS_WRITE)
- util_range_add(&res->buf->valid_buffer_range,
+ util_range_add(&res->buf->base, &res->buf->valid_buffer_range,
tic->pipe.u.buf.offset,
tic->pipe.u.buf.offset + tic->pipe.u.buf.size);
list_add(&res->list, &nvc0->img_head);
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->base, &res->valid_buffer_range,
nvc0->buffers[s][i].buffer_offset,
nvc0->buffers[s][i].buffer_offset +
nvc0->buffers[s][i].buffer_size);
}
- util_range_add(&prsrc->valid_buffer_range,
+ util_range_add(&prsrc->base, &prsrc->valid_buffer_range,
transfer->box.x,
transfer->box.x + transfer->box.width);
struct panfrost_resource *rsc = pan_resource(transfer->resource);
if (transfer->resource->target == PIPE_BUFFER) {
- util_range_add(&rsc->valid_buffer_range,
+ util_range_add(&rsc->base, &rsc->valid_buffer_range,
transfer->box.x + box->x,
transfer->box.x + box->x + box->width);
} else {
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&rdst->valid_buffer_range, dst_offset,
+ util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
dst_offset + size);
dst_offset += rdst->gpu_address;
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+ util_range_add(dst, &r600_resource(dst)->valid_buffer_range, offset,
offset + size);
offset += r600_resource(dst)->gpu_address;
surf->cb_color_view = 0;
/* Set the buffer range the GPU will have access to: */
- util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
+ util_range_add(pipe_buffer, &r600_resource(pipe_buffer)->valid_buffer_range,
0, pipe_buffer->width0);
}
ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
}
- util_range_add(&rbuffer->valid_buffer_range, box->x,
+ util_range_add(&rbuffer->b.b, &rbuffer->valid_buffer_range, box->x,
box->x + box->width);
}
rbuffer->domains = RADEON_DOMAIN_GTT;
rbuffer->flags = 0;
rbuffer->b.is_user_ptr = true;
- util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
- util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
+ util_range_add(&rbuffer->b.b, &rbuffer->valid_buffer_range, 0, templ->width0);
+ util_range_add(&rbuffer->b.b, &rbuffer->b.valid_buffer_range, 0, templ->width0);
/* Convert a user pointer to a buffer. */
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+ util_range_add(dst, &r600_resource(dst)->valid_buffer_range, dst_offset,
dst_offset + size);
dst_offset += r600_resource(dst)->gpu_address;
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&rdst->valid_buffer_range, dst_offset,
+ util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
dst_offset + size);
size >>= 2; /* convert to dwords */
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
- util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+ util_range_add(buffer, &rbuffer->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
return &t->b;
}
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&sdst->valid_buffer_range, dst_offset,
+ util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
dst_offset + size);
dst_offset += sdst->gpu_address;
box->x, src_offset, box->width);
}
- util_range_add(&buf->valid_buffer_range, box->x,
+ util_range_add(&buf->b.b, &buf->valid_buffer_range, box->x,
box->x + box->width);
}
buf->domains = RADEON_DOMAIN_GTT;
buf->flags = 0;
buf->b.is_user_ptr = true;
- util_range_add(&buf->valid_buffer_range, 0, templ->width0);
- util_range_add(&buf->b.valid_buffer_range, 0, templ->width0);
+ util_range_add(&buf->b.b, &buf->valid_buffer_range, 0, templ->width0);
+ util_range_add(&buf->b.b, &buf->b.valid_buffer_range, 0, templ->width0);
/* Convert a user pointer to a buffer. */
buf->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
if (sdst)
- util_range_add(&sdst->valid_buffer_range, offset, offset + size);
+ util_range_add(dst, &sdst->valid_buffer_range, offset, offset + size);
/* Flush the caches. */
if (sdst && !(user_flags & SI_CPDMA_SKIP_GFX_SYNC)) {
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&si_resource(dst)->valid_buffer_range, dst_offset,
+ util_range_add(dst, &si_resource(dst)->valid_buffer_range, dst_offset,
dst_offset + size);
}
if (res->b.b.target != PIPE_BUFFER)
return;
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->b.b, &res->valid_buffer_range,
view->u.buf.offset,
view->u.buf.offset + view->u.buf.size);
}
buffers->enabled_mask |= 1u << slot;
sctx->descriptors_dirty |= 1u << descriptors_idx;
- util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
+ util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
sbuffer->buffer_offset + sbuffer->buffer_size);
}
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&sdst->valid_buffer_range, dst_offset,
+ util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
dst_offset + size);
dst_offset += sdst->gpu_address;
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&dst->valid_buffer_range, offset, offset + 8);
+ util_range_add(&dst->b.b, &dst->valid_buffer_range, offset, offset + 8);
assert(va % 8 == 0);
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&sdst->valid_buffer_range, offset, offset + size);
+ util_range_add(dst, &sdst->valid_buffer_range, offset, offset + size);
offset += sdst->gpu_address;
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
- util_range_add(&buf->valid_buffer_range, buffer_offset,
+ util_range_add(&buf->b.b, &buf->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
return &t->b;
}
*
* We'll end up flushing 25 --> 70.
*/
- util_range_add(&trans->range, box->x, box->x + box->width);
+ util_range_add(transfer->resource, &trans->range, box->x, box->x + box->width);
}
static const struct u_resource_vtbl virgl_buffer_vtbl =
struct virgl_resource *sres = virgl_resource(src);
if (dres->u.b.target == PIPE_BUFFER)
- util_range_add(&dres->valid_buffer_range, dstx, dstx + src_box->width);
+ util_range_add(&dres->u.b, &dres->valid_buffer_range, dstx, dstx + src_box->width);
virgl_resource_dirty(dres, dst_level);
virgl_encode_resource_copy_region(vctx, dres,
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
virgl_encoder_write_res(ctx, res);
- util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+ util_range_add(&res->u.b, &res->valid_buffer_range, buffers[i].buffer_offset,
buffers[i].buffer_offset + buffers[i].buffer_size);
virgl_resource_dirty(res, 0);
} else {
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
virgl_encoder_write_res(ctx, res);
- util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+ util_range_add(&res->u.b, &res->valid_buffer_range, buffers[i].buffer_offset,
buffers[i].buffer_offset + buffers[i].buffer_size);
virgl_resource_dirty(res, 0);
} else {
virgl_encoder_write_res(ctx, res);
if (res->u.b.target == PIPE_BUFFER) {
- util_range_add(&res->valid_buffer_range, images[i].u.buf.offset,
+ util_range_add(&res->u.b, &res->valid_buffer_range, images[i].u.buf.offset,
images[i].u.buf.offset + images[i].u.buf.size);
}
virgl_resource_dirty(res, images[i].u.tex.level);
query->result_size = (query_type == PIPE_QUERY_TIMESTAMP ||
query_type == PIPE_QUERY_TIME_ELAPSED) ? 8 : 4;
- util_range_add(&query->buf->valid_buffer_range, 0,
+ util_range_add(&query->buf->u.b, &query->buf->valid_buffer_range, 0,
sizeof(struct virgl_host_query_state));
virgl_resource_dirty(query->buf, 0);
}
if (usage & PIPE_TRANSFER_WRITE)
- util_range_add(&vres->valid_buffer_range, box->x, box->x + box->width);
+ util_range_add(&vres->u.b, &vres->valid_buffer_range, box->x, box->x + box->width);
}
*transfer = &trans->base;
likely(!(virgl_debug & VIRGL_DEBUG_XFER)) &&
virgl_transfer_queue_extend_buffer(&vctx->queue,
vbuf->hw_res, offset, size, data)) {
- util_range_add(&vbuf->valid_buffer_range, offset, offset + size);
+ util_range_add(&vbuf->u.b, &vbuf->valid_buffer_range, offset, offset + size);
return;
}
t->handle = handle;
res->bind_history |= PIPE_BIND_STREAM_OUTPUT;
- util_range_add(&res->valid_buffer_range, buffer_offset,
+ util_range_add(&res->u.b, &res->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
virgl_resource_dirty(res, 0);
#define PIPE_RESOURCE_FLAG_MAP_COHERENT (1 << 1)
#define PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY (1 << 2)
#define PIPE_RESOURCE_FLAG_SPARSE (1 << 3)
+#define PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE (1 << 4)
#define PIPE_RESOURCE_FLAG_DRV_PRIV (1 << 8) /* driver/winsys private */
#define PIPE_RESOURCE_FLAG_ST_PRIV (1 << 24) /* state-tracker/winsys private */