so->instance_bufs = 0;
so->need_conversion = FALSE;
+ memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
+
transkey.nr_elements = 0;
transkey.output_stride = 0;
for (i = 0; i < num_elements; ++i) {
const struct pipe_vertex_element *ve = &elements[i];
const unsigned vbi = ve->vertex_buffer_index;
+ unsigned size;
enum pipe_format fmt = ve->src_format;
so->element[i].pipe = elements[i];
}
so->element[i].state |= i;
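+ /* Track the highest byte each element reads relative to the start of
+  * its vertex in buffer vbi; e.g. src_offset 16 with a 12-byte
+  * R32G32B32_FLOAT block gives vb_access_size[vbi] >= 28. This is used
+  * when computing user buffer ranges below.
+  */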
+ size = util_format_get_blocksize(fmt);
+ if (so->vb_access_size[vbi] < (ve->src_offset + size))
+ so->vb_access_size[vbi] = ve->src_offset + size;
+
if (1) {
unsigned j = transkey.nr_elements++;
*base = 0;
*size = nv50->vtxbuf[vbi].buffer->width0;
} else {
- assert(nv50->vbo_max_index != ~0);
- *base = nv50->vbo_min_index * nv50->vtxbuf[vbi].stride;
- *size = (nv50->vbo_max_index -
- nv50->vbo_min_index + 1) * nv50->vtxbuf[vbi].stride;
+ /* NOTE: if there are user buffers, we *must* have index bounds */
+ assert(nv50->vb_elt_limit != ~0);
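+ /* Vertices vb_elt_first through vb_elt_first + vb_elt_limit are
+  * accessed; the last one only needs vb_access_size[vbi] bytes, not a
+  * full stride.
+  */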
+ *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride;
+ *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride +
+ nv50->vertex->vb_access_size[vbi];
}
}
static void
-nv50_prevalidate_vbufs(struct nv50_context *nv50)
+nv50_prevalidate_vbufs(struct nv50_context *nv50, unsigned limits[])
{
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ struct nouveau_bo *bo;
struct pipe_vertex_buffer *vb;
struct nv04_resource *buf;
int i;
for (i = 0; i < nv50->num_vtxbufs; ++i) {
vb = &nv50->vtxbuf[i];
+ limits[i] = 0; /* no user buffer limit by default */
if (!vb->stride)
continue;
buf = nv04_resource(vb->buffer);
- /* NOTE: user buffers with temporary storage count as mapped by GPU */
- if (!nouveau_resource_mapped_by_gpu(vb->buffer)) {
+ if (nouveau_resource_mapped_by_gpu(vb->buffer)) {
+ BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ } else {
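+ /* Not mapped by the GPU: either push everything through the FIFO or
+  * stage the data so the GPU can fetch it.
+  */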
if (nv50->vbo_push_hint) {
nv50->vbo_fifo = ~0;
- continue;
+ return;
+ }
+ nv50->base.vbo_dirty = TRUE;
+
+ if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
+ assert(vb->stride > vb->buffer_offset);
+ nv50->vbo_user |= 1 << i;
+ nv50_vbuf_range(nv50, i, &base, &size);
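+ /* Record the last byte accessed; it is programmed as the
+  * VERTEX_ARRAY_LIMIT below.
+  */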
+ limits[i] = base + size - 1;
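+ /* Stash the user data in GPU-accessible scratch memory and keep the
+  * scratch bo referenced for the duration of this draw.
+  */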
+ bo = nouveau_scratch_data(&nv50->base, buf, base, size);
+ if (bo)
+ BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
} else {
- if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
- nv50->vbo_user |= 1 << i;
- assert(vb->stride > vb->buffer_offset);
- nv50_vbuf_range(nv50, i, &base, &size);
- nouveau_user_buffer_upload(&nv50->base, buf, base, size);
- } else {
- nouveau_buffer_migrate(&nv50->base, buf, NOUVEAU_BO_GART);
- }
- nv50->base.vbo_dirty = TRUE;
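+ /* Ordinary CPU-side storage: migrate to GART, and only reference the
+  * buffer if the migration succeeded.
+  */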
+ if (nouveau_buffer_migrate(&nv50->base, buf, NOUVEAU_BO_GART))
+ BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
}
}
- BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
}
}
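+ /* Upload the (possibly modified) user buffers again and point the
+  * vertex arrays at the fresh copies.
+  */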
static void
nv50_update_user_vbufs(struct nv50_context *nv50)
{
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ struct nouveau_bo *bo;
struct nouveau_pushbuf *push = nv50->base.pushbuf;
uint32_t base, offset, size;
int i;
uint32_t written = 0;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
-
for (i = 0; i < nv50->vertex->num_elements; ++i) {
struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
const int b = ve->vertex_buffer_index;
struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
struct nv04_resource *buf = nv04_resource(vb->buffer);
- if (!(nv50->vbo_user & (1 << b))) {
- BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ if (!(nv50->vbo_user & (1 << b)))
continue;
- }
if (!vb->stride) {
nv50_emit_vtxattr(nv50, vb, ve, i);
if (!(written & (1 << b))) {
written |= 1 << b;
- nouveau_user_buffer_upload(&nv50->base, buf, base, size);
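+ /* Upload through scratch; the reference goes into VERTEX_TMP so that
+  * nv50_release_user_vbufs() can drop it after the draw.
+  */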
+ bo = nouveau_scratch_data(&nv50->base, buf, base, size);
+ if (bo)
+ BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
}
offset = vb->buffer_offset + ve->src_offset;
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
-
- BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
}
nv50->base.vbo_dirty = TRUE;
}
static INLINE void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
- uint32_t vbo_user = nv50->vbo_user;
-
- while (vbo_user) {
- int i = ffs(vbo_user) - 1;
- vbo_user &= ~(1 << i);
-
- nouveau_buffer_release_gpu_storage(nv04_resource(nv50->vtxbuf[i].buffer));
+ if (nv50->vbo_user) {
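+ /* Drop the temporary references and let the scratch memory be
+  * recycled.
+  */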
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+ nouveau_scratch_done(&nv50->base);
}
}
struct pipe_vertex_buffer *vb;
struct nv50_vertex_element *ve;
unsigned i;
+ unsigned limits[PIPE_MAX_ATTRIBS]; /* user vertex buffer limits */
if (unlikely(vertex->need_conversion)) {
nv50->vbo_fifo = ~0;
nv50->vbo_user = 0;
} else {
- nv50_prevalidate_vbufs(nv50);
+ nv50_prevalidate_vbufs(nv50, limits);
}
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), vertex->num_elements);
for (i = 0; i < vertex->num_elements; ++i) {
struct nv04_resource *res;
- unsigned size, offset;
+ unsigned limit, offset;
ve = &vertex->element[i];
vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index];
continue;
}
- size = vb->buffer->width0;
offset = ve->pipe.src_offset + vb->buffer_offset;
+ limit = limits[ve->pipe.vertex_buffer_index];
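+ /* A limit of 0 means this is not a user buffer: use its full size. */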
+ if (!limit)
+ limit = vb->buffer->width0 - 1;
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
- PUSH_DATAh(push, res->address + size - 1);
- PUSH_DATA (push, res->address + size - 1);
+ PUSH_DATAh(push, res->address + limit);
+ PUSH_DATA (push, res->address + limit);
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
+ nv50->vb_elt_first = info->min_index + info->index_bias;
+ nv50->vb_elt_limit = info->max_index - info->min_index;
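+ /* vb_elt_limit is ~0 if index bounds are unknown */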
+
/* For picking only a few vertices from a large user buffer, push is better;
 * if the index count is larger and we expect repeated vertices, suggest upload.
 */
nv50->vbo_push_hint = /* the 64 is a heuristic */
- !(info->indexed &&
- ((info->max_index - info->min_index + 64) < info->count));
-
- nv50->vbo_min_index = info->min_index;
- nv50->vbo_max_index = info->max_index;
+ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
if (nv50->vbo_push_hint != !!nv50->vbo_fifo)
nv50->dirty |= NV50_NEW_ARRAYS;