From: Christoph Bumiller Date: Thu, 8 Mar 2012 20:41:41 +0000 (+0100) Subject: nv50,nvc0: fix handling of user vbufs with stride < access size X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0e8ad4cc749626eebbc7f916e551bbfd80b75023;p=mesa.git nv50,nvc0: fix handling of user vbufs with stride < access size --- diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 0dd6c16bb48..ab185194786 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -116,8 +116,8 @@ struct nv50_context { struct pipe_index_buffer idxbuf; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ - unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ - unsigned vbo_max_index; + uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ + uint32_t vb_elt_limit; /* max - min element (count - 1) */ struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; unsigned num_textures[3]; diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index b056e19ea50..188406da600 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -44,6 +44,7 @@ struct nv50_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; boolean need_conversion; unsigned vertex_size; unsigned packet_vertex_limit; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index b9f352d7df8..bc01e69decf 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -60,12 +60,15 @@ nv50_vertex_state_create(struct pipe_context *pipe, so->instance_bufs = 0; so->need_conversion = FALSE; + memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + transkey.nr_elements = 0; transkey.output_stride = 0; for (i = 0; i < num_elements; ++i) { const struct pipe_vertex_element *ve = &elements[i]; const unsigned vbi = ve->vertex_buffer_index; + unsigned size; enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; @@ -86,6 +89,10 @@ nv50_vertex_state_create(struct pipe_context *pipe, } so->element[i].state |= i; + size = util_format_get_blocksize(fmt); + if (so->vb_access_size[vbi] < (ve->src_offset + size)) + so->vb_access_size[vbi] = ve->src_offset + size; + if (1) { unsigned j = transkey.nr_elements++; @@ -176,15 +183,16 @@ nv50_vbuf_range(struct nv50_context *nv50, int vbi, *base = 0; *size = nv50->vtxbuf[vbi].buffer->width0; } else { - assert(nv50->vbo_max_index != ~0); - *base = nv50->vbo_min_index * nv50->vtxbuf[vbi].stride; - *size = (nv50->vbo_max_index - - nv50->vbo_min_index + 1) * nv50->vtxbuf[vbi].stride; + /* NOTE: if there are user buffers, we *must* have index bounds */ + assert(nv50->vb_elt_limit != ~0); + *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride; + *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride + + nv50->vertex->vb_access_size[vbi]; } } static void -nv50_prevalidate_vbufs(struct nv50_context *nv50) +nv50_prevalidate_vbufs(struct nv50_context *nv50, unsigned limits[]) { const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; struct nouveau_bo *bo; @@ -199,6 +207,7 @@ nv50_prevalidate_vbufs(struct nv50_context *nv50) for (i = 0; i < nv50->num_vtxbufs; ++i) { vb = &nv50->vtxbuf[i]; + limits[i] = 0; if (!vb->stride) continue; buf = nv04_resource(vb->buffer); @@ -216,6 +225,7 @@ nv50_prevalidate_vbufs(struct nv50_context *nv50) assert(vb->stride > vb->buffer_offset); nv50->vbo_user |= 1 << i; nv50_vbuf_range(nv50, i, &base, &size); + limits[i] = base + size - 1; bo = nouveau_scratch_data(&nv50->base, buf, base, size); if (bo) BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); @@ -287,12 +297,13 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; unsigned i; + unsigned limits[PIPE_MAX_ATTRIBS]; /* user vertex buffer limits */ if (unlikely(vertex->need_conversion)) { nv50->vbo_fifo = ~0; nv50->vbo_user = 0; } else { - nv50_prevalidate_vbufs(nv50); + nv50_prevalidate_vbufs(nv50, limits); } BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), vertex->num_elements); @@ -310,7 +321,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) for (i = 0; i < vertex->num_elements; ++i) { struct nv04_resource *res; - unsigned size, offset; + unsigned limit, offset; ve = &vertex->element[i]; vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index]; @@ -338,14 +349,16 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) continue; } - size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + limit = limits[ve->pipe.vertex_buffer_index]; + if (!limit) + limit = vb->buffer->width0 - 1; BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - PUSH_DATAh(push, res->address + size - 1); - PUSH_DATA (push, res->address + size - 1); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2); PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); @@ -626,15 +639,15 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; + /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ + nv50->vb_elt_first = info->min_index + info->index_bias; + nv50->vb_elt_limit = info->max_index - info->min_index; + /* For picking only a few vertices from a large user buffer, push is better, * if index count is larger and we expect repeated vertices, suggest upload. */ nv50->vbo_push_hint = /* the 64 is heuristic */ - !(info->indexed && - ((info->max_index - info->min_index + 64) < info->count)); - - nv50->vbo_min_index = info->min_index; - nv50->vbo_max_index = info->max_index; + !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); if (nv50->vbo_push_hint != !!nv50->vbo_fifo) nv50->dirty |= NV50_NEW_ARRAYS; diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 32de91e5644..7072b5918fa 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -120,8 +120,8 @@ struct nvc0_context { struct pipe_index_buffer idxbuf; uint32_t constant_vbos; uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ - unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ - unsigned vbo_max_index; + uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ + uint32_t vb_elt_limit; /* max - min element (count - 1) */ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; unsigned num_textures[5]; diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index fd932be1682..16d336cfab2 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -43,6 +43,7 @@ struct nvc0_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ unsigned size; /* size of vertex in bytes (when packed) */ struct nvc0_vertex_element element[0]; diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 1d4d07d93c2..5a6636f51c9 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -62,12 +62,15 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->instance_bufs = 0; so->need_conversion = FALSE; + memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + transkey.nr_elements = 0; transkey.output_stride = 0; for (i = 0; i < num_elements; ++i) { const struct pipe_vertex_element *ve = &elements[i]; const unsigned vbi = ve->vertex_buffer_index; + unsigned size; enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; @@ -86,6 +89,10 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->element[i].state = nvc0_format_table[fmt].vtx; so->need_conversion = TRUE; } + size = util_format_get_blocksize(fmt); + + if (so->vb_access_size[vbi] < (ve->src_offset + size)) + so->vb_access_size[vbi] = ve->src_offset + size; if (unlikely(ve->instance_divisor)) { so->instance_elts |= 1 << i; @@ -109,7 +116,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, transkey.output_stride = align(transkey.output_stride, ca); transkey.element[j].output_format = fmt; transkey.element[j].output_offset = transkey.output_stride; - transkey.output_stride += util_format_get_blocksize(fmt); + transkey.output_stride += size; so->element[i].state_alt = so->element[i].state; so->element[i].state_alt |= transkey.element[j].output_offset << 7; @@ -185,10 +192,11 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, *base = 0; *size = nvc0->vtxbuf[vbi].buffer->width0; } else { - assert(nvc0->vbo_max_index != ~0); - *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; - *size = (nvc0->vbo_max_index - - nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride; + /* NOTE: if there are user buffers, we *must* have index bounds */ + assert(nvc0->vb_elt_limit != ~0); + *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; } } @@ -196,7 +204,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, * and validate vertex buffers and upload user arrays (if normal mode). */ static uint8_t -nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) +nvc0_prevalidate_vbufs(struct nvc0_context *nvc0, unsigned limits[]) { const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; struct nouveau_bo *bo; @@ -211,6 +219,7 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) for (i = 0; i < nvc0->num_vtxbufs; ++i) { vb = &nvc0->vtxbuf[i]; + limits[i] = 0; if (!vb->stride) continue; buf = nv04_resource(vb->buffer); @@ -224,6 +233,7 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) assert(vb->stride > vb->buffer_offset); nvc0->vbo_user |= 1 << i; nvc0_vbuf_range(nvc0, i, &base, &size); + limits[i] = base + size - 1; bo = nouveau_scratch_data(&nvc0->base, buf, base, size); if (bo) BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); @@ -295,6 +305,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; uint32_t const_vbos; unsigned i; + unsigned limits[PIPE_MAX_ATTRIBS]; uint8_t vbo_mode; boolean update_vertex; @@ -303,7 +314,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) nvc0->vbo_user = 0; vbo_mode = 3; } else { - vbo_mode = nvc0_prevalidate_vbufs(nvc0); + vbo_mode = nvc0_prevalidate_vbufs(nvc0, limits); } const_vbos = vbo_mode ? 0 : nvc0->constant_vbos; @@ -378,7 +389,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) PUSH_SPACE(push, vertex->num_elements * 8); for (i = 0; i < vertex->num_elements; ++i) { struct nv04_resource *res; - unsigned size, offset; + unsigned limit, offset; if (nvc0->state.constant_elts & (1 << i)) continue; @@ -387,7 +398,9 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) res = nv04_resource(vb->buffer); offset = ve->pipe.src_offset + vb->buffer_offset; - size = vb->buffer->width0; + limit = limits[ve->pipe.vertex_buffer_index]; + if (!limit) + limit = vb->buffer->width0 - 1; if (unlikely(ve->pipe.instance_divisor)) { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); @@ -402,8 +415,8 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) PUSH_DATA (push, res->address + offset); } BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - PUSH_DATAh(push, res->address + size - 1); - PUSH_DATA (push, res->address + size - 1); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); } } @@ -699,15 +712,15 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; + /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ + nvc0->vb_elt_first = info->min_index + info->index_bias; + nvc0->vb_elt_limit = info->max_index - info->min_index; + /* For picking only a few vertices from a large user buffer, push is better, * if index count is larger and we expect repeated vertices, suggest upload. */ nvc0->vbo_push_hint = - info->indexed && - (info->max_index - info->min_index) >= (info->count * 2); - - nvc0->vbo_min_index = info->min_index; - nvc0->vbo_max_index = info->max_index; + info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); /* Check whether we want to switch vertex-submission mode, * and if not, update user vbufs.