From edbfeed56f1ebd8517840ef48f8c87e24bb98157 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 16 Mar 2012 17:37:32 +0100 Subject: [PATCH] nvc0: improve vertex state validation Now updating vertex attribute format only when necessary. --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 2 + src/gallium/drivers/nvc0/nvc0_context.h | 5 +- src/gallium/drivers/nvc0/nvc0_graph_macros.h | 14 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/nvc0/nvc0_state.c | 45 ++++- src/gallium/drivers/nvc0/nvc0_vbo.c | 189 ++++++++++--------- 6 files changed, 159 insertions(+), 97 deletions(-) diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 05f967313eb..61ca1b87262 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -1307,6 +1307,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 #define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 +#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800 + #define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820 #define NVC0_3D_BLEND_ENABLES 0x00003858 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index fdb58898ebd..8b80f2fe386 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -84,6 +84,8 @@ struct nvc0_context { boolean prim_restart; uint32_t instance_elts; /* bitmask of per-instance elements */ uint32_t instance_base; + uint32_t constant_vbos; + uint32_t constant_elts; int32_t index_bias; uint16_t scissor; uint8_t num_vtxbufs; @@ -115,6 +117,7 @@ struct nvc0_context { struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; struct pipe_index_buffer idxbuf; + uint32_t constant_vbos; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */ @@ -240,7 +243,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, void nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); -void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0); +void nvc0_vertex_arrays_validate(struct nvc0_context *); void nvc0_idxbuf_validate(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h index a0a875fe627..f009980c629 100644 --- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -8,6 +8,20 @@ * bra(n)z annul: no delay slot */ +/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[]. + * Args: size, bitfield + */ +static const uint32_t nvc0_9097_per_instance_bf[] = +{ + 0x00000301, /* parm $r3 (the bitfield) */ + 0x00000211, /* mov $r2 0 */ + 0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */ + 0xffffc911, /* mov $r1 (add $r1 -0x1) */ + 0x0040d043, /* send (extrshl $r3 $r2 0x1 0) */ + 0xffff8897, /* exit branz $r1 0x3 */ + 0x00005211 /* mov $r2 (add $r2 0x1) */ +}; + /* The comments above the macros describe what they *should* be doing, * but we use less functionality for now. */ diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 52cc00134c0..949443d1603 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -623,6 +623,7 @@ nvc0_screen_create(struct nouveau_device *dev) #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf); MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables); MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select); diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index d493f6e7fa0..eb71e6b9853 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -746,19 +746,44 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *vb) { struct nvc0_context *nvc0 = nvc0_context(pipe); + uint32_t constant_vbos = 0; unsigned i; - for (i = 0; i < count; ++i) - pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); - for (; i < nvc0->num_vtxbufs; ++i) - pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); - - memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); - nvc0->num_vtxbufs = count; - - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + if (count != nvc0->num_vtxbufs) { + for (i = 0; i < count; ++i) { + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); + nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + nvc0->vtxbuf[i].stride = vb[i].stride; + if (!vb[i].stride) + constant_vbos |= 1 << i; + } + for (; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + nvc0->num_vtxbufs = count; + nvc0->dirty |= NVC0_NEW_ARRAYS; + } else { + for (i = 0; i < count; ++i) { + if (nvc0->vtxbuf[i].buffer == vb[i].buffer && + nvc0->vtxbuf[i].buffer_offset == vb[i].buffer_offset && + nvc0->vtxbuf[i].stride == vb[i].stride) + continue; + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); + nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + nvc0->vtxbuf[i].stride = vb[i].stride; + if (likely(vb[i].stride)) + nvc0->dirty |= NVC0_NEW_ARRAYS; + else + constant_vbos |= 1 << i; + } + } + if (constant_vbos != nvc0->constant_vbos) { + nvc0->constant_vbos = constant_vbos; + nvc0->dirty |= NVC0_NEW_ARRAYS; + } - nvc0->dirty |= NVC0_NEW_ARRAYS; + if (nvc0->dirty & NVC0_NEW_ARRAYS) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); } static void diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 9c13adff807..7cb1e0a43b7 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -126,26 +126,44 @@ nvc0_vertex_state_create(struct pipe_context *pipe, ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) static void -nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb, - struct pipe_vertex_element *ve, unsigned attr) +nvc0_update_constant_vertex_attribs(struct nvc0_context *nvc0) { - const void *data; - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nv04_resource *res = nv04_resource(vb->buffer); - float v[4]; - int i; - const unsigned nc = util_format_get_nr_components(ve->src_format); + uint32_t mask = nvc0->state.constant_elts; - data = nouveau_resource_map_offset(&nvc0->base, res, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_RD); + while (unlikely(mask)) { + const int i = ffs(mask) - 1; + uint32_t mode; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + const struct util_format_description *desc; + void *dst; + const void *src = nouveau_resource_map_offset(&nvc0->base, + nv04_resource(vb->buffer), + vb->buffer_offset + ve->src_offset, NOUVEAU_BO_RD); - util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); + mask &= ~(1 << i); - PUSH_SPACE(push, 6); - BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), nc + 1); - PUSH_DATA (push, VTX_ATTR(attr, nc, FLOAT, 32)); - for (i = 0; i < nc; ++i) - PUSH_DATAf(push, v[i]); + desc = util_format_description(ve->src_format); + + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5); + dst = push->cur + 1; + if (desc->channel[0].pure_integer) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + mode = VTX_ATTR(i, 4, SINT, 32); + desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1); + } else { + mode = VTX_ATTR(i, 4, UINT, 32); + desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1); + } + } else { + mode = VTX_ATTR(i, 4, FLOAT, 32); + desc->unpack_rgba_float(dst, 0, src, 0, 1, 1); + } + *push->cur = mode; + push->cur += 5; + } } static INLINE void @@ -225,13 +243,8 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; struct nv04_resource *buf = nv04_resource(vb->buffer); - if (!(nvc0->vbo_user & (1 << b))) - continue; - - if (!vb->stride) { - nvc0_emit_vtxattr(nvc0, vb, ve, i); + if (!(nvc0->vbo_user & (1 << b)) || !vb->stride) continue; - } nvc0_vbuf_range(nvc0, b, &base, &size); if (!(written & (1 << b))) { @@ -268,83 +281,88 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_stateobj *vertex = nvc0->vertex; struct pipe_vertex_buffer *vb; struct nvc0_vertex_element *ve; + uint32_t const_vbos; unsigned i; + boolean update_vertex; if (unlikely(vertex->need_conversion) || unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { - nvc0->vbo_fifo = ~0; nvc0->vbo_user = 0; + nvc0->vbo_fifo = ~nvc0->constant_vbos; } else { nvc0_prevalidate_vbufs(nvc0); + nvc0->vbo_fifo &= ~nvc0->constant_vbos; } + const_vbos = nvc0->vbo_fifo ? 0 : nvc0->constant_vbos; + + update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) || + (const_vbos != nvc0->state.constant_vbos); + if (update_vertex) { + uint32_t *restrict data; + const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts); + + if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) { + nvc0->state.instance_elts = vertex->instance_elts; + assert(n); /* if (n == 0), both masks should be 0 */ + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2); + PUSH_DATA (push, n); + PUSH_DATA (push, vertex->instance_elts); + } - PUSH_SPACE(push, vertex->num_elements + 1); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); - for (i = 0; i < vertex->num_elements; ++i) { - ve = &vertex->element[i]; - vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; - - if (likely(vb->stride) || nvc0->vbo_fifo) { - PUSH_DATA(push, ve->state); - } else { - PUSH_DATA(push, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); - nvc0->vbo_fifo &= ~(1 << i); + nvc0->state.num_vtxelts = vertex->num_elements; + nvc0->state.constant_vbos = const_vbos; + nvc0->state.constant_elts = 0; + + PUSH_SPACE(push, n * 2 + 1); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n); + data = push->cur; + push->cur += n; + for (i = 0; i < vertex->num_elements; ++data, ++i) { + ve = &vertex->element[i]; + *data = ve->state; + if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) { + *data |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST; + nvc0->state.constant_elts |= 1 << i; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } + } + for (; i < n; ++data, ++i) { + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + *data = NVC0_3D_VERTEX_ATTRIB_INACTIVE; } } - PUSH_SPACE(push, vertex->num_elements * 16); + PUSH_SPACE(push, vertex->num_elements * 8); for (i = 0; i < vertex->num_elements; ++i) { struct nv04_resource *res; unsigned size, offset; - + + if (nvc0->state.constant_elts & (1 << i)) + continue; ve = &vertex->element[i]; vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; - if (unlikely(ve->pipe.instance_divisor)) { - if (!(nvc0->state.instance_elts & (1 << i))) { - IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); - } - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1); - PUSH_DATA (push, ve->pipe.instance_divisor); - } else - if (unlikely(nvc0->state.instance_elts & (1 << i))) { - IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); - } - res = nv04_resource(vb->buffer); - - if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) { - if (!nvc0->vbo_fifo) - nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); - PUSH_DATA (push, 0); - continue; - } - - size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + size = vb->buffer->width0; - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); - PUSH_DATA (push, (1 << 12) | vb->stride); - BEGIN_1IC0(push, NVC0_3D(VERTEX_ARRAY_SELECT), 5); - PUSH_DATA (push, i); + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); PUSH_DATAh(push, res->address + size - 1); PUSH_DATA (push, res->address + size - 1); - PUSH_DATAh(push, res->address + offset); - PUSH_DATA (push, res->address + offset); } - for (; i < nvc0->state.num_vtxelts; ++i) { - PUSH_SPACE(push, 5); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(i)), 1); - PUSH_DATA (push, NVC0_3D_VERTEX_ATTRIB_INACTIVE); - if (unlikely(nvc0->state.instance_elts & (1 << i))) - IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); - PUSH_DATA (push, 0); - } - - nvc0->state.num_vtxelts = vertex->num_elements; - nvc0->state.instance_elts = vertex->instance_elts; } void @@ -393,7 +411,6 @@ nvc0_prim_gl(unsigned prim) NVC0_PRIM_GL_CASE(PATCHES); */ default: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; - break; } } @@ -666,6 +683,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nvc0_default_kick_notify; return; } + nvc0_update_constant_vertex_attribs(nvc0); /* space for base instance, flush, and prim restart */ PUSH_SPACE(push, 8); @@ -678,19 +696,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } if (nvc0->base.vbo_dirty) { - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 1); - PUSH_DATA (push, 0); + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); nvc0->base.vbo_dirty = FALSE; } - if (unlikely(info->count_from_stream_output)) { - nvc0_draw_stream_output(nvc0, info); - } else - if (!info->indexed) { - nvc0_draw_arrays(nvc0, - info->mode, info->start, info->count, - info->instance_count); - } else { + if (info->indexed) { boolean shorten = info->max_index <= 65535; assert(nvc0->idxbuf.buffer); @@ -719,6 +729,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_draw_elements(nvc0, shorten, info->mode, info->start, info->count, info->instance_count, info->index_bias); + } else + if (unlikely(info->count_from_stream_output)) { + nvc0_draw_stream_output(nvc0, info); + } else { + nvc0_draw_arrays(nvc0, + info->mode, info->start, info->count, + info->instance_count); } push->kick_notify = nvc0_default_kick_notify; -- 2.30.2