nv40: support for keeping multiple vtxprogs on the hw at the same time.
author Ben Skeggs <skeggsb@gmail.com>
Fri, 23 Nov 2007 07:54:11 +0000 (18:54 +1100)
committer Ben Skeggs <skeggsb@gmail.com>
Fri, 23 Nov 2007 07:54:19 +0000 (18:54 +1100)
src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys.c
src/mesa/pipe/nouveau/nouveau_winsys.h
src/mesa/pipe/nv40/nv40_context.c
src/mesa/pipe/nv40/nv40_context.h
src/mesa/pipe/nv40/nv40_fragprog.c
src/mesa/pipe/nv40/nv40_state.c
src/mesa/pipe/nv40/nv40_state.h
src/mesa/pipe/nv40/nv40_state_emit.c
src/mesa/pipe/nv40/nv40_vbo.c
src/mesa/pipe/nv40/nv40_vertprog.c

index a2e85a0aa7fee701a8c2e2de76b26b983a377058..bdc89b953184f9424d72a7e98c2c53ce497f4d10 100644 (file)
@@ -3,6 +3,89 @@
 
 #include "pipe/nouveau/nouveau_winsys.h"
 
+/* Create a new resource heap: a single free span covering [0, size).
+ *
+ * On success the root node is stored in *heap and 0 is returned; 1 is
+ * returned if the node could not be allocated (*heap is left untouched).
+ */
+static int
+nouveau_resource_init(struct nouveau_resource **heap, int size)
+{
+       struct nouveau_resource *root = calloc(1, sizeof(*root));
+
+       if (root == NULL)
+               return 1;
+
+       /* calloc() already zeroed the links, in_use and priv. */
+       root->size  = size;
+       root->start = 0;
+
+       *heap = root;
+       return 0;
+}
+
+/* Carve `size` units out of the first free span in `heap` that is large
+ * enough (first fit).
+ *
+ * The new in-use node is taken from the END of the chosen free span and is
+ * linked in directly after it; the free span simply shrinks.  `priv` is
+ * stored in the node for the caller's own bookkeeping.
+ *
+ * Returns 0 and stores the node in *res on success.  Returns 1 on bad
+ * arguments (NULL heap/res, zero size, *res already set), when no free
+ * span is big enough, or when the node cannot be allocated.
+ */
+static int
+nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv,
+                      struct nouveau_resource **res)
+{
+       struct nouveau_resource *span, *node;
+
+       if (heap == NULL || size == 0 || res == NULL || *res != NULL)
+               return 1;
+
+       for (span = heap; span != NULL; span = span->next) {
+               /* Skip spans that are taken or too small. */
+               if (span->in_use || span->size < size)
+                       continue;
+
+               node = calloc(1, sizeof(*node));
+               if (node == NULL)
+                       return 1;
+
+               node->priv   = priv;
+               node->in_use = TRUE;
+               node->size   = size;
+               node->start  = (span->start + span->size) - size;
+
+               span->size -= size;
+
+               /* Insert the new node between span and span->next. */
+               node->prev = span;
+               node->next = span->next;
+               if (node->next != NULL)
+                       node->next->prev = node;
+               span->next = node;
+
+               *res = node;
+               return 0;
+       }
+
+       return 1;
+}
+
+/* Return an allocation to its heap and clear the caller's handle.
+ *
+ * The freed span is merged with every free neighbour so that the list
+ * never holds two adjacent free spans (res_alloc only ever looks at one
+ * span at a time, so unmerged neighbours would make a request that fits
+ * the combined space fail):
+ *   - previous span free: fold into it, then also absorb the following
+ *     span if that one is free as well;
+ *   - only the following span free: fold into it and move its start back;
+ *   - neither free: keep the node, mark it free and drop the stale owner
+ *     pointer.
+ *
+ * A NULL `res` or NULL `*res` is a no-op.  `*res` is always NULL on return.
+ */
+static void
+nouveau_resource_free(struct nouveau_resource **res)
+{
+       struct nouveau_resource *r, *prev, *next;
+
+       if (!res || !*res)
+               return;
+       r = *res;
+       prev = r->prev;
+       next = r->next;
+
+       if (prev && !prev->in_use) {
+               /* Fold this span into the free span in front of it. */
+               prev->size += r->size;
+               prev->next = next;
+               if (next)
+                       next->prev = prev;
+               free(r);
+
+               /* Freed spans are not referenced by any owner (their
+                * handle was cleared when they were released), so a free
+                * follower can be absorbed and its node discarded too.
+                */
+               if (next && !next->in_use) {
+                       prev->size += next->size;
+                       prev->next = next->next;
+                       if (next->next)
+                               next->next->prev = prev;
+                       free(next);
+               }
+       } else
+       if (next && !next->in_use) {
+               /* Grow the following free span backwards over this one. */
+               next->prev = prev;
+               if (prev)
+                       prev->next = next;
+               next->size += r->size;
+               next->start = r->start;
+               free(r);
+       } else {
+               /* No free neighbour: the node itself becomes a free span.
+                * Its previous owner is gone, so don't keep pointing at it.
+                */
+               r->in_use = FALSE;
+               r->priv = NULL;
+       }
+
+       *res = NULL;
+}
+
 static int
 nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count,
                            struct nouveau_notifier **notify)
@@ -67,6 +150,10 @@ nouveau_pipe_create(struct nouveau_context *nv)
        nvws->nv                = nv;
        nvws->channel           = nv->channel;
 
+       nvws->res_init          = nouveau_resource_init;
+       nvws->res_alloc         = nouveau_resource_alloc;
+       nvws->res_free          = nouveau_resource_free;
+
        nvws->begin_ring        = nouveau_pipe_dma_beginp;
        nvws->out_reloc         = nouveau_bo_emit_reloc;
        nvws->fire_ring         = nouveau_pipe_dma_kickoff;
index beee49a134f051e2baf4322906e63cadcab52328..28f17cdc8fa7a94f6c5298215e585531a8df1b88 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 #include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
 
 #include "pipe/nouveau/nouveau_bo.h"
 #include "pipe/nouveau/nouveau_channel.h"
 #include "pipe/nouveau/nouveau_grobj.h"
 #include "pipe/nouveau/nouveau_notifier.h"
 
+struct nouveau_resource {                /* one span of a resource heap (free or allocated) */
+       struct nouveau_resource *prev;    /* preceding span in the heap's doubly-linked list, NULL for the root */
+       struct nouveau_resource *next;    /* following span, NULL at the end of the list */
+
+       boolean in_use;                   /* TRUE once handed out by res_alloc, FALSE for free space */
+       void *priv;                       /* opaque pointer supplied by the caller of res_alloc */
+
+       uint start;                       /* first unit covered by this span */
+       uint size;                        /* number of units covered by this span */
+};
+
 struct nouveau_winsys {
        struct nouveau_context *nv;
 
        struct nouveau_channel *channel;
 
+       int  (*res_init)(struct nouveau_resource **heap, int size);
+       int  (*res_alloc)(struct nouveau_resource *heap, int size, void *priv,
+                         struct nouveau_resource **);
+       void (*res_free)(struct nouveau_resource **);
+
        /*XXX: this is crappy, and bound to be slow.. however, it's nice and
         *     simple, it'll do for the moment*/
        uint32_t *(*begin_ring)(struct nouveau_grobj *, int mthd, int size);
index 6f2709164325beafa387b9f8b5815884fd03c46a..ba9d875e0eddd8abbfb63dd4521f36499c04b539 100644 (file)
@@ -241,6 +241,14 @@ nv40_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws,
                return NULL;
        }
 
+       if (nvws->res_init(&nv40->vertprog.exec_heap, 512) ||
+           nvws->res_init(&nv40->vertprog.data_heap, 256)) {
+               nvws->res_free(&nv40->vertprog.exec_heap);
+               nvws->res_free(&nv40->vertprog.data_heap);
+               free(nv40);
+               return NULL;
+       }
+
        if (!nv40_init_hwctx(nv40, curie_class)) {
                free(nv40);
                return NULL;
index 63be38299fb17655466e410577dcbe86e4652fb7..ec07a8837066820aea7ec9bb78003d7d12c98abc 100644 (file)
@@ -44,16 +44,19 @@ struct nv40_context {
        uint32_t                   tex_dirty;
 
        struct {
-               struct nv40_vertex_program *vp;
-               struct nv40_vertex_program *active_vp;
+               struct nouveau_resource *exec_heap;
+               struct nouveau_resource *data_heap;
 
+               struct nv40_vertex_program *active;
+
+               struct nv40_vertex_program *current;
                struct pipe_buffer_handle *constant_buf;
        } vertprog;
 
        struct {
-               struct nv40_fragment_program *fp;
-               struct nv40_fragment_program *active_fp;
+               struct nv40_fragment_program *active;
 
+               struct nv40_fragment_program *current;
                struct pipe_buffer_handle *constant_buf;
        } fragprog;
 
index a5c562ef716ae54a2fe8f651ca9276cded71936a..09b68513bac55ada4a65dd4361a2813dd69a9069 100644 (file)
@@ -645,6 +645,6 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
        BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
        OUT_RING  (fp_control);
 
-       nv40->fragprog.active_fp = fp;
+       nv40->fragprog.active = fp;
 }
 
index 9b58409e51ebbe733abd56daaac0bacdb53c9008..b1928a5f91a2cd20d43d5bd08801d6302fd234c8 100644 (file)
@@ -443,7 +443,7 @@ nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
        struct nv40_context *nv40 = (struct nv40_context *)pipe;
        struct nv40_vertex_program *vp = hwcso;
 
-       nv40->vertprog.vp = vp;
+       nv40->vertprog.current = vp;
        nv40->dirty |= NV40_NEW_VERTPROG;
 }
 
@@ -471,7 +471,7 @@ nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
        struct nv40_context *nv40 = (struct nv40_context *)pipe;
        struct nv40_fragment_program *fp = hwcso;
 
-       nv40->fragprog.fp = fp;
+       nv40->fragprog.current = fp;
        nv40->dirty |= NV40_NEW_FRAGPROG;
 }
 
index 14bf5d83e3e1424728c27223692fa956128dfe65..d0114972ef2f01d9ba9605cdd2f1a161c0db36e2 100644 (file)
@@ -57,11 +57,13 @@ struct nv40_vertex_program {
        const struct pipe_shader_state *pipe;
 
        boolean translated;
-       boolean on_hw;
-       int start_ip;
 
+       struct nouveau_resource *exec;
        uint32_t *insn;
-       int       insn_len;
+       uint insn_len;
+
+       struct nouveau_resource *data;
+       uint data_start;
 
        struct {
                int pipe_id;
index 3224c8299942212d9510e59c7aa17e3634e005bb..55c7d36b12902276035394435824c8061d9041f5 100644 (file)
@@ -6,7 +6,7 @@ static INLINE void
 nv40_state_update_fragprog(struct nv40_context *nv40)
 {
        struct pipe_context *pipe = (struct pipe_context *)nv40;
-       struct nv40_fragment_program *fp = nv40->fragprog.fp;
+       struct nv40_fragment_program *fp = nv40->fragprog.current;
        float *map;
        int i;
 
@@ -36,57 +36,18 @@ nv40_state_update_fragprog(struct nv40_context *nv40)
        }
 }
 
-static INLINE void
-nv40_state_update_vertprog(struct nv40_context *nv40)
-{
-       struct pipe_context *pipe = (struct pipe_context *)nv40;
-       struct nv40_vertex_program *vp = nv40->vertprog.vp;
-       float *map;
-       int i, force_consts = 0;
-
-       if (!nv40->vertprog.vp->translated)
-               nv40_vertprog_translate(nv40, nv40->vertprog.vp);
-
-       if (nv40->vertprog.vp != nv40->vertprog.active_vp)
-               force_consts = 1;
-
-       if (vp->num_consts) {
-               map = pipe->winsys->buffer_map(pipe->winsys,
-                                              nv40->vertprog.constant_buf,
-                                              PIPE_BUFFER_FLAG_READ);
-               for (i = 0; i < vp->num_consts; i++) {
-                       uint pid = vp->consts[i].pipe_id;
-
-                       if (pid >= 0) {
-                               if (!force_consts &&
-                                   !memcmp(vp->consts[i].value, &map[pid*4],
-                                           4 * sizeof(float)))
-                                       continue;
-                               memcpy(vp->consts[i].value, &map[pid*4],
-                                      4 * sizeof(float));
-                       }
-
-                       BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
-                       OUT_RING  (vp->consts[i].hw_id);
-                       OUT_RINGp ((uint32_t *)vp->consts[i].value, 4);
-               }
-               pipe->winsys->buffer_unmap(pipe->winsys,
-                                          nv40->vertprog.constant_buf);
-       }
-}
-
 void
 nv40_emit_hw_state(struct nv40_context *nv40)
 {
        if (nv40->dirty & NV40_NEW_FRAGPROG) {
-               struct nv40_fragment_program *cur = nv40->fragprog.fp;
+               struct nv40_fragment_program *cur = nv40->fragprog.current;
 
                nv40_state_update_fragprog(nv40);
        
                if (cur->on_hw)
                        nv40->dirty &= ~NV40_NEW_FRAGPROG;
 
-               if (!cur->on_hw || cur != nv40->fragprog.active_fp)
+               if (!cur->on_hw || cur != nv40->fragprog.active)
                        nv40_fragprog_bind(nv40, cur);
        }
 
@@ -101,12 +62,8 @@ nv40_emit_hw_state(struct nv40_context *nv40)
                nv40->dirty &= ~(NV40_NEW_TEXTURE | NV40_NEW_FRAGPROG);
        }
 
-       if (nv40->dirty & NV40_NEW_VERTPROG) {
-               nv40_state_update_vertprog(nv40);
-               if (nv40->vertprog.vp != nv40->vertprog.active_vp)
-                       nv40_vertprog_bind(nv40, nv40->vertprog.vp);
-               nv40->dirty &= ~NV40_NEW_VERTPROG;
-       }
+       if (nv40->dirty & NV40_NEW_VERTPROG)
+               nv40_vertprog_bind(nv40, nv40->vertprog.current);
 
        if (nv40->dirty & NV40_NEW_ARRAYS) {
                nv40_vbo_arrays_update(nv40);
index aa930476b6f728a22b753ff3cc8e05d1b33e14c3..2da0001b2886d2b87fb6fe5361223a538a656e9c 100644 (file)
@@ -176,7 +176,7 @@ void
 nv40_vbo_arrays_update(struct nv40_context *nv40)
 {
        struct nouveau_winsys *nvws = nv40->nvws;
-       struct nv40_vertex_program *vp = nv40->vertprog.vp;
+       struct nv40_vertex_program *vp = nv40->vertprog.active;
        uint32_t inputs, vtxfmt[16];
        int hw, num_hw;
 
index 68334b7420ef5862d8b253dc3583b30ca88ef346..04bdaacc98adec61c15bb71bdfd30f23bd8c76f3 100644 (file)
@@ -576,21 +576,108 @@ out_err:
 void
 nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
 { 
+       struct nouveau_winsys *nvws = nv40->nvws;
+       struct pipe_context *pipe = &nv40->pipe;
+       boolean upload_code = FALSE, upload_data = FALSE;
+       float *map;
        int i;
 
+       /* Translate TGSI shader into hw bytecode */
        if (!vp->translated) {
-               NOUVEAU_ERR("vtxprog invalid, using passthrough shader\n");
-               vp = &passthrough_vp;
+               nv40_vertprog_translate(nv40, vp);
+               if (!vp->translated)
+                       assert(0);
        }
 
-       if (!vp->on_hw) {
-               if (nv40->vertprog.active_vp)
-                       nv40->vertprog.active_vp->on_hw = FALSE;
-               vp->on_hw    = TRUE;
-               vp->start_ip = 0;
+       /* Allocate hw vtxprog exec slots */
+       /*XXX: when we do branching, need to patch targets if program moves.
+        */
+       if (!vp->exec) {
+               struct nouveau_resource *heap = nv40->vertprog.exec_heap;
+               uint vplen = vp->insn_len / 4;
+
+               if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+                       while (heap->next && heap->size < vplen) {
+                               struct nv40_vertex_program *evict;
+                               
+                               evict = heap->next->priv;
+                               nvws->res_free(&evict->exec);
+                       }
+
+                       if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+                               assert(0);
+               }
+
+               upload_code = TRUE;
+       }
+
+       /* Allocate hw vtxprog const slots */
+       if (vp->num_consts && !vp->data) {
+               struct nouveau_resource *heap = nv40->vertprog.data_heap;
+               int count = vp->num_consts;
+
+               if (nvws->res_alloc(heap, count, vp, &vp->data)) {
+                       while (heap->next && heap->size < count) {
+                               struct nv40_vertex_program *evict;
+                               
+                               evict = heap->next->priv;
+                               nvws->res_free(&evict->data);
+                       }
+
+                       if (nvws->res_alloc(heap, count, vp, &vp->data))
+                               assert(0);
+               }
+
+               upload_data = TRUE;
+       }
+
+       /* If constants moved, patch the vtxprog to fix the offsets */
+       if (vp->num_consts && vp->data_start != vp->data->start) {
+               for (i = 0; i < vp->insn_len; i += 4) {
+                       int id;
+
+                       id = (vp->insn[i + 1] & NV40_VP_INST_CONST_SRC_MASK) >>
+                            NV40_VP_INST_CONST_SRC_SHIFT;
+                       id -= vp->data_start;
+                       id += vp->data->start;
+
+                       vp->insn[i + 1] &= ~NV40_VP_INST_CONST_SRC_MASK;
+                       vp->insn[i + 1] |= (id << NV40_VP_INST_CONST_SRC_SHIFT);
+               }
+
+               vp->data_start = vp->data->start;
+               upload_code = TRUE;
+       }
 
+       /* Update + Upload constant values */
+       if (vp->num_consts) {
+               map = pipe->winsys->buffer_map(pipe->winsys,
+                                              nv40->vertprog.constant_buf,
+                                              PIPE_BUFFER_FLAG_READ);
+               for (i = 0; i < vp->num_consts; i++) {
+                       uint pid = vp->consts[i].pipe_id;
+
+                       if (pid >= 0) {
+                               if (!upload_data &&
+                                   !memcmp(vp->consts[i].value, &map[pid*4],
+                                           4 * sizeof(float)))
+                                       continue;
+                               memcpy(vp->consts[i].value, &map[pid*4],
+                                      4 * sizeof(float));
+                       }
+
+                       BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+                       OUT_RING  (vp->consts[i].hw_id + vp->data->start);
+                       OUT_RINGp ((uint32_t *)vp->consts[i].value, 4);
+               }
+               pipe->winsys->buffer_unmap(pipe->winsys,
+                                          nv40->vertprog.constant_buf);
+       }
+
+       /* Upload vtxprog */
+       if (upload_code) {
                BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
-               OUT_RING  (vp->start_ip);
+               OUT_RING  (vp->exec->start);
                for (i = 0; i < vp->insn_len; i += 4) {
                        BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
                        OUT_RINGp (&vp->insn[i], 4);
@@ -598,10 +685,11 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
        }
 
        BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1);
-       OUT_RING  (vp->start_ip);
+       OUT_RING  (vp->exec->start);
        BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2);
        OUT_RING  (vp->ir);
        OUT_RING  (vp->or);
 
-       nv40->vertprog.active_vp = vp;
+       nv40->vertprog.active = vp;
 }
+