nv50: use "real" constbufs for shaders + tcb uploads
author Ben Skeggs <skeggsb@gmail.com>
Sun, 1 Jun 2008 13:10:31 +0000 (23:10 +1000)
committer Ben Skeggs <skeggsb@gmail.com>
Sun, 29 Jun 2008 05:46:13 +0000 (15:46 +1000)
src/gallium/drivers/nv50/nv50_context.h
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_screen.c
src/gallium/drivers/nv50/nv50_screen.h
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state.h
src/gallium/drivers/nv50/nv50_state_validate.c

index d4d716b94bff6e40071d28aedbe8abf2f8e1afb5..c4a8a4c064ea40e14d4d9cbdd4ccceed08b74331 100644 (file)
 #define NOUVEAU_MSG(fmt, args...) \
        fprintf(stderr, "nouveau: "fmt, ##args);
 
+/* Constant buffer assignment */
+#define NV50_CB_PMISC          0
+#define NV50_CB_PVP            1
+#define NV50_CB_PFP            2
+#define NV50_CB_PGP            3
+#define NV50_CB_TIC            4
+#define NV50_CB_TSC            5
+
 #define NV50_NEW_BLEND         (1 << 0)
 #define NV50_NEW_ZSA           (1 << 1)
 #define NV50_NEW_BLEND_COLOUR  (1 << 2)
 #define NV50_NEW_RASTERIZER    (1 << 6)
 #define NV50_NEW_FRAMEBUFFER   (1 << 7)
 #define NV50_NEW_VERTPROG      (1 << 8)
-#define NV50_NEW_FRAGPROG      (1 << 9)
-#define NV50_NEW_ARRAYS                (1 << 10)
+#define NV50_NEW_VERTPROG_CB   (1 << 9)
+#define NV50_NEW_FRAGPROG      (1 << 10)
+#define NV50_NEW_FRAGPROG_CB   (1 << 11)
+#define NV50_NEW_ARRAYS                (1 << 12)
 
 struct nv50_blend_stateobj {
        struct pipe_blend_state pipe;
index 30953b7d8af9194fa3e661619deeaf1dd31a1b7d..0a436469238723a011f08573fcd8626d9011f6ab 100644 (file)
@@ -179,6 +179,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst,
 
                if (src1) {
                        if (src1->type == P_CONST || src1->type == P_IMMD) {
+                               if (src1->type == P_IMMD)
+                                       inst[1] |= (NV50_CB_PMISC << 22);
+                               else
+                                       inst[1] |= (NV50_CB_PVP << 22);
                                inst[0] |= 0x00800000; /* src1 is const */
                                /*XXX: does src1 come from "src2" now? */
                                alloc_reg(pc, src1);
@@ -196,6 +200,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst,
 
                if (src2) {
                        if (src2->type == P_CONST || src2->type == P_IMMD) {
+                               if (src2->type == P_IMMD)
+                                       inst[1] |= (NV50_CB_PMISC << 22);
+                               else
+                                       inst[1] |= (NV50_CB_PVP << 22);
                                inst[0] |= 0x01000000; /* src2 is const */
                                inst[1] |= (src2->hw << 14);
                        } else {
@@ -526,7 +534,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
        }
 
        if (pc->immd_nr) {
-               int rid = pc->param_nr * 4;
+               int rid = 0;
 
                pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
                if (!pc->immd)
@@ -581,7 +589,6 @@ nv50_program_tx(struct nv50_program *p)
                }
        }
 
-       p->param_nr = pc->param_nr * 4;
        p->immd_nr = pc->immd_nr * 4;
        p->immd = pc->immd_buf;
 
@@ -654,23 +661,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        memcpy(map, p->insns, p->insns_nr * 4);
        ws->buffer_unmap(ws, p->buffer);
 
-       if (p->param_nr) {
-               float *cb;
-
-               cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX],
-                                   PIPE_BUFFER_USAGE_CPU_READ);
-               for (i = 0; i < p->param_nr; i++) {
-                       BEGIN_RING(tesla, 0x0f00, 2);
-                       OUT_RING  (i << 8);
-                       OUT_RING  (fui(cb[i]));
-               }
-               ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]);
-       }
-
-
        for (i = 0; i < p->immd_nr; i++) {
                BEGIN_RING(tesla, 0x0f00, 2);
-               OUT_RING  ((p->param_nr + i) << 8);
+               OUT_RING  ((NV50_CB_PMISC << 16) | (i << 8));
                OUT_RING  (fui(p->immd[i]));
        }
 
index fc3eeed9133e762dfcea0c72f8403931abae3db5..6c0810a9cfe01fc6263ae8a936834f9a81990f12 100644 (file)
@@ -203,14 +203,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
                return NULL;
        }
 
-       /* Static constant buffer */
-       screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4);
-       if (nvws->res_init(&screen->vp_data_heap, 0, 256)) {
-               NOUVEAU_ERR("Error initialising constant buffer\n");
-               nv50_screen_destroy(&screen->pipe);
-               return NULL;
-       }
-
        /* Static tesla init */
        so = so_new(256, 20);
 
@@ -245,37 +237,56 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
        so_method(so, screen->tesla, 0x16b8, 1);
        so_data  (so, 8);
 
+       /* Shared constant buffer */
+       screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4);
+       if (nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+               NOUVEAU_ERR("Error initialising constant buffer\n");
+               nv50_screen_destroy(&screen->pipe);
+               return NULL;
+       }
+
        so_method(so, screen->tesla, 0x1280, 3);
        so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00001000);
+       so_data  (so, (NV50_CB_PMISC << 16) | 0x00001000);
+
+       /* Texture sampler/image unit setup - we abuse the constant buffer
+        * upload mechanism for the moment to upload data to the tex config
+        * blocks.  At some point we *may* want to go the NVIDIA way of doing
+        * things?
+        */
+       screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
        so_method(so, screen->tesla, 0x1280, 3);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00014000);
-       so_method(so, screen->tesla, 0x1280, 3);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_data  (so, (NV50_CB_TIC << 16) | 0x0800);
+       so_method(so, screen->tesla, 0x1574, 3);
+       so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00024000);
+       so_data  (so, 0x00000800);
+
+       screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
        so_method(so, screen->tesla, 0x1280, 3);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00034000);
-       so_method(so, screen->tesla, 0x1280, 3);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_data  (so, (NV50_CB_TSC << 16) | 0x0800);
+       so_method(so, screen->tesla, 0x155c, 3);
+       so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+       so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00040100);
+       so_data  (so, 0x00000800);
+
 
+       /* Vertex array limits - max them out */
        for (i = 0; i < 16; i++) {
                so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
                so_data  (so, 0x000000ff);
index d63dd485085dc303e350e22db2ecf0bc782f2e37..5acb5003ba4b85ccf16b84e6e9381b57a8f46c24 100644 (file)
@@ -15,6 +15,9 @@ struct nv50_screen {
 
        struct pipe_buffer *constbuf;
        struct nouveau_resource *vp_data_heap;
+
+       struct pipe_buffer *tic;
+       struct pipe_buffer *tsc;
 };
 
 static INLINE struct nv50_screen *
index fd10a38378209a42086b17c81de7e9f362f8140a..ba3d04cede355f420707b2cbf6927699b67dae7c 100644 (file)
@@ -406,11 +406,11 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
 
        if (shader == PIPE_SHADER_VERTEX) {
                nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
-               nv50->dirty |= NV50_NEW_VERTPROG;
+               nv50->dirty |= NV50_NEW_VERTPROG_CB;
        } else
        if (shader == PIPE_SHADER_FRAGMENT) {
                nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
-               nv50->dirty |= NV50_NEW_FRAGPROG;
+               nv50->dirty |= NV50_NEW_FRAGPROG_CB;
        }
 }
 
index be0c75ad6ea80b6ef9f930434b899d256dda8615..9e3876871bd64b5a4eb24a237632c606dad4ceab 100644 (file)
@@ -20,7 +20,6 @@ struct nv50_program {
 
        struct pipe_buffer *buffer;
 
-       unsigned param_nr;
        float *immd;
        unsigned immd_nr;
 
index 4a548378b7148fcddf59d4b8d25604d67ac88687..05395c6df7caf218467990bcd8391df717f72d66 100644 (file)
@@ -168,6 +168,35 @@ nv50_state_validate(struct nv50_context *nv50)
                so_ref(NULL, &so);
        }
 
+       if (nv50->dirty & NV50_NEW_VERTPROG_CB) {
+               so = so_new(4, 2);
+               so_method(so, tesla, 0x1280, 3);
+               so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0,
+                         NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
+                         0, 0);
+               so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0,
+                         NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
+                         0, 0);
+               so_data  (so, (NV50_CB_PVP << 16) | 0x1000);
+               so_emit(nvws, so);
+               so_ref(NULL, &so);
+       }
+
+       if (nv50->dirty & NV50_NEW_FRAGPROG_CB) {
+               so = so_new(4, 2);
+               so_method(so, tesla, 0x1280, 3);
+               so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0,
+                         NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
+                         0, 0);
+               so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0,
+                         NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
+                         0, 0);
+               so_data  (so, (NV50_CB_PFP << 16) | 0x1000);
+               so_emit(nvws, so);
+               so_ref(NULL, &so);
+       }
+
+
        if (nv50->dirty & NV50_NEW_ARRAYS)
                nv50_vbo_validate(nv50);