nv50: add support for address regs
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Mon, 19 Oct 2009 16:17:45 +0000 (18:17 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Mon, 19 Oct 2009 16:25:09 +0000 (18:25 +0200)
Allow indirect uniform access and increase the
limit on parameters from 128 to 512.

src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_screen.c

index bfd979ce0f6776a05e6bded404eef76e056c3947..c7145bb9beb07cae1322bc6c65555a56f50269a7 100644 (file)
@@ -32,6 +32,7 @@
 #include "nv50_context.h"
 
 #define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_ADDR 7
 //#define NV50_PROGRAM_DUMP
 
 /* ARL - gallium craps itself on progs/vp/arl.txt
@@ -79,7 +80,8 @@ struct nv50_reg {
                P_ATTR,
                P_RESULT,
                P_CONST,
-               P_IMMD
+               P_IMMD,
+               P_ADDR
        } type;
        int index;
 
@@ -99,6 +101,7 @@ struct nv50_pc {
 
        /* hw resources */
        struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+       struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
 
        /* tgsi resources */
        struct nv50_reg *temp;
@@ -112,6 +115,8 @@ struct nv50_pc {
        struct nv50_reg *immd;
        float *immd_buf;
        int immd_nr;
+       struct nv50_reg **addr;
+       int addr_nr;
 
        struct nv50_reg *temp_temp[16];
        unsigned temp_temp_nr;
@@ -158,6 +163,17 @@ popcnt4(uint32_t val)
        return cnt[val & 0xf];
 }
 
+static void
+terminate_mbb(struct nv50_pc *pc)
+{
+       int i;
+
+       /* remove records of temporary address register values:
+        * every pc->r_addr[] entry with index < 0 (i.e. not claimed for a
+        * TGSI address register, see alloc_addr) gets rhw reset to -1,
+        * which alloc_addr treats as "unused". Entries with index >= 0 are
+        * left untouched. The callers visible in this file invoke this at
+        * the control-flow opcodes IF, ELSE, ENDIF, BGNLOOP and ENDLOOP.
+        * NOTE(review): "mbb" presumably refers to a basic block - confirm.
+        */
+       for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+               if (pc->r_addr[i].index < 0)
+                       pc->r_addr[i].rhw = -1;
+}
+
 static void
 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 {
@@ -454,9 +470,68 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
        e->inst[1] |= (val >> 6) << 2;
 }
 
+static void
+emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val)
+{
+       struct nv50_program_exec *e = exec(pc);
+       /* Build and emit one long instruction that carries the constant
+        * 'val' and the hw index of 'dst'. 'val' must fit in 16 bits (see
+        * the assert) and is placed at bits 9 and up of inst[0]; dst->hw
+        * is placed at bits 2 and up of inst[0]. alloc_addr calls this
+        * with ref->hw * 4 to load an address register.
+        * NOTE(review): the opcode words 0xd0000000 / 0x20000000 are taken
+        * as-is; their hardware meaning is not visible here - confirm
+        * against the ISA documentation.
+        */
+       assert(val <= 0xffff);
+       e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9);
+       e->inst[1] = 0x20000000;
+       e->inst[0] |= dst->hw << 2;
+       set_long(pc, e);
+
+       emit(pc, e);
+}
+
+static struct nv50_reg *
+alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
+{
+       int i;
+       struct nv50_reg *a = NULL;
+       /* Hand out one of the pc->r_addr[] hardware address registers.
+        *
+        * ref == NULL: claim a register for a TGSI address register (the
+        * visible caller is tgsi_dst, case TGSI_FILE_ADDRESS). The first
+        * entry that is not already claimed (index < 0) and that either
+        * holds no cached value (rhw < 0) or was not used by the current
+        * instruction (acc != insn_cur) has its cached value dropped
+        * (rhw = -1) and is marked claimed by setting index = i. If no
+        * entry qualifies this asserts and returns NULL.
+        *
+        * ref != NULL: return a register through which 'ref' can be
+        * addressed (the visible caller is set_data, for src->hw > 127);
+        * see the second loop below.
+        */
+       if (!ref) {
+               for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+                       if (pc->r_addr[i].index >= 0)
+                               continue;
+                       if (pc->r_addr[i].rhw >= 0 &&
+                           pc->r_addr[i].acc == pc->insn_cur)
+                               continue;
+
+                       pc->r_addr[i].rhw = -1;
+                       pc->r_addr[i].index = i;
+                       return &pc->r_addr[i];
+               }
+               assert(0);
+               return NULL;
+       }
+       /* Scan from the highest entry down, skipping claimed ones. While
+        * scanning, remember in 'a' a fallback entry to reprogram: an
+        * unused entry (rhw < 0) always replaces the current fallback; an
+        * entry holding a value is only taken as fallback if there is none
+        * yet and it was not used by the current instruction. An entry
+        * holding a value that passes the range test is stamped with the
+        * current instruction and returned immediately.
+        */
+       for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
+               if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
+                       continue;
+               if (pc->r_addr[i].rhw < 0) { /* unused */
+                       a = &pc->r_addr[i];
+                       continue;
+               }
+               if (!a && pc->r_addr[i].acc != pc->insn_cur)
+                       a = &pc->r_addr[i];
+
+               if (ref->hw - pc->r_addr[i].rhw < 128) {
+               /* alloc'd & suitable */
+                       pc->r_addr[i].acc = pc->insn_cur;
+                       return &pc->r_addr[i];
+               }
+       }
+       assert(a);
+       emit_set_addr(pc, a, ref->hw * 4);
+       /* NOTE(review): the register is loaded with ref->hw * 4, but the
+        * value recorded for later reuse is ref->hw % 128, while the range
+        * test in the loop computes ref->hw - rhw. For the same 'ref' with
+        * hw > 127 that difference is a multiple of 128 and therefore
+        * never below 128, and a negative difference would pass the test
+        * if these fields are signed. Confirm the intended meaning of rhw
+        * and of the value loaded into the register.
+        */
+       a->rhw = ref->hw % 128;
+       a->acc = pc->insn_cur;
+       return a;
+}
 
 #define INTERP_LINEAR          0
-#define INTERP_FLAT                    1
+#define INTERP_FLAT            1
 #define INTERP_PERSPECTIVE     2
 #define INTERP_CENTROID                4
 
@@ -488,6 +563,16 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
        emit(pc, e);
 }
 
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
+{
+       assert(!(e->inst[0] & 0x0c000000));
+       assert(!(e->inst[1] & 0x00000004));
+       /* Store the hw index of address register 'a' in instruction 'e'.
+        * The asserts above check that bits 26-27 of inst[0] and bit 2 of
+        * inst[1] are still clear. The low two bits of a->hw go to bits
+        * 26-27 of inst[0]; the remaining bits of a->hw are placed
+        * starting at bit 2 of inst[1].
+        */
+       e->inst[0] |= (a->hw & 3) << 26;
+       e->inst[1] |= (a->hw >> 2) << 2;
+}
+
 static void
 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
         struct nv50_program_exec *e)
@@ -498,6 +583,14 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
        e->param.shift = s;
        e->param.mask = m << (s % 32);
 
+       if (src->hw > 127)
+               set_addr(e, alloc_addr(pc, src));
+       else
+       if (src->acc < 0) {
+               assert(src->type == P_CONST);
+               set_addr(e, pc->addr[src->index]);
+       }
+
        e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
 }
 
@@ -632,7 +725,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
        }
 
        alloc_reg(pc, src);
-       e->inst[0] |= (src->hw << 16);
+       e->inst[0] |= ((src->hw & 127) << 16);
 }
 
 static void
@@ -660,7 +753,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
        }
 
        alloc_reg(pc, src);
-       e->inst[1] |= (src->hw << 14);
+       e->inst[1] |= ((src->hw & 127) << 14);
 }
 
 static void
@@ -722,6 +815,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
        emit(pc, e);
 }
 
+static void
+emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+        uint8_t s)
+{
+       struct nv50_program_exec *e = exec(pc);
+       /* Emit one long instruction with 0xc0000000 set in inst[1],
+        * dst->hw at bits 2 and up of inst[0], the shift amount 's' at
+        * bits 16 and up of inst[0], and 'src' encoded through
+        * set_src_0_restricted. The visible caller (TGSI_OPCODE_ARL)
+        * passes a temporary holding the floor-converted S32 value of the
+        * source and s = 4.
+        * NOTE(review): presumably this loads address register 'dst' with
+        * src << s - confirm against the ISA documentation.
+        */
+       set_long(pc, e);
+       e->inst[1] |= 0xc0000000;
+
+       e->inst[0] |= dst->hw << 2;
+       e->inst[0] |= s << 16; /* shift left */
+       set_src_0_restricted(pc, src, e);
+
+       emit(pc, e);
+}
+
 static void
 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
            struct nv50_reg *src0, struct nv50_reg *src1)
@@ -1403,6 +1512,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
                return &pc->temp[dst->DstRegister.Index * 4 + c];
        case TGSI_FILE_OUTPUT:
                return &pc->result[dst->DstRegister.Index * 4 + c];
+       case TGSI_FILE_ADDRESS:
+       {
+               struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
+               if (!r) {
+                       r = alloc_addr(pc, NULL);
+                       pc->addr[dst->DstRegister.Index * 4 + c] = r;
+               }
+               assert(r);
+               return r;
+       }
        case TGSI_FILE_NULL:
                return NULL;
        default:
@@ -1418,7 +1537,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 {
        struct nv50_reg *r = NULL;
        struct nv50_reg *temp;
-       unsigned sgn, c;
+       unsigned sgn, c, swz;
+
+       if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
+               assert(!src->SrcRegister.Indirect);
 
        sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
 
@@ -1436,13 +1558,29 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
                        r = &pc->temp[src->SrcRegister.Index * 4 + c];
                        break;
                case TGSI_FILE_CONSTANT:
-                       r = &pc->param[src->SrcRegister.Index * 4 + c];
+                       if (!src->SrcRegister.Indirect) {
+                               r = &pc->param[src->SrcRegister.Index * 4 + c];
+                               break;
+                       }
+                       /* Indicate indirection by setting r->acc < 0 and
+                        * use the index field to select the address reg.
+                        */
+                       r = MALLOC_STRUCT(nv50_reg);
+                       swz = tgsi_util_get_src_register_swizzle(
+                                                &src->SrcRegisterInd, 0);
+                       ctor_reg(r, P_CONST,
+                                src->SrcRegisterInd.Index * 4 + swz, c);
+                       r->acc = -1;
                        break;
                case TGSI_FILE_IMMEDIATE:
                        r = &pc->immd[src->SrcRegister.Index * 4 + c];
                        break;
                case TGSI_FILE_SAMPLER:
                        break;
+               case TGSI_FILE_ADDRESS:
+                       r = pc->addr[src->SrcRegister.Index * 4 + c];
+                       assert(r);
+                       break;
                default:
                        assert(0);
                        break;
@@ -1678,8 +1816,15 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                        emit_add(pc, dst[c], src[0][c], src[1][c]);
                }
                break;
+       case TGSI_OPCODE_ARL:
+               assert(src[0][0]);
+               temp = temp_temp(pc);
+               emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+               emit_arl(pc, dst[0], temp, 4);
+               break;
        case TGSI_OPCODE_BGNLOOP:
                pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+               terminate_mbb(pc);
                break;
        case TGSI_OPCODE_BRK:
                emit_branch(pc, -1, 0, NULL);
@@ -1763,6 +1908,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                emit_branch(pc, -1, 0, NULL);
                pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
                pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+               terminate_mbb(pc);
                break;
        case TGSI_OPCODE_ENDIF:
                pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -1775,6 +1921,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                        pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
                        pc->br_join[pc->if_lvl] = NULL;
                }
+               terminate_mbb(pc);
                /* emit a NOP as join point, we could set it on the next
                 * one, but would have to make sure it is long and !immd
                 */
@@ -1785,6 +1932,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                emit_branch(pc, -1, 0, NULL);
                pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
                pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+               terminate_mbb(pc);
                break;
        case TGSI_OPCODE_EX2:
                emit_preex2(pc, temp, src[0][0]);
@@ -1812,6 +1960,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                set_pred_wr(pc, 1, 0, pc->if_cond);
                emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
                pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+               terminate_mbb(pc);
                break;
        case TGSI_OPCODE_KIL:
                emit_kil(pc, src[0][0]);
@@ -1989,6 +2138,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
                        src[i][c]->neg = 0;
                        if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
                                FREE(src[i][c]);
+                       else
+                       if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
+                               FREE(src[i][c]); /* indirect constant */
                }
        }
 
@@ -2332,8 +2484,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                                        pc->interp_mode[i] = mode;
                        }
                                break;
+                       case TGSI_FILE_ADDRESS:
                        case TGSI_FILE_CONSTANT:
-                               break;
                        case TGSI_FILE_SAMPLER:
                                break;
                        default:
@@ -2527,6 +2679,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
        pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
        pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
        pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+       pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
+       assert(pc->addr_nr <= 2);
 
        p->cfg.high_temp = 4;
 
@@ -2595,6 +2749,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
                                ctor_reg(&pc->param[rid], P_CONST, i, rid);
        }
 
+       if (pc->addr_nr) {
+               pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
+               if (!pc->addr)
+                       return FALSE;
+       }
+       for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+               ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
        return TRUE;
 }
 
@@ -2774,7 +2936,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
                                         p->immd_nr, NV50_CB_PMISC);
        }
 
-       assert(p->param_nr <= 128);
+       assert(p->param_nr <= 512);
 
        if (p->param_nr) {
                unsigned cb;
index 66361dc3bafdf403fc632903e233d41bf2f45fc2..0bd54876957ab4c862c46c7d66515af494163a1d 100644 (file)
@@ -301,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_data  (so, 8);
 
        /* constant buffers for immediates and VP/FP parameters */
-       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
                             &screen->constbuf_misc[0]);
        if (ret) {
                nv50_screen_destroy(pscreen);
@@ -309,7 +309,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        }
 
        for (i = 0; i < 2; i++) {
-               ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+               ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
                                     &screen->constbuf_parm[i]);
                if (ret) {
                        nv50_screen_destroy(pscreen);
@@ -318,8 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        }
 
        if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
-               nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
-               nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+           nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
+           nouveau_resource_init(&screen->parm_heap[1], 0, 512))
        {
                NOUVEAU_ERR("Error initialising constant buffers.\n");
                nv50_screen_destroy(pscreen);
@@ -340,7 +340,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, (NV50_CB_PMISC << 16) | 0x00000800);
+       so_data  (so, (NV50_CB_PMISC << 16) | 0x00000200);
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000001 | (NV50_CB_PMISC << 12));
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);