/* mesa.git: src/gallium/drivers/nv50/nv50_push.c */
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"

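/* State for pushing vertex data inline through the command stream instead of
 * letting the GPU fetch it from vertex buffers: per-attribute source pointer,
 * stride, instance divisor/step, and the callback that writes one attribute
 * into the ring.
 */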
struct push_context {
   struct nv50_context *nv50;

   unsigned vtx_size;

   void *idxbuf;
   int32_t idxbias;
   unsigned idxsize;

   float edgeflag;
   int edgeflag_attr;

   struct {
      void *map;
      unsigned stride;
      unsigned divisor;
      unsigned step;
      void (*push)(struct nouveau_channel *, void *);
   } attr[16];
   unsigned attr_nr;
};

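/* emit_bXX_N: write one attribute of N components with XX-bit component size
 * into the command stream, packed into 32-bit words.
 */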
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
}

static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
   OUT_RING(chan, v[2]);
}

static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
   uint32_t *v = data;

   OUT_RING(chan, v[0]);
   OUT_RING(chan, v[1]);
   OUT_RING(chan, v[2]);
   OUT_RING(chan, v[3]);
}

static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
   uint16_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
   uint16_t *v = data;

   OUT_RING(chan, (v[1] << 16) | v[0]);
   OUT_RING(chan, v[2]);
}

static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
   uint8_t *v = data;

   OUT_RING(chan, v[0]);
}

static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
   uint8_t *v = data;

   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
}

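/* Emit a single vertex: update the hardware edgeflag state if the vertex
 * shader reads it and its value changed, then push all enabled attributes
 * as inline VERTEX_DATA.
 */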
static INLINE void
emit_vertex(struct push_context *ctx, unsigned n)
{
   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   int i;

   if (ctx->edgeflag_attr < 16) {
      float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
         ctx->attr[ctx->edgeflag_attr].stride * n;

      if (*edgeflag != ctx->edgeflag) {
         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
         OUT_RING (chan, *edgeflag ? 1 : 0);
         ctx->edgeflag = *edgeflag;
      }
   }

   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
   for (i = 0; i < ctx->attr_nr; i++)
      ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
}

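/* u_split_prim edge callback: set the hardware edgeflag state directly. */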
static void
emit_edgeflag(void *priv, boolean enabled)
{
   struct push_context *ctx = priv;
   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;

   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
   OUT_RING (chan, enabled ? 1 : 0);
}

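/* u_split_prim emit callbacks for indexed draws: fetch 8-, 16- or 32-bit
 * indices (optionally applying the index bias) and emit the referenced
 * vertices.
 */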
static void
emit_elt08(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint8_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt08_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint8_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

static void
emit_elt16(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint16_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt16_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint16_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

static void
emit_elt32(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint32_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++]);
}

static void
emit_elt32_biased(void *priv, unsigned start, unsigned count)
{
   struct push_context *ctx = priv;
   uint32_t *idxbuf = ctx->idxbuf;

   while (count--)
      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
}

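/* u_split_prim emit callback for non-indexed draws. */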
static void
emit_verts(void *priv, unsigned start, unsigned count)
{
   while (count--)
      emit_vertex(priv, start++);
}

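/* Draw by pushing vertex data inline through the FIFO (used when the GPU
 * cannot fetch the vertex buffers directly): map the vertex and index
 * buffers, pick per-attribute emit callbacks based on the vertex format,
 * then emit each instance, splitting primitives to fit the available ring
 * space.
 */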
void
nv50_push_elements_instanced(struct pipe_context *pipe,
                             struct pipe_resource *idxbuf,
                             unsigned idxsize, int idxbias,
                             unsigned mode, unsigned start, unsigned count,
                             unsigned i_start, unsigned i_count)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   struct push_context ctx;
   const unsigned p_overhead = 4 + /* begin/end */
                               4; /* potential edgeflag enable/disable */
   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
                               2; /* potential edgeflag modification */
   struct u_split_prim s;
   unsigned vtx_size;
   boolean nzi = FALSE;
   int i;

   ctx.nv50 = nv50;
   ctx.attr_nr = 0;
   ctx.idxbuf = NULL;
   ctx.vtx_size = 0;
   ctx.edgeflag = 0.5f;
   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;

   /* map vertex buffers, determine vertex size */
   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
      struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
      unsigned size, nr_components, n;

      if (!(nv50->vbo_fifo & (1 << i)))
         continue;
      n = ctx.attr_nr++;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
      nouveau_bo_unmap(bo);

      ctx.attr[n].stride = vb->stride;
      ctx.attr[n].divisor = ve->instance_divisor;
      if (ctx.attr[n].divisor) {
         ctx.attr[n].step = i_start % ve->instance_divisor;
         ctx.attr[n].map += i_start * vb->stride;
      }

      size = util_format_get_component_bits(ve->src_format,
                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
      nr_components = util_format_get_nr_components(ve->src_format);
      switch (size) {
      case 8:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b08_1; break;
         case 2: ctx.attr[n].push = emit_b16_1; break;
         case 3: ctx.attr[n].push = emit_b08_3; break;
         case 4: ctx.attr[n].push = emit_b32_1; break;
         }
         ctx.vtx_size++;
         break;
      case 16:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b16_1; break;
         case 2: ctx.attr[n].push = emit_b32_1; break;
         case 3: ctx.attr[n].push = emit_b16_3; break;
         case 4: ctx.attr[n].push = emit_b32_2; break;
         }
         ctx.vtx_size += (nr_components + 1) >> 1;
         break;
      case 32:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b32_1; break;
         case 2: ctx.attr[n].push = emit_b32_2; break;
         case 3: ctx.attr[n].push = emit_b32_3; break;
         case 4: ctx.attr[n].push = emit_b32_4; break;
         }
         ctx.vtx_size += nr_components;
         break;
      default:
         assert(0);
         return;
      }
   }
   vtx_size = ctx.vtx_size + v_overhead;

   /* map index buffer, if present */
   if (idxbuf) {
      struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.idxbuf = bo->map;
      ctx.idxbias = idxbias;
      ctx.idxsize = idxsize;
      nouveau_bo_unmap(bo);
   }

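   /* Hook up the u_split_prim callbacks: edgeflag toggling and vertex
    * emission matching the index size (or direct emission if non-indexed).
    */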
   s.priv = &ctx;
   s.edge = emit_edgeflag;
   if (idxbuf) {
      if (idxsize == 1)
         s.emit = idxbias ? emit_elt08_biased : emit_elt08;
      else
      if (idxsize == 2)
         s.emit = idxbias ? emit_elt16_biased : emit_elt16;
      else
         s.emit = idxbias ? emit_elt32_biased : emit_elt32;
   } else
      s.emit = emit_verts;

   /* per-instance loop */
   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
   OUT_RING (chan, NV50_CB_AUX | (24 << 8));
   OUT_RING (chan, i_start);
   while (i_count--) {
      unsigned max_verts;
      boolean done;

      for (i = 0; i < ctx.attr_nr; i++) {
         if (!ctx.attr[i].divisor ||
              ctx.attr[i].divisor != ++ctx.attr[i].step)
            continue;
         ctx.attr[i].step = 0;
         ctx.attr[i].map += ctx.attr[i].stride;
      }

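      /* Split the primitive into chunks that fit into the space left in the
       * ring; flush and revalidate state when the ring runs low.
       */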
      u_split_prim_init(&s, mode, start, count);
      do {
         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
            FIRE_RING(chan);
            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
               assert(0);
               return;
            }
         }

         max_verts = AVAIL_RING(chan);
         max_verts -= p_overhead;
         max_verts /= vtx_size;

         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
         OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
         done = u_split_prim_next(&s, max_verts);
         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
         OUT_RING (chan, 0);
      } while (!done);

      nzi = TRUE;
   }
}