From c7068d79a06c374dd6e732a53436dd9bcdba7e0e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 21:09:53 +0100
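Subject: [PATCH] give each vertex element its own buffer

Stop interleaving several vertex elements in one vertex buffer in
vl_mpeg12_mc_renderer:

- struct vert_stream_0 (position + interlaced flag) is removed; the
  position stays in the ycbcr buffer and the interlaced flag moves to a
  new "interlaced" buffer.
- The two ref[] buffers, each holding two motion vectors per vertex,
  become four mv[] buffers holding one motion vector per vertex.
- vertex_bufs.all grows from 4 to 7 entries and the vertex_buffer_index
  of the interlaced and MV0..MV3 elements is renumbered to 2..6, all
  with src_offset 0.
- gen_macroblock_verts() now receives its position in vertices
  (macroblock offset * 4) instead of in macroblocks.

Also drops struct fragment_shader_consts.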

---
 .../auxiliary/vl/vl_mpeg12_mc_renderer.c      | 157 +++++++++---------
 .../auxiliary/vl/vl_mpeg12_mc_renderer.h      |   6 +-
 2 files changed, 85 insertions(+), 78 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ab6f8b092fe..75d0f6ef57c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -48,18 +48,6 @@ struct vertex_shader_consts
    struct vertex4f norm;
 };
 
-struct fragment_shader_consts
-{
-   struct vertex4f multiplier;
-   struct vertex4f div;
-};
-
-struct vert_stream_0
-{
-   struct vertex2f pos;
-   float interlaced;
-};
-
 enum VS_INPUT
 {
    VS_I_RECT,
@@ -550,7 +538,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
-   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
+   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f);
    r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
@@ -558,19 +546,30 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vert_stream_0) * 4 * r->macroblocks_per_batch
+      sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
    );
 
-   for (i = 0; i < 2; ++i) {
-      r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
-      r->vertex_bufs.individual.ref[i].max_index = 4 * r->macroblocks_per_batch - 1;
-      r->vertex_bufs.individual.ref[i].buffer_offset = 0;
+   r->vertex_bufs.individual.interlaced.stride = sizeof(float);
+   r->vertex_bufs.individual.interlaced.max_index = 4 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.interlaced.buffer_offset = 0;
+   /* XXX: Create with usage DYNAMIC or STREAM */
+   r->vertex_bufs.individual.interlaced.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(float) * 4 * r->macroblocks_per_batch
+   );
+
+   for (i = 0; i < 4; ++i) {
+      r->vertex_bufs.individual.mv[i].stride = sizeof(struct vertex2f);
+      r->vertex_bufs.individual.mv[i].max_index = 4 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.individual.mv[i].buffer_offset = 0;
       /* XXX: Create with usage DYNAMIC or STREAM */
-      r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
+      r->vertex_bufs.individual.mv[i].buffer = pipe_buffer_create
       (
          r->pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
-         sizeof(struct vertex2f) * 2 * 4 * r->macroblocks_per_batch
+         sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
       );
    }
 
@@ -589,33 +588,33 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* progressive=1.0f interlaced=0.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_INTERLACED].src_offset = 0;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
-   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
+   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 2;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* First ref surface top field texcoord element */
    vertex_elems[VS_I_MV0].src_offset = 0;
    vertex_elems[VS_I_MV0].instance_divisor = 0;
-   vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV0].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV1].src_offset = 0;
    vertex_elems[VS_I_MV1].instance_divisor = 0;
-   vertex_elems[VS_I_MV1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV1].vertex_buffer_index = 4;
    vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface top field texcoord element */
    vertex_elems[VS_I_MV2].src_offset = 0;
    vertex_elems[VS_I_MV2].instance_divisor = 0;
-   vertex_elems[VS_I_MV2].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV2].vertex_buffer_index = 5;
    vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV3].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV3].src_offset = 0;
    vertex_elems[VS_I_MV3].instance_divisor = 0;
-   vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV3].vertex_buffer_index = 6;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
@@ -678,9 +677,9 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 }
 
 void
-gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
+gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
                      struct pipe_mpeg12_macroblock *mb, unsigned pos,
-                     struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
+                     struct vertex2f *ycbcr_vb, float *interlaced_vb, struct vertex2f **mv_vb)
 {
    struct vertex2f mo_vec[2];
 
@@ -689,7 +688,6 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
    assert(r);
    assert(mb);
    assert(ycbcr_vb);
-   assert(pos < r->macroblocks_per_batch);
 
    mo_vec[1].x = 0;
    mo_vec[1].y = 0;
@@ -697,19 +695,20 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
       {
-         struct vertex2f *vb;
+         struct vertex2f *vb[2];
 
-         assert(ref_vb && ref_vb[1]);
+         assert(mv_vb && mv_vb[2] && mv_vb[3]);
 
-         vb = ref_vb[1] + pos * 2 * 4;
+         vb[0] = mv_vb[2] + pos;
+         vb[1] = mv_vb[3] + pos;
 
          mo_vec[0].x = mb->pmv[0][1][0];
          mo_vec[0].y = mb->pmv[0][1][1];
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
             }
          }
          else {
@@ -721,11 +720,11 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             if(mb->mvfs[0][1]) mo_vec[0].y += 2;
             if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
 
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
-               vb[i + 1].x = mo_vec[1].x;
-               vb[i + 1].y = mo_vec[1].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
+               vb[1][i].x = mo_vec[1].x;
+               vb[1][i].y = mo_vec[1].y;
             }
          }
 
@@ -734,11 +733,12 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
       {
-         struct vertex2f *vb;
+         struct vertex2f *vb[2];
 
-         assert(ref_vb && ref_vb[0]);
+         assert(mv_vb && mv_vb[0] && mv_vb[1]);
 
-         vb = ref_vb[0] + pos * 2 * 4;
+         vb[0] = mv_vb[0] + pos;
+         vb[1] = mv_vb[1] + pos;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
             mo_vec[0].x = mb->pmv[0][1][0];
@@ -770,17 +770,17 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          }
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
             }
          }
          else {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
-               vb[i + 1].x = mo_vec[1].x;
-               vb[i + 1].y = mo_vec[1].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
+               vb[1][i].x = mo_vec[1].x;
+               vb[1][i].y = mo_vec[1].y;
             }
          }
 
@@ -788,16 +788,11 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         struct vert_stream_0 *vb = ycbcr_vb + pos * 4;
-         struct vert_stream_0 v;
-
-         v.pos.x = mb->mbx;
-         v.pos.y = mb->mby;
-
-         v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-
-         for ( i = 0; i < 4; ++i )
-            memcpy(vb + i, &v, sizeof(v));
+         for ( i = 0; i < 4; ++i ) {
+            ycbcr_vb[i + pos].x = mb->mbx;
+            ycbcr_vb[i + pos].y = mb->mby;
+            interlaced_vb[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+         }
 
          break;
       }
@@ -811,9 +806,10 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
                       unsigned *num_macroblocks)
 {
    unsigned offset[VL_NUM_MACROBLOCK_TYPES];
-   struct vert_stream_0 *ycbcr_vb;
-   struct vertex2f *ref_vb[2];
-   struct pipe_transfer *buf_transfer[3];
+   struct vertex2f *ycbcr_vb;
+   float *interlaced_vb;
+   struct vertex2f *mv_vb[4];
+   struct pipe_transfer *buf_transfer[7];
    unsigned i;
 
    assert(r);
@@ -829,7 +825,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
    for (i = 1; i < VL_NUM_MACROBLOCK_TYPES; ++i)
       offset[i] = offset[i - 1] + num_macroblocks[i - 1];
 
-   ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
+   ycbcr_vb = (struct vertex2f *)pipe_buffer_map
    (
       r->pipe,
       r->vertex_bufs.individual.ycbcr.buffer,
@@ -837,27 +833,36 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
       &buf_transfer[0]
    );
 
-   for (i = 0; i < 2; ++i)
-      ref_vb[i] = (struct vertex2f *)pipe_buffer_map
+   interlaced_vb = (float *)pipe_buffer_map
+   (
+      r->pipe,
+      r->vertex_bufs.individual.interlaced.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer[1]
+   );
+
+   for (i = 0; i < 4; ++i)
+      mv_vb[i] = (struct vertex2f *)pipe_buffer_map
       (
          r->pipe,
-         r->vertex_bufs.individual.ref[i].buffer,
+         r->vertex_bufs.individual.mv[i].buffer,
          PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &buf_transfer[i + 1]
+         &buf_transfer[i + 2]
       );
 
    for (i = 0; i < r->num_macroblocks; ++i) {
       enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
 
-      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
-                           ycbcr_vb, ref_vb);
+      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type] * 4,
+                           ycbcr_vb, interlaced_vb, mv_vb);
 
       ++offset[mb_type];
    }
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
-   for (i = 0; i < 2; ++i)
-      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.interlaced.buffer, buf_transfer[1]);
+   for (i = 0; i < 4; ++i)
+      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 2]);
 }
 
 static struct pipe_sampler_view
@@ -901,7 +906,7 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
 
    handler = &r->mbtype_handlers[type];
 
-   r->pipe->set_vertex_buffers(r->pipe, 2 + ref_frames, r->vertex_bufs.all);
+   r->pipe->set_vertex_buffers(r->pipe, 3 + ref_frames * mv_per_frame, r->vertex_bufs.all);
    r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
    if(ref_frames == 2) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 43dbee6f220..fd310e6ac3e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -102,8 +102,10 @@ struct vl_mpeg12_mc_renderer
 
    union
    {
-      struct pipe_vertex_buffer all[4];
-      struct { struct pipe_vertex_buffer rect, ycbcr, ref[2]; } individual;
+      struct pipe_vertex_buffer all[7];
+      struct {
+         struct pipe_vertex_buffer quad, ycbcr, interlaced, mv[4];
+      } individual;
    } vertex_bufs;
 
    struct pipe_surface *surface, *past, *future;
-- 
2.30.2