Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
index 973a746d528ea28389d4e05b18451adf4d1e3847..de83b6a5338b0b363991b5e9a31b2cae5d3cc9dd 100644 (file)
@@ -35,6 +35,7 @@
 #include <util/u_memory.h>
 #include <util/u_keymap.h>
 #include <util/u_sampler.h>
+#include <util/u_draw.h>
 #include <tgsi/tgsi_ureg.h>
 
 #define DEFAULT_BUF_ALIGNMENT 1
 
 struct vertex_stream
 {
-   struct vertex2f pos;
+   struct vertex2s pos;
+   struct vertex2s mv[4];
    struct {
-      float y;
-      float cr;
-      float cb;
+      int8_t y;
+      int8_t cr;
+      int8_t cb;
+      int8_t flag;
    } eb[2][2];
-   float interlaced;
-   float frame_pred;
-   float ref_frames;
-   float bkwd_pred;
-   struct vertex2f mv[4];
 };
 
 enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB_0_0,
-   VS_I_EB_0_1,
-   VS_I_EB_1_0,
-   VS_I_EB_1_1,
-   VS_I_INTERLACED,
-   VS_I_FRAME_PRED,
-   VS_I_REF_FRAMES,
-   VS_I_BKWD_PRED,
    VS_I_MV0,
    VS_I_MV1,
    VS_I_MV2,
    VS_I_MV3,
+   VS_I_EB_0_0,
+   VS_I_EB_0_1,
+   VS_I_EB_1_0,
+   VS_I_EB_1_1,
 
    NUM_VS_INPUTS
 };
@@ -87,24 +81,27 @@ enum VS_OUTPUT
    VS_O_TEX2,
    VS_O_EB_0,
    VS_O_EB_1,
-   VS_O_REF_FRAMES,
-   VS_O_BKWD_PRED,
+   VS_O_INFO,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
    VS_O_MV3
 };
 
+static const unsigned const_empty_block_mask_420[3][2][2] = {
+        { { 0x20, 0x10 },  { 0x08, 0x04 } },
+        { { 0x02, 0x02 },  { 0x02, 0x02 } },
+        { { 0x01, 0x01 },  { 0x01, 0x01 } }
+};
+
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src scale;
+   struct ureg_src block_scale, mv_scale;
    struct ureg_src vrect, vpos, eb[2][2], vmv[4];
-   struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred;
    struct ureg_dst t_vpos, t_vtex, t_vmv;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
-   struct ureg_dst o_ref_frames, o_bkwd_pred;
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
    unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -121,120 +118,121 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
    eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
    eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
-   interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
-   frame_pred = ureg_DECL_vs_input(shader, VS_I_FRAME_PRED);
-   ref_frames = ureg_DECL_vs_input(shader, VS_I_REF_FRAMES);
-   bkwd_pred = ureg_DECL_vs_input(shader, VS_I_BKWD_PRED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);   
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
-   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
+   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
-   o_ref_frames = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES);
-   o_bkwd_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED);
-   
+   o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
+
    for (i = 0; i < 4; ++i) {
      vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
      o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
    }
 
    /*
-    * scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    * mv_scale = 0.5 / (dst.width, dst.height);
     *
-    * t_vpos = (vpos + vrect) * scale
+    * t_vpos = (vpos + vrect) * block_scale
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     *
+    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
+    *
+    * frame_pred = eb[0][1].w // selects vmv[0]/vmv[2] for o_vmv[1]/o_vmv[3]
+    * o_info.x = ref_frames
+    * o_info.y = ref_frames >= 0
+    * o_info.z = bkwd_pred
+    *
+    * // Apply motion vectors
+    * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale
+    *
     * o_line.xy = vrect * 8
     * o_line.z = interlaced
     *
-    * if(interlaced) {
+    * if(eb[0][0].w) { //interlaced
     *    t_vtex.x = vrect.x
     *    t_vtex.y = vrect.y * 0.5
     *    t_vtex += vpos
     *
-    *    o_vtex[0].xy = t_vtex * scale
+    *    o_vtex[0].xy = t_vtex * block_scale
     *
     *    t_vtex.y += 0.5
-    *    o_vtex[1].xy = t_vtex * scale
+    *    o_vtex[1].xy = t_vtex * block_scale
     * } else {
     *    o_vtex[0..1].xy = t_vpos
     * }
     * o_vtex[2].xy = t_vpos
     *
-    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
-    *
-    * o_frame_pred = frame_pred
-    * o_ref_frames = ref_frames
-    * o_bkwd_pred = bkwd_pred
-    *
-    * // Apply motion vectors
-    * scale = 0.5 / (dst.width, dst.height);
-    * o_vmv[0..count] = t_vpos + vmv[0..count] * scale
-    *
     */
-   scale = ureg_imm2f(shader,
+   block_scale = ureg_imm2f(shader,
       (float)MACROBLOCK_WIDTH / r->buffer_width,
       (float)MACROBLOCK_HEIGHT / r->buffer_height);
 
+   mv_scale = ureg_imm2f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height);
+
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
+   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[0][1], eb[0][0]);
+   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[1][1], eb[1][0]);
+
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
+            ureg_scalar(eb[1][1], TGSI_SWIZZLE_W));
+   ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
+      ureg_scalar(eb[1][1], TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.0f));
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z),
+            ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
+
+   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos));
+   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
+            vmv[0], vmv[1]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
+            vmv[2], vmv[3]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
+
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), 
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
       vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
-   ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label);
+   ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
+      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
+      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
 
       ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X),
          ureg_scalar(vrect, TGSI_SWIZZLE_Y),
          ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
-   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[0][1], eb[0][0]);
-   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[1][1], eb[1][0]);
-
-   ureg_MOV(shader, ureg_writemask(o_ref_frames, TGSI_WRITEMASK_X), ref_frames);
-   ureg_MOV(shader, ureg_writemask(o_bkwd_pred, TGSI_WRITEMASK_X), bkwd_pred);
-
-   scale = ureg_imm2f(shader,
-      0.5f / r->buffer_width,
-      0.5f / r->buffer_height);
-
-   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), scale, vmv[0], ureg_src(t_vpos));
-   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), scale, vmv[2], ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
-            vmv[0], vmv[1]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
-            vmv[2], vmv[3]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
-
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
    ureg_release_temporary(shader, t_vmv);
@@ -272,13 +270,12 @@ static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
    struct ureg_src tc[3], sampler[3], eb[2];
-   struct ureg_dst texel, t_tc, t_eb_info, tmp;
+   struct ureg_dst texel, t_tc, t_eb_info;
    unsigned i, label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
    t_eb_info = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
@@ -319,12 +316,12 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
             ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]);
          }
 
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
       ureg_ENDIF(shader);
    }
 
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, t_eb_info);
-   ureg_release_temporary(shader, tmp);
 
    return texel;
 }
@@ -332,13 +329,12 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 static struct ureg_dst
 fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src ref_frames, bkwd_pred;
+   struct ureg_src info;
    struct ureg_src tc[4], sampler[2];
-   struct ureg_dst ref[2], tmp, result;
+   struct ureg_dst ref[2], result;
    unsigned i, intra_label, bi_label, label;
 
-   ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
-   bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
+   info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 4; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
@@ -348,28 +344,29 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ref[i] = ureg_DECL_temporary(shader);
    }
 
-   tmp = ureg_DECL_temporary(shader);
    result = ureg_DECL_temporary(shader);
 
-   ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f));
 
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label);
-      ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
+   ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Y), &intra_label);
+      ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
 
-      ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
+      ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label);
 
          /*
           * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
           */
-         ureg_IF(shader, bkwd_pred, &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+         ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
+         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ELSE(shader, &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
+         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ENDIF(shader);
 
+      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
       ureg_ELSE(shader, &bi_label);
 
          /*
@@ -378,22 +375,22 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           * else
           *    ref[0..1] = tex(tc[2..3], sampler[0..1])
           */
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
-
-         ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
+         ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
             ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
             tc[3], tc[2]);
-         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
+         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
-         ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X),
+         ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f),
             ureg_src(ref[0]), ureg_src(ref[1]));
 
+      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
       ureg_ENDIF(shader);
+   ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
-   ureg_release_temporary(shader, tmp);
 
    return result;
 }
@@ -417,7 +414,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    result = fetch_ref(shader, field);
 
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(texel), ureg_src(result));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -557,37 +554,25 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
    /* Position element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   for (i = 0; i < 4; ++i)
+      /* motion vector 0..3 element */
+      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
    /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* progressive=0.0f interlaced=1.0f */
-   vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
+   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
-   /* frame=0.0f field=1.0f */
-   vertex_elems[VS_I_FRAME_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   /* intra=-1.0f forward/backward=1.0f bi=0.0f */
-   vertex_elems[VS_I_REF_FRAMES].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   /* forward=0.0f backward=1.0f */
-   vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   for (i = 0; i < 4; ++i)
-      /* motion vector 0..4 element */
-      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 13, 1);
+   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
       r->pipe, NUM_VS_INPUTS, vertex_elems);
@@ -646,13 +631,12 @@ static struct pipe_sampler_view
 }
 
 static void
-get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
+get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
 {
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
       {
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-
             mv[2].x = mb->pmv[0][1][0];
             mv[2].y = mb->pmv[0][1][1];
 
@@ -712,20 +696,6 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
    }
 }
 
-static bool
-empty_block(enum pipe_video_chroma_format chroma_format,
-            unsigned cbp, unsigned component,
-            unsigned x, unsigned y)
-{
-   /* TODO: Implement 422, 444 */
-   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   if(component == 0) /*luma*/
-      return !(cbp  & (1 << (5 - (x + y * 2))));
-   else /*cr cb*/
-      return !(cbp & (1 << (2 - component)));
-}
-
 static void
 grab_vectors(struct vl_mpeg12_mc_renderer *r,
              struct vl_mpeg12_mc_buffer *buffer,
@@ -742,26 +712,26 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    stream.pos.y = mb->mby;
    for ( i = 0; i < 2; ++i) {
       for ( j = 0; j < 2; ++j) {
-         stream.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
-         stream.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i);
-         stream.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i);         
+         stream.eb[i][j].y = !(mb->cbp & (*r->empty_block_mask)[0][i][j]);
+         stream.eb[i][j].cr = !(mb->cbp & (*r->empty_block_mask)[1][i][j]);
+         stream.eb[i][j].cb = !(mb->cbp & (*r->empty_block_mask)[2][i][j]);
       }
    }
-   stream.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-   stream.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
-   stream.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
+   stream.eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   stream.eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
+   stream.eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         stream.ref_frames = -1.0f;
+         stream.eb[1][1].flag = -1;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream.ref_frames = 1.0f;
+         stream.eb[1][1].flag = 1;
          break;
-        
+
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream.ref_frames = 0.0f;
+         stream.eb[1][1].flag = 0;
          break;
 
       default:
@@ -769,7 +739,7 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    }
 
    get_motion_vectors(mb, stream.mv);
-   vl_vb_add_block(&buffer->vertex_stream, (float*)&stream);
+   vl_vb_add_block(&buffer->vertex_stream, &stream);
 }
 
 static void
@@ -786,7 +756,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         if (!empty_block(r->chroma_format, cbp, 0, x, y)) {
+         if (cbp & (*r->empty_block_mask)[0][y][x]) {
             vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
@@ -797,7 +767,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r,
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    for (tb = 1; tb < 3; ++tb) {
-      if (!empty_block(r->chroma_format, cbp, tb, 0, 0)) {
+      if (cbp & (*r->empty_block_mask)[tb][0][0]) {
          if(tb == 1)
             vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks);
          else
@@ -860,6 +830,10 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->chroma_format = chroma_format;
    renderer->bufmode = bufmode;
 
+   /* TODO: Implement 422, 444 */
+   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   renderer->empty_block_mask = &const_empty_block_mask_420;
+
    renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
                                            texview_map_delete);
    if (!renderer->texview_map)
@@ -914,7 +888,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    template.width0 = renderer->buffer_width;
    template.height0 = renderer->buffer_height;
    template.depth0 = 1;
-   template.usage = PIPE_USAGE_DYNAMIC;
+   template.usage = PIPE_USAGE_STATIC;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
 
@@ -959,8 +933,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(
-      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch, 
-      sizeof(struct vertex_stream) / sizeof(float),
+      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch,
       renderer->vertex_stream_stride);
 
    return true;