g3dvl: Modularized rendering, refactored to accommodate VAAPI, other APIs.
author Younes Manton <younes.m@gmail.com>
Sat, 16 Aug 2008 17:04:23 +0000 (13:04 -0400)
committer Younes Manton <younes.m@gmail.com>
Sat, 16 Aug 2008 17:04:54 +0000 (13:04 -0400)
26 files changed:
src/gallium/state_trackers/g3dvl/Makefile
src/gallium/state_trackers/g3dvl/vl_basic_csc.c [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_basic_csc.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_context.c
src/gallium/state_trackers/g3dvl/vl_context.h
src/gallium/state_trackers/g3dvl/vl_csc.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_data.c
src/gallium/state_trackers/g3dvl/vl_data.h
src/gallium/state_trackers/g3dvl/vl_defs.h
src/gallium/state_trackers/g3dvl/vl_display.c [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_display.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_render.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_screen.c [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_screen.h [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_shader_build.c
src/gallium/state_trackers/g3dvl/vl_shader_build.h
src/gallium/state_trackers/g3dvl/vl_surface.c
src/gallium/state_trackers/g3dvl/vl_surface.h
src/gallium/state_trackers/g3dvl/vl_types.h
src/gallium/state_trackers/g3dvl/vl_util.c
src/gallium/state_trackers/g3dvl/vl_util.h
src/libXvMC/block.c
src/libXvMC/context.c
src/libXvMC/surface.c

index c6a22cad4e2f80e01132df0d648f5547075c5768..9995c554ab88b186375a0be77313704cab4abf78 100644 (file)
@@ -1,5 +1,6 @@
 TARGET         = libg3dvl.a
-OBJECTS                = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o
+OBJECTS                = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_data.o vl_shader_build.o vl_util.o vl_basic_csc.o \
+                 vl_r16snorm_mc.o
 GALLIUMDIR     = ../..
 
 CFLAGS         += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl
@@ -15,4 +16,3 @@ ${TARGET}: ${OBJECTS}
 
 clean:
        rm -rf ${OBJECTS} ${TARGET}
-
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
new file mode 100644 (file)
index 0000000..ea003a3
--- /dev/null
@@ -0,0 +1,694 @@
+#define VL_INTERNAL
+#include "vl_basic_csc.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pipe/p_context.h>
+#include <pipe/p_winsys.h>
+#include <pipe/p_state.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include "vl_csc.h"
+#include "vl_surface.h"
+#include "vl_shader_build.h"
+#include "vl_types.h"
+
+/*
+ * Layout of the vertex shader's constant buffer (constants c0 and c1).
+ * Rewritten by vlPutPictureCSC() for every picture that is drawn.
+ */
+struct vlVertexShaderConsts
+{
+       struct vlVertex4f       src_scale;      /* c0: scales the unit texcoord rect to the source rect size */
+       struct vlVertex4f       src_trans;      /* c1: translates the texcoord rect into position */
+};
+
+/*
+ * Layout of the fragment shader's constant buffer (constants c0 through c4).
+ */
+struct vlFragmentShaderConsts
+{
+       struct vlVertex4f       bias;           /* c0: subtracted from the sampled pixel before conversion */
+       float                   matrix[16];     /* c1-c4: conversion matrix, one row per dp4 in the shader */
+};
+
+/*
+ * Basic color space converter.
+ * The functions in this file cast a struct vlCSC* back to this type,
+ * so 'base' has to stay the first member.
+ */
+struct vlBasicCSC
+{
+       struct vlCSC                            base;           /* Function table handed out to callers */
+
+       struct pipe_context                     *pipe;          /* Context all state is created on and drawn with */
+       struct pipe_viewport_state              viewport;       /* Sized by vlResizeFrameBuffer() */
+       struct pipe_framebuffer_state           framebuffer;    /* cbufs[0] is NULL until vlResizeFrameBuffer() runs */
+       void                                    *sampler;       /* Sampler state object for the source texture */
+       void                                    *vertex_shader, *fragment_shader;
+       struct pipe_vertex_buffer               vertex_bufs[2]; /* [0] positions, [1] texcoords */
+       struct pipe_vertex_element              vertex_elems[2];        /* One element per vertex buffer */
+       struct pipe_constant_buffer             vs_const_buf, fs_const_buf;
+};
+
+/*
+ * Makes the frame buffer and viewport match the given size.
+ *
+ * Nothing is done if the frame buffer already has that size. Otherwise the
+ * old color buffer (if any) is released and a new one is allocated.
+ *
+ * Returns 0 on success, 1 if the new surface could not be allocated. After a
+ * failure the frame buffer has no color buffer and a cached size of 0x0, so
+ * that a later call with the same size tries again instead of returning early.
+ */
+static int vlResizeFrameBuffer
+(
+       struct vlCSC *csc,
+       unsigned int width,
+       unsigned int height
+)
+{
+       struct vlBasicCSC       *basic_csc;
+       struct pipe_context     *pipe;
+       struct pipe_surface     *surface;
+
+       assert(csc);
+
+       basic_csc = (struct vlBasicCSC*)csc;
+       pipe = basic_csc->pipe;
+
+       if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height)
+               return 0;
+
+       if (basic_csc->framebuffer.cbufs[0])
+               pipe->winsys->surface_release
+               (
+                       pipe->winsys,
+                       &basic_csc->framebuffer.cbufs[0]
+               );
+
+       /* Forget the old buffer and size until the replacement really exists */
+       basic_csc->framebuffer.cbufs[0] = NULL;
+       basic_csc->framebuffer.width = 0;
+       basic_csc->framebuffer.height = 0;
+
+       surface = pipe->winsys->surface_alloc(pipe->winsys);
+
+       if (!surface)
+               return 1;
+
+       pipe->winsys->surface_alloc_storage
+       (
+               pipe->winsys,
+               surface,
+               width,
+               height,
+               PIPE_FORMAT_A8R8G8B8_UNORM,
+               /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */
+               PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE,
+               0
+       );
+
+       basic_csc->viewport.scale[0] = width;
+       basic_csc->viewport.scale[1] = height;
+       basic_csc->viewport.scale[2] = 1;
+       basic_csc->viewport.scale[3] = 1;
+       basic_csc->viewport.translate[0] = 0;
+       basic_csc->viewport.translate[1] = 0;
+       basic_csc->viewport.translate[2] = 0;
+       basic_csc->viewport.translate[3] = 0;
+
+       basic_csc->framebuffer.width = width;
+       basic_csc->framebuffer.height = height;
+       basic_csc->framebuffer.cbufs[0] = surface;
+
+       return 0;
+}
+
+/*
+ * Binds every piece of state the converter renders with to its pipe context.
+ * The source texture is not bound here; that happens per picture.
+ * Always returns 0.
+ */
+static int vlBegin
+(
+       struct vlCSC *csc
+)
+{
+       struct vlBasicCSC       *self;
+       struct pipe_context     *ctx;
+
+       assert(csc);
+
+       self = (struct vlBasicCSC*)csc;
+       ctx = self->pipe;
+
+       /* Render target and the viewport covering it */
+       ctx->set_framebuffer_state(ctx, &self->framebuffer);
+       ctx->set_viewport_state(ctx, &self->viewport);
+
+       /* Sampler for the source picture */
+       ctx->bind_sampler_states(ctx, 1, (void**)&self->sampler);
+
+       /* Shaders */
+       ctx->bind_vs_state(ctx, self->vertex_shader);
+       ctx->bind_fs_state(ctx, self->fragment_shader);
+
+       /* Quad geometry: positions and texcoords */
+       ctx->set_vertex_buffers(ctx, 2, self->vertex_bufs);
+       ctx->set_vertex_elements(ctx, 2, self->vertex_elems);
+
+       /* Shader constants */
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 0, &self->vs_const_buf);
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 0, &self->fs_const_buf);
+
+       return 0;
+}
+
+/*
+ * Draws the given source rectangle of a surface into the frame buffer,
+ * converting colors with the currently bound shaders and constants.
+ *
+ * The source rectangle (srcx, srcy, srcw, srch) is given in texels and is
+ * turned into a normalized scale/translation for the vertex shader.
+ * destx, desty, destw, desth and picture_type are accepted but not used by
+ * this implementation.
+ *
+ * Returns 0 on success, 1 if the vertex shader constant buffer could not be
+ * mapped (nothing is drawn in that case).
+ */
+static int vlPutPictureCSC
+(
+       struct vlCSC *csc,
+       struct vlSurface *surface,
+       int srcx,
+       int srcy,
+       int srcw,
+       int srch,
+       int destx,
+       int desty,
+       int destw,
+       int desth,
+       enum vlPictureType picture_type
+)
+{
+       struct vlBasicCSC               *basic_csc;
+       struct pipe_context             *pipe;
+       struct vlVertexShaderConsts     *vs_consts;
+
+       assert(csc);
+       assert(surface);
+
+       basic_csc = (struct vlBasicCSC*)csc;
+       pipe = basic_csc->pipe;
+
+       vs_consts = pipe->winsys->buffer_map
+       (
+               pipe->winsys,
+               basic_csc->vs_const_buf.buffer,
+               PIPE_BUFFER_USAGE_CPU_WRITE
+       );
+
+       /* A failed map must not be written through */
+       if (!vs_consts)
+               return 1;
+
+       vs_consts->src_scale.x = srcw / (float)surface->texture->width[0];
+       vs_consts->src_scale.y = srch / (float)surface->texture->height[0];
+       vs_consts->src_scale.z = 1;
+       vs_consts->src_scale.w = 1;
+       vs_consts->src_trans.x = srcx / (float)surface->texture->width[0];
+       vs_consts->src_trans.y = srcy / (float)surface->texture->height[0];
+       vs_consts->src_trans.z = 0;
+       vs_consts->src_trans.w = 0;
+
+       pipe->winsys->buffer_unmap(pipe->winsys, basic_csc->vs_const_buf.buffer);
+
+       pipe->set_sampler_textures(pipe, 1, &surface->texture);
+       /* The quad is 4 vertices drawn as a triangle strip */
+       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+
+       return 0;
+}
+
+/*
+ * Counterpart to vlBegin(). This implementation has nothing to undo,
+ * so it only validates its argument and reports success (0).
+ */
+static int vlEnd
+(
+       struct vlCSC *csc
+)
+{
+       const int       status = 0;
+
+       assert(csc);
+
+       return status;
+}
+
+/*
+ * Returns the color buffer pictures are rendered into.
+ * This is NULL until vlResizeFrameBuffer() has allocated one.
+ */
+static struct pipe_surface* vlGetFrameBuffer
+(
+       struct vlCSC *csc
+)
+{
+       assert(csc);
+
+       return ((struct vlBasicCSC*)csc)->framebuffer.cbufs[0];
+}
+
+/*
+ * Releases everything the converter owns (frame buffer surface, sampler,
+ * shaders, vertex and constant buffers) and then frees the converter itself.
+ * The csc pointer must not be used afterwards. Always returns 0.
+ */
+static int vlDestroy
+(
+       struct vlCSC *csc
+)
+{
+       struct vlBasicCSC       *self;
+       struct pipe_context     *ctx;
+
+       assert(csc);
+
+       self = (struct vlBasicCSC*)csc;
+       ctx = self->pipe;
+
+       /* The color buffer only exists once the frame buffer has been sized */
+       if (self->framebuffer.cbufs[0] != NULL)
+       {
+               ctx->winsys->surface_release(ctx->winsys, &self->framebuffer.cbufs[0]);
+       }
+
+       ctx->delete_sampler_state(ctx, self->sampler);
+       ctx->delete_vs_state(ctx, self->vertex_shader);
+       ctx->delete_fs_state(ctx, self->fragment_shader);
+
+       /* Position buffer first, then the texcoord buffer */
+       ctx->winsys->buffer_destroy(ctx->winsys, self->vertex_bufs[0].buffer);
+       ctx->winsys->buffer_destroy(ctx->winsys, self->vertex_bufs[1].buffer);
+
+       ctx->winsys->buffer_destroy(ctx->winsys, self->vs_const_buf.buffer);
+       ctx->winsys->buffer_destroy(ctx->winsys, self->fs_const_buf.buffer);
+
+       free(self);
+
+       return 0;
+}
+
+/*
+ * Represents 2 triangles in a strip in normalized coords.
+ * Used to render the surface onto the frame buffer.
+ * vlCreateDataBufs() copies these 4 vertices into vertex_bufs[0] and
+ * vlPutPictureCSC() draws them as a PIPE_PRIM_TRIANGLE_STRIP.
+ */
+static const struct vlVertex2f surface_verts[4] =
+{
+       {0.0f, 0.0f},
+       {0.0f, 1.0f},
+       {1.0f, 0.0f},
+       {1.0f, 1.0f}
+};
+
+/*
+ * Represents texcoords for the above. We can use the position values directly,
+ * so this is just an alias for surface_verts; vlCreateDataBufs() copies it
+ * into vertex_bufs[1].
+ * TODO: Duplicate these in the shader, no need to create a buffer.
+ */
+static const struct vlVertex2f *surface_texcoords = surface_verts;
+
+/*
+ * Identity color conversion constants, for debugging.
+ * Zero bias and an identity matrix: the fragment shader's output equals
+ * the sampled pixel.
+ */
+static const struct vlFragmentShaderConsts identity =
+{
+       /* Bias (c0) */
+       {
+               0.0f, 0.0f, 0.0f, 0.0f
+       },
+       /* Matrix rows (c1-c4) */
+       {
+               1.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 1.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 1.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 1.0f
+       }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ *
+ * This is the table vlCreateDataBufs() uploads into the fragment shader's
+ * constant buffer.
+ */
+static const struct vlFragmentShaderConsts bt_601 =
+{
+       /* Bias (c0): 0.501960784 is 128/255, which centers Cb and Cr on zero */
+       {
+               0.0f,           0.501960784f,   0.501960784f,   0.0f
+       },
+       /* Matrix rows (c1-c4): one row per output channel */
+       {
+               1.0f,           0.0f,           1.371f,         0.0f,
+               1.0f,           -0.336f,        -0.698f,        0.0f,
+               1.0f,           1.732f,         0.0f,           0.0f,
+               0.0f,           0.0f,           0.0f,           1.0f
+       }
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ */
+static const struct vlFragmentShaderConsts bt_601_full =
+{
+       /* Bias (c0): 0.062745098 is 16/255, 0.501960784 is 128/255 */
+       {
+               0.062745098f,   0.501960784f,   0.501960784f,   0.0f
+       },
+       /* Matrix rows (c1-c4): one row per output channel */
+       {
+               1.164f,         0.0f,           1.596f,         0.0f,
+               1.164f,         -0.391f,        -0.813f,        0.0f,
+               1.164f,         2.018f,         0.0f,           0.0f,
+               0.0f,           0.0f,           0.0f,           1.0f
+       }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ */
+static const struct vlFragmentShaderConsts bt_709 =
+{
+       /* Bias (c0): 0.501960784 is 128/255, which centers Cb and Cr on zero */
+       {
+               0.0f,           0.501960784f,   0.501960784f,   0.0f
+       },
+       /* Matrix rows (c1-c4): one row per output channel */
+       {
+               1.0f,           0.0f,           1.540f,         0.0f,
+               1.0f,           -0.183f,        -0.459f,        0.0f,
+               1.0f,           1.816f,         0.0f,           0.0f,
+               0.0f,           0.0f,           0.0f,           1.0f
+       }
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ *
+ * File-local like the other conversion tables; the struct type is private
+ * to this file, so no other translation unit can use the symbol anyway.
+ */
+static const struct vlFragmentShaderConsts bt_709_full =
+{
+       /* Bias (c0): 0.062745098 is 16/255, 0.501960784 is 128/255 */
+       {
+               0.062745098f,   0.501960784f,   0.501960784f,   0.0f
+       },
+       /* Matrix rows (c1-c4): one row per output channel */
+       {
+               1.164f,         0.0f,           1.793f,         0.0f,
+               1.164f,         -0.213f,        -0.534f,        0.0f,
+               1.164f,         2.115f,         0.0f,           0.0f,
+               0.0f,           0.0f,           0.0f,           1.0f
+       }
+};
+
+/*
+ * Builds the vertex shader used to draw the picture quad and stores the
+ * resulting state object in csc->vertex_shader.
+ *
+ * The shader passes the vertex position through unchanged and maps the unit
+ * texcoord rect onto the source rect: o1 = i1 * c0 + c1.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateVertexShader
+(
+       struct vlBasicCSC *csc
+)
+{
+       const unsigned int              max_tokens = 50;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(csc);
+
+       pipe = csc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* The three tokens above are already in place */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Vertex texcoords
+        */
+       for (i = 0; i < 2; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale texcoord rect to source size
+        * decl c1              ; Translation vector to move texcoord rect into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Vertex texcoords
+        */
+       for (i = 0; i < 2; i++)
+       {
+               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0 */
+       decl = vl_decl_temps(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mov o0, i0           ; Move pos in to pos out */
+       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i1, c0       ; Scale unit texcoord rect to source size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o1, t0, c1       ; Translate texcoord rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       vs.tokens = tokens;
+       csc->vertex_shader = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the fragment shader that performs the color conversion and stores
+ * the resulting state object in csc->fragment_shader.
+ *
+ * The shader samples the source picture, subtracts the bias vector (c0) and
+ * multiplies the result by the conversion matrix (c1-c4), one dp4 per
+ * output channel.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateFragmentShader
+(
+       struct vlBasicCSC *csc
+)
+{
+       const unsigned int              max_tokens = 50;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(csc);
+
+       pipe = csc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       /* The three tokens above are already in place */
+       ti = 3;
+
+       /* decl i0              ; Texcoords for s0 */
+       decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl c0              ; Bias vector for CSC
+        * decl c1-c4           ; CSC matrix c1-c4
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0              ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0 */
+       decl = vl_decl_temps(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl s0              ; Sampler for tex containing picture to display */
+       decl = vl_decl_samplers(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* tex2d t0, i0, s0     ; Read src pixel */
+       inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* sub t0, t0, c0       ; Subtract bias vector from pixel */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * dp4 o0.x, t0, c1     ; Multiply pixel by the color conversion matrix
+        * dp4 o0.y, t0, c2
+        * dp4 o0.z, t0, c3
+        * dp4 o0.w, t0, c4     ; XXX: Don't need 4th coefficient
+        */
+       for (i = 0; i < 4; ++i)
+       {
+               inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
+               /* Each dp4 writes exactly one channel: x, y, z, then w */
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       fs.tokens = tokens;
+       csc->fragment_shader = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Creates and fills the buffers the converter draws with: the position and
+ * texcoord vertex buffers (plus their vertex elements) and the vertex and
+ * fragment shader constant buffers.
+ *
+ * Returns 0 on success, 1 if a buffer could not be created or mapped.
+ * Buffers created before a failure are left in csc and are not released here.
+ */
+static int vlCreateDataBufs
+(
+       struct vlBasicCSC *csc
+)
+{
+       struct pipe_context     *pipe;
+       void                    *map;
+
+       assert(csc);
+
+       pipe = csc->pipe;
+
+       /*
+       Create our vertex buffer and vertex buffer element
+       VB contains 4 vertices that render a quad covering the entire window
+       to display a rendered surface
+       Quad is rendered as a tri strip
+       */
+       csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f);
+       csc->vertex_bufs[0].max_index = 3;
+       csc->vertex_bufs[0].buffer_offset = 0;
+       csc->vertex_bufs[0].buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_VERTEX,
+               sizeof(struct vlVertex2f) * 4
+       );
+
+       if (!csc->vertex_bufs[0].buffer)
+               return 1;
+
+       map = pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+       if (!map)
+               return 1;
+
+       memcpy(map, surface_verts, sizeof(struct vlVertex2f) * 4);
+
+       pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[0].buffer);
+
+       csc->vertex_elems[0].src_offset = 0;
+       csc->vertex_elems[0].vertex_buffer_index = 0;
+       csc->vertex_elems[0].nr_components = 2;
+       csc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+       /*
+       Create our texcoord buffer and texcoord buffer element
+       Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
+       */
+       csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f);
+       csc->vertex_bufs[1].max_index = 3;
+       csc->vertex_bufs[1].buffer_offset = 0;
+       csc->vertex_bufs[1].buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_VERTEX,
+               sizeof(struct vlVertex2f) * 4
+       );
+
+       if (!csc->vertex_bufs[1].buffer)
+               return 1;
+
+       map = pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+       if (!map)
+               return 1;
+
+       memcpy(map, surface_texcoords, sizeof(struct vlVertex2f) * 4);
+
+       pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[1].buffer);
+
+       csc->vertex_elems[1].src_offset = 0;
+       csc->vertex_elems[1].vertex_buffer_index = 1;
+       csc->vertex_elems[1].nr_components = 2;
+       csc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+       /*
+       Create our vertex shader's constant buffer
+       Const buffer contains scaling and translation vectors
+       Its contents are written per picture, so nothing is uploaded here
+       */
+       csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
+       csc->vs_const_buf.buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_CONSTANT,
+               csc->vs_const_buf.size
+       );
+
+       if (!csc->vs_const_buf.buffer)
+               return 1;
+
+       /*
+       Create our fragment shader's constant buffer
+       Const buffer contains the color conversion matrix and bias vectors
+       */
+       csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
+       csc->fs_const_buf.buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_CONSTANT,
+               csc->fs_const_buf.size
+       );
+
+       if (!csc->fs_const_buf.buffer)
+               return 1;
+
+       /*
+       TODO: Refactor this into a separate function,
+       allow changing the CSC matrix at runtime to switch between regular & full versions
+       */
+       map = pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+       if (!map)
+               return 1;
+
+       memcpy(map, &bt_601, sizeof(struct vlFragmentShaderConsts));
+
+       pipe->winsys->buffer_unmap(pipe->winsys, csc->fs_const_buf.buffer);
+
+       return 0;
+}
+
+/*
+ * Creates all of the converter's pipe state: the sampler, both shaders and
+ * the vertex/constant buffers. The frame buffer's color buffer is left NULL
+ * and is allocated later by vlResizeFrameBuffer().
+ *
+ * Returns 0 on success, or 1 as soon as one of the creation helpers fails.
+ */
+static int vlInit
+(
+       struct vlBasicCSC *csc
+)
+{
+       struct pipe_context             *pipe;
+       struct pipe_sampler_state       sampler;
+
+       assert(csc);
+
+       pipe = csc->pipe;
+
+       /* Delay creating the FB until vlPutSurface() so we know window size */
+       csc->framebuffer.num_cbufs = 1;
+       csc->framebuffer.cbufs[0] = NULL;
+       csc->framebuffer.zsbuf = NULL;
+
+       /*
+        * Zero the whole state first: the fields that are not set below would
+        * otherwise be handed to the driver holding indeterminate stack contents.
+        */
+       memset(&sampler, 0, sizeof(struct pipe_sampler_state));
+       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+       sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+       sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+       sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+       sampler.compare_func = PIPE_FUNC_ALWAYS;
+       sampler.normalized_coords = 1;
+       /*sampler.prefilter = ;*/
+       /*sampler.shadow_ambient = ;*/
+       /*sampler.lod_bias = ;*/
+       /*sampler.min_lod = ;*/
+       /*sampler.max_lod = ;*/
+       /*sampler.border_color[i] = ;*/
+       /*sampler.max_anisotropy = ;*/
+       csc->sampler = pipe->create_sampler_state(pipe, &sampler);
+
+       if (vlCreateVertexShader(csc))
+               return 1;
+
+       if (vlCreateFragmentShader(csc))
+               return 1;
+
+       if (vlCreateDataBufs(csc))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Creates a basic color space converter that renders with the given pipe
+ * context and stores a pointer to its vlCSC interface in *csc.
+ *
+ * Returns 0 on success. Returns 1 if the converter could not be allocated or
+ * initialized; *csc is left untouched in that case.
+ */
+int vlCreateBasicCSC
+(
+       struct pipe_context *pipe,
+       struct vlCSC **csc
+)
+{
+       struct vlBasicCSC *basic_csc;
+
+       assert(pipe);
+       assert(csc);
+
+       basic_csc = calloc(1, sizeof(struct vlBasicCSC));
+
+       if (!basic_csc)
+               return 1;
+
+       basic_csc->base.vlResizeFrameBuffer = &vlResizeFrameBuffer;
+       basic_csc->base.vlBegin = &vlBegin;
+       basic_csc->base.vlPutPicture = &vlPutPictureCSC;
+       basic_csc->base.vlEnd = &vlEnd;
+       basic_csc->base.vlGetFrameBuffer = &vlGetFrameBuffer;
+       basic_csc->base.vlDestroy = &vlDestroy;
+       basic_csc->pipe = pipe;
+
+       if (vlInit(basic_csc))
+       {
+               /*
+                * Do not hand out a half-initialized converter.
+                * XXX: pipe objects created before the failure are not released here.
+                */
+               free(basic_csc);
+               return 1;
+       }
+
+       *csc = &basic_csc->base;
+
+       return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.h b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h
new file mode 100644 (file)
index 0000000..2e17f1d
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef vl_basic_csc_h
+#define vl_basic_csc_h
+
+struct pipe_context;
+struct vlCSC;
+
+/*
+ * Creates a basic color space converter that renders with the given pipe
+ * context. On success a pointer to the new converter's vlCSC interface is
+ * stored in *csc and 0 is returned; a non-zero value is returned on failure.
+ */
+int vlCreateBasicCSC
+(
+       struct pipe_context *pipe,
+       struct vlCSC **csc
+);
+
+#endif
index 5616de0ba413726bddbd878e3c44fe74969ef56f..56d360c05b92c85ea2c51035efbc2443f28f6d73 100644 (file)
+#define VL_INTERNAL
 #include "vl_context.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <pipe/p_context.h>
-#include <pipe/p_winsys.h>
-#include <pipe/p_screen.h>
 #include <pipe/p_state.h>
-#include <pipe/p_inlines.h>
-#include <pipe/p_shader_tokens.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
-#include "vl_shader_build.h"
-#include "vl_data.h"
-#include "vl_defs.h"
-#include "vl_util.h"
+#include "vl_render.h"
+#include "vl_r16snorm_mc.h"
+#include "vl_csc.h"
+#include "vl_basic_csc.h"
 
-static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 50;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-       
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Vertex texcoords
-        */
-       for (i = 0; i < 2; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Vertex texcoords
-        */
-       for (i = 0; i < 2; i++)
-       {
-               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * mov o0, i0           ; Move pos in to pos out
-        * mov o1, i1           ; Move texcoord in to texcoord out */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 50;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-
-       /* decl i0              ; Texcoords for s0 */
-       decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0              ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl s0              ; Sampler for tex containing picture to display */
-       decl = vl_decl_samplers(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* tex2d t0, i0, s0     ; Read src pixel */
-       inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c0       ; Subtract bias vector from pixel */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * dp4 o0.x, t0, c1     ; Multiply pixel by the color conversion matrix
-        * dp4 o0.y, t0, c2
-        * dp4 o0.z, t0, c3
-        * dp4 o0.w, t0, c4     ; XXX: Don't need 4th coefficient
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-       
-       return 0;
-}
-
-/*
- * Fills in the IDCT portion of the context state: viewport, render target
- * dimensions, a nearest-filtered clamp-to-edge sampler, an 8x8 A8L8 texture,
- * a 16x1 A8R8G8B8 basis texture, and vertex buffer pointers that alias the
- * CSC state. Finally invokes the frame IDCT shader builders.
- *
- * Always returns 0; the return values of the shader builders are not checked.
- */
-static int vlInitIDCT(struct VL_CONTEXT *context)
-{
-       struct pipe_context             *pipe;
-       struct pipe_sampler_state       sampler;
-       struct pipe_texture             template;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-
-       context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH;
-       context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT;
-       context->states.idct.viewport.scale[2] = 1;
-       context->states.idct.viewport.scale[3] = 1;
-       context->states.idct.viewport.translate[0] = 0;
-       context->states.idct.viewport.translate[1] = 0;
-       context->states.idct.viewport.translate[2] = 0;
-       context->states.idct.viewport.translate[3] = 0;
-
-       context->states.idct.render_target.width = VL_BLOCK_WIDTH;
-       context->states.idct.render_target.height = VL_BLOCK_HEIGHT;
-       context->states.idct.render_target.num_cbufs = 1;
-       context->states.idct.render_target.zsbuf = NULL;
-
-       /*
-        * Zero the sampler first: only some of its fields are assigned below
-        * (prefilter, shadow_ambient, lod_bias, max_lod, border_color and
-        * max_anisotropy are not), and the struct lives on the stack, so
-        * without this the driver would be handed indeterminate values.
-        */
-       memset(&sampler, 0, sizeof(struct pipe_sampler_state));
-       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-       sampler.compare_func = PIPE_FUNC_ALWAYS;
-       sampler.normalized_coords = 1;
-       sampler.min_lod = 0;
-       context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler);
-
-       /* 8x8 single-level 2D texture in A8L8 */
-       memset(&template, 0, sizeof(struct pipe_texture));
-       template.target = PIPE_TEXTURE_2D;
-       template.format = PIPE_FORMAT_A8L8_UNORM;
-       template.last_level = 0;
-       template.width[0] = 8;
-       template.height[0] = 8;
-       template.depth[0] = 1;
-       template.compressed = 0;
-       pf_get_block(template.format, &template.block);
-
-       context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template);
-
-       /*
-        * 16x1 A8R8G8B8 basis texture; reuses the template above.
-        * NOTE(review): template.block is not recomputed for the new format - confirm this is intended.
-        */
-       template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-       template.width[0] = 16;
-       template.height[0] = 1;
-
-       context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template);
-
-       /* IDCT does not create vertex buffers of its own; it points at the CSC ones */
-       for (i = 0; i < 2; ++i)
-       {
-               context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i];
-               context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i];
-       }
-
-       vlCreateVertexShaderFrameIDCT(context);
-       vlCreateFragmentShaderFrameIDCT(context);
-
-       return 0;
-}
-
-/*
- * Tears down the IDCT state that vlInitIDCT created: deletes the sampler
- * state and releases the IDCT texture and basis texture.
- *
- * Vertex buffers, vertex/fragment shaders and constant buffers are not
- * destroyed here. Always returns 0.
- */
-static int vlDestroyIDCT(struct VL_CONTEXT *context)
-{
-       struct pipe_context     *pipe;
-
-       assert(context);
-
-       pipe = context->pipe;
-
-       pipe->delete_sampler_state(pipe, context->states.idct.sampler);
-
-       pipe_texture_release(&context->states.idct.texture);
-       pipe_texture_release(&context->states.idct.basis);
-
-       return 0;
-}
-
-/*
- * Builds the IMC vertex shader as a TGSI token stream and stores the
- * resulting state object in context->states.mc.i_vs.
- *
- * The shader multiplies the input position by c0, adds c1 to produce the
- * output position, and copies the luma and chroma texcoords to the outputs.
- *
- * Returns 0 on success, 1 if the token buffer could not be allocated.
- */
-static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 50;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-
-       unsigned int                    ti;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-       tokens = malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* The buffer is written immediately below; bail out instead of dereferencing NULL */
-       if (!tokens)
-               return 1;
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       /* Version, header and processor occupy the first three tokens */
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Luma texcoords
-        * decl i2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * decl c0              ; Scaling vector to scale unit rect to macroblock size
-        * decl c1              ; Translation vector to move macroblock into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Luma texcoords
-        * decl o2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* decl t0 */
-       decl = vl_decl_temps(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add o0, t0, c1       ; Translate rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * mov o1, i1           ; Move input luma texcoords to output
-        * mov o2, i2           ; Move input chroma texcoords to output
-        */
-       for (i = 1; i < 3; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* The token buffer is freed as soon as the state object has been created from it */
-       vs.tokens = tokens;
-       context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs);
-       free(tokens);
-
-       return 0;
-}
-
-/*
- * Builds the IMC fragment shader as a TGSI token stream and stores the
- * resulting state object in context->states.mc.i_fs.
- *
- * The shader samples the luma, Cb and Cr textures, packs the .x component
- * of each sample into t0.x/.y/.z respectively, and writes t0 multiplied by
- * c0 as the fragment color.
- *
- * Returns 0 on success, 1 if the token buffer could not be allocated.
- */
-static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-
-       unsigned int                    ti;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-       tokens = malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* The buffer is written immediately below; bail out instead of dereferencing NULL */
-       if (!tokens)
-               return 1;
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       /* Version, header and processor occupy the first three tokens */
-       ti = 3;
-
-       /*
-        * decl i0                      ; Texcoords for s0
-        * decl i1                      ; Texcoords for s1, s2
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0                      ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl t0, t1 */
-       decl = vl_decl_temps(0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl s0                      ; Sampler for luma texture
-        * decl s1                      ; Sampler for chroma Cb texture
-        * decl s2                      ; Sampler for chroma Cr texture
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               decl = vl_decl_samplers(i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * tex2d t1, i0, s0             ; Read texel from luma texture
-        * mov t0.x, t1.x               ; Move luma sample into .x component
-        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-        * mov t0.y, t1.x               ; Move Cb sample into .y component
-        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
-        * mov t0.z, t1.x               ; Move Cr sample into .z component
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               /* Luma uses texcoord i0; both chroma planes share i1 */
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-               /* Broadcast t1.x and let the write mask pick the destination channel for this plane */
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* mul o0, t0, c0               ; Rescale texel to correct range */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* The token buffer is freed as soon as the state object has been created from it */
-       fs.tokens = tokens;
-       context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-
-       return 0;
-}
-
-/*
- * Builds the frame PMC vertex shader as a TGSI token stream and stores the
- * resulting state object in context->states.mc.p_vs[0].
- *
- * The shader multiplies the input position by c0 and adds c1 to produce
- * the output position, copies the luma and chroma texcoords through, and
- * emits the scaled rect plus c3 as the reference macroblock texcoords.
- *
- * Returns 0 on success, 1 if the token buffer could not be allocated.
- */
-static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-
-       unsigned int                    ti;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-       tokens = malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* The buffer is written immediately below; bail out instead of dereferencing NULL */
-       if (!tokens)
-               return 1;
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       /* Version, header and processor occupy the first three tokens */
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Luma texcoords
-        * decl i2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * decl c0              ; Scaling vector to scale unit rect to macroblock size
-        * decl c1              ; Translation vector to move macroblock into position
-        * decl c2              ; Unused
-        * decl c3              ; Translation vector to move ref macroblock texcoords into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Luma texcoords
-        * decl o2              ; Chroma texcoords
-        * decl o3              ; Ref macroblock texcoords
-        */
-       for (i = 0; i < 4; i++)
-       {
-               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* decl t0 */
-       decl = vl_decl_temps(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add o0, t0, c1       ; Translate rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * mov o1, i1           ; Move input luma texcoords to output
-        * mov o2, i2           ; Move input chroma texcoords to output
-        */
-       for (i = 1; i < 3; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* add o3, t0, c3       ; Translate rect into position on ref macroblock */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* The token buffer is freed as soon as the state object has been created from it */
-       vs.tokens = tokens;
-       context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs);
-       free(tokens);
-
-       return 0;
-}
-
-/*
- * Builds the field PMC vertex shader as a TGSI token stream and stores the
- * resulting state object in context->states.mc.p_vs[1].
- *
- * The shader computes the position as i0 * c0 + c1, copies the luma and
- * chroma texcoords through, emits the scaled rect plus c3 and plus c4 as
- * the top and bottom field reference texcoords, and outputs the position
- * multiplied by c2 as a denormalized position in o5.
- *
- * Returns 0 on success, 1 if the token buffer could not be allocated.
- */
-static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-
-       unsigned int                    ti;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-       tokens = malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* The buffer is written immediately below; bail out instead of dereferencing NULL */
-       if (!tokens)
-               return 1;
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       /* Version, header and processor occupy the first three tokens */
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Luma texcoords
-        * decl i2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * decl c0              ; Scaling vector to scale unit rect to macroblock size
-        * decl c1              ; Translation vector to move macroblock into position
-        * decl c2              ; Denorm coefficients
-        * decl c3              ; Translation vector to move top field ref macroblock texcoords into position
-        * decl c4              ; Translation vector to move bottom field ref macroblock texcoords into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Luma texcoords
-        * decl o2              ; Chroma texcoords
-        * decl o3              ; Top field ref macroblock texcoords
-        * decl o4              ; Bottom field ref macroblock texcoords
-        * decl o5              ; Denormalized vertex pos
-        */
-       for (i = 0; i < 6; i++)
-       {
-               /* Both o0 and o5 are declared with the position semantic */
-               decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* decl t0, t1 */
-       decl = vl_decl_temps(0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add t1, t0, c1       ; Translate rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* mov o0, t1           ; Move vertex pos to output */
-       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * mov o1, i1           ; Move input luma texcoords to output
-        * mov o2, i2           ; Move input chroma texcoords to output
-        */
-       for (i = 1; i < 3; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * add o3, t0, c3       ; Translate top field rect into position on ref macroblock
-        * add o4, t0, c4       ; Translate bottom field rect into position on ref macroblock
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* mul o5, t1, c2       ; Denorm vertex pos */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* The token buffer is freed as soon as the state object has been created from it */
-       vs.tokens = tokens;
-       context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs);
-       free(tokens);
-
-       return 0;
-}
-
-/*
- * Builds the frame PMC fragment shader as a TGSI token stream and stores
- * the resulting state object in context->states.mc.p_fs[0].
- *
- * The shader samples the luma, Cb and Cr textures into t0.x/.y/.z,
- * multiplies t0 by c0, samples the reference surface through s3, and
- * writes the sum of the two as the fragment color.
- *
- * Returns 0 on success, 1 if the token buffer could not be allocated.
- */
-static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-
-       unsigned int                    ti;
-       unsigned int                    i;
-
-       assert(context);
-
-       pipe = context->pipe;
-       tokens = malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* The buffer is written immediately below; bail out instead of dereferencing NULL */
-       if (!tokens)
-               return 1;
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       /* Version, header and processor occupy the first three tokens */
-       ti = 3;
-
-       /*
-        * decl i0                      ; Texcoords for s0
-        * decl i1                      ; Texcoords for s1, s2
-        * decl i2                      ; Texcoords for s3
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
-        * decl c1                      ; Declared (range is 0..1) but not referenced by this shader
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0                      ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl t0, t1 */
-       decl = vl_decl_temps(0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl s0                      ; Sampler for luma texture
-        * decl s1                      ; Sampler for chroma Cb texture
-        * decl s2                      ; Sampler for chroma Cr texture
-        * decl s3                      ; Sampler for ref surface texture
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               decl = vl_decl_samplers(i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * tex2d t1, i0, s0             ; Read texel from luma texture
-        * mov t0.x, t1.x               ; Move luma sample into .x component
-        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-        * mov t0.y, t1.x               ; Move Cb sample into .y component
-        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
-        * mov t0.z, t1.x               ; Move Cr sample into .z component
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               /* Luma uses texcoord i0; both chroma planes share i1 */
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-               /* Broadcast t1.x and let the write mask pick the destination channel for this plane */
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* mul t0, t0, c0               ; Rescale texel to correct range */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* tex2d t1, i2, s3             ; Read texel from ref macroblock */
-       inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add o0, t0, t1               ; Add ref and differential to form final output */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* The token buffer is freed as soon as the state object has been created from it */
-       fs.tokens = tokens;
-       context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-
-       return 0;
-}
-
-static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 200;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-
-       /*
-        * decl i0                      ; Texcoords for s0
-        * decl i1                      ; Texcoords for s1, s2
-        * decl i2                      ; Texcoords for s3
-        * decl i3                      ; Texcoords for s3
-        * decl i4                      ; Denormalized vertex pos
-        */
-       for (i = 0; i < 5; ++i)
-       {
-               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
-        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0                      ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl t0-t4 */
-       decl = vl_decl_temps(0, 4);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * decl s0                      ; Sampler for luma texture
-        * decl s1                      ; Sampler for chroma Cb texture
-        * decl s2                      ; Sampler for chroma Cr texture
-        * decl s3                      ; Sampler for ref surface texture
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               decl = vl_decl_samplers(i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * tex2d t1, i0, s0             ; Read texel from luma texture
-        * mov t0.x, t1.x               ; Move luma sample into .x component
-        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-        * mov t0.y, t1.x               ; Move Cb sample into .y component
-        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
-        * mov t0.z, t1.x               ; Move Cr sample into .z component
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-       }
-       
-       /* mul t0, t0, c0               ; Rescale texel to correct range */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * tex2d t1, i2, s3             ; Read texel from ref macroblock top field
-        * tex2d t2, i3, s3             ; Read texel from ref macroblock bottom field
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* XXX: Pos values off by 0.5? */
-       /* sub t4, i4.y, c1.x           ; Sub 0.5 from denormalized pos */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* floor t3, t3                 ; Get rid of fractional part */
-       inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t3, t3, c1.y             ; Multiply by 2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t3, t4, t3               ; Subtract from original Y to get Y % 2 */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
-       /* lerp t1, t3, t1, t2          ; Choose between top and bottom fields based on Y % 2 */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add o0, t0, t1               ; Add ref and differential to form final output */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-       
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-       
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Luma texcoords
-        * decl i2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0              ; Scaling vector to scale unit rect to macroblock size
-        * decl c1              ; Translation vector to move macroblock into position
-        * decl c2              ; Unused
-        * decl c3              ; Translation vector to move past ref macroblock texcoords into position
-        * decl c4              ; Unused
-        * decl c5              ; Translation vector to move future ref macroblock texcoords into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Luma texcoords
-        * decl o2              ; Chroma texcoords
-        * decl o3              ; Past ref macroblock texcoords
-        * decl o4              ; Future ref macroblock texcoords
-        */
-       for (i = 0; i < 5; i++)
-       {
-               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* decl t0 */
-       decl = vl_decl_temps(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add o0, t0, c1       ; Translate rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * mov o1, i1           ; Move input luma texcoords to output
-        * mov o2, i2           ; Move input chroma texcoords to output
-        */
-       for (i = 1; i < 3; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* add o3, t0, c3       ; Translate rect into position on past ref macroblock
-          add o4, t0, c5       ; Translate rect into position on future ref macroblock */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       vs.tokens = tokens;
-       context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs);
-       free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-       
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-       
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;   
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Luma texcoords
-        * decl i2              ; Chroma texcoords
-        */
-       for (i = 0; i < 3; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0              ; Scaling vector to scale unit rect to macroblock size
-        * decl c1              ; Translation vector to move macroblock into position
-        * decl c2              ; Denorm coefficients
-        * decl c3              ; Translation vector to move top field past ref macroblock texcoords into position
-        * decl c4              ; Translation vector to move bottom field past ref macroblock texcoords into position
-        * decl c5              ; Translation vector to move top field future ref macroblock texcoords into position
-        * decl c6              ; Translation vector to move bottom field future ref macroblock texcoords into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Luma texcoords
-        * decl o2              ; Chroma texcoords
-        * decl o3              ; Top field past ref macroblock texcoords
-        * decl o4              ; Bottom field past ref macroblock texcoords
-        * decl o5              ; Top field future ref macroblock texcoords
-        * decl o6              ; Bottom field future ref macroblock texcoords
-        * decl o7              ; Denormalized vertex pos
-        */
-       for (i = 0; i < 8; i++)
-       {
-               decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* decl t0, t1 */
-       decl = vl_decl_temps(0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add t1, t0, c1       ; Translate rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mov o0, t1           ; Move vertex pos to output */
-       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * mov o1, i1           ; Move input luma texcoords to output
-        * mov o2, i2           ; Move input chroma texcoords to output
-        */
-       for (i = 1; i < 3; ++i)
-       {
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /*
-        * add o3, t0, c3       ; Translate top field rect into position on past ref macroblock
-        * add o4, t0, c4       ; Translate bottom field rect into position on past ref macroblock
-        * add o5, t0, c5       ; Translate top field rect into position on future ref macroblock
-        * add o6, t0, c6       ; Translate bottom field rect into position on future ref macroblock
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* mul o7, t1, c2       ; Denorm vertex pos */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       vs.tokens = tokens;
-       context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs);
-       free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 100;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-
-       /*
-        * decl i0                      ; Texcoords for s0
-        * decl i1                      ; Texcoords for s1, s2
-        * decl i2                      ; Texcoords for s3
-        * decl i3                      ; Texcoords for s4
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
-        * decl c1                      ; Constant 1/2 in .x channel to use as weight to blend past and future texels
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0                      ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl t0-t2 */
-       decl = vl_decl_temps(0, 2);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * decl s0                      ; Sampler for luma texture
-        * decl s1                      ; Sampler for chroma Cb texture
-        * decl s2                      ; Sampler for chroma Cr texture
-        * decl s3                      ; Sampler for past ref surface texture
-        * decl s4                      ; Sampler for future ref surface texture
-        */
-       for (i = 0; i < 5; ++i)
-       {
-               decl = vl_decl_samplers(i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * tex2d t1, i0, s0             ; Read texel from luma texture
-        * mov t0.x, t1.x               ; Move luma sample into .x component
-        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-        * mov t0.y, t1.x               ; Move Cb sample into .y component
-        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
-        * mov t0.z, t1.x               ; Move Cr sample into .z component
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-       }
-       
-       /* mul t0, t0, c0               ; Rescale texel to correct range */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * tex2d t1, i2, s3             ; Read texel from past ref macroblock
-        * tex2d t2, i3, s4             ; Read texel from future ref macroblock
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 200;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-
-       /*
-        * decl i0                      ; Texcoords for s0
-        * decl i1                      ; Texcoords for s1, s2
-        * decl i2                      ; Texcoords for s3
-        * decl i3                      ; Texcoords for s3
-        * decl i4                      ; Texcoords for s4
-        * decl i5                      ; Texcoords for s4
-        * decl i6                      ; Denormalized vertex pos
-        */
-       for (i = 0; i < 7; ++i)
-       {
-               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
-        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
-        *                              ; and for Y-mod-2 top/bottom field selection
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0                      ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl t0-t5 */
-       decl = vl_decl_temps(0, 5);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * decl s0                      ; Sampler for luma texture
-        * decl s1                      ; Sampler for chroma Cb texture
-        * decl s2                      ; Sampler for chroma Cr texture
-        * decl s3                      ; Sampler for past ref surface texture
-        * decl s4                      ; Sampler for future ref surface texture
-        */
-       for (i = 0; i < 5; ++i)
-       {
-               decl = vl_decl_samplers(i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * tex2d t1, i0, s0             ; Read texel from luma texture
-        * mov t0.x, t1.x               ; Move luma sample into .x component
-        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-        * mov t0.y, t1.x               ; Move Cb sample into .y component
-        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
-        * mov t0.z, t1.x               ; Move Cr sample into .z component
-        */
-       for (i = 0; i < 3; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-               
-       }
-       
-       /* mul t0, t0, c0               ; Rescale texel to correct range */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* XXX: Pos values off by 0.5? */
-       /* sub t4, i6.y, c1.x           ; Sub 0.5 from denormalized pos */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* floor t3, t3                 ; Get rid of fractional part */
-       inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t3, t3, c1.y             ; Multiply by 2 */
-       inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t3, t4, t3               ; Subtract from original Y to get Y % 2 */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * tex2d t1, i2, s3             ; Read texel from past ref macroblock top field
-        * tex2d t2, i3, s3             ; Read texel from past ref macroblock bottom field
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
-       /* lerp t1, t3, t1, t2          ; Choose between top and bottom fields based on Y % 2 */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * tex2d t4, i4, s4             ; Read texel from future ref macroblock top field
-        * tex2d t5, i5, s4             ; Read texel from future ref macroblock bottom field
-        */
-       for (i = 0; i < 2; ++i)
-       {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4);
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
-       /* lerp t2, t3, t4, t5          ; Choose between top and bottom fields based on Y % 2 */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs);
-       free(tokens);
-       
-       return 0;
-}
-
-int vlCreateDataBufsMC(struct VL_CONTEXT *context)
-{
-       struct pipe_context     *pipe;
-       unsigned int            i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       
-       /* Create our vertex buffer and vertex buffer element */
-       context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F);
-       context->states.mc.vertex_bufs[0].max_index = 23;
-       context->states.mc.vertex_bufs[0].buffer_offset = 0;
-       context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_VERTEX,
-               sizeof(struct VL_VERTEX2F) * 24
-       );
-       
-       context->states.mc.vertex_buf_elems[0].src_offset = 0;
-       context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0;
-       context->states.mc.vertex_buf_elems[0].nr_components = 2;
-       context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-       
-       /* Create our texcoord buffers and texcoord buffer elements */
-       for (i = 1; i < 3; ++i)
-       {
-               context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F);
-               context->states.mc.vertex_bufs[i].max_index = 23;
-               context->states.mc.vertex_bufs[i].buffer_offset = 0;
-               context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create
-               (
-                       pipe->winsys,
-                       1,
-                       PIPE_BUFFER_USAGE_VERTEX,
-                       sizeof(struct VL_TEXCOORD2F) * 24
-               );
-       
-               context->states.mc.vertex_buf_elems[i].src_offset = 0;
-               context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i;
-               context->states.mc.vertex_buf_elems[i].nr_components = 2;
-               context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
-       }
-       
-       /* Fill buffers */
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               vl_chroma_420_texcoords,
-               sizeof(struct VL_VERTEX2F) * 24
-       );
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               vl_luma_texcoords,
-               sizeof(struct VL_TEXCOORD2F) * 24
-       );
-       /* TODO: Accomodate 422, 444 */
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               vl_chroma_420_texcoords,
-               sizeof(struct VL_TEXCOORD2F) * 24
-       );
-       
-       for (i = 0; i < 3; ++i)
-               pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer);
-       
-       /* Create our constant buffer */
-       context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS);
-       context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_CONSTANT,
-               context->states.mc.vs_const_buf.size
-       );
-       
-       context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS);
-       context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_CONSTANT,
-               context->states.mc.fs_const_buf.size
-       );
-       
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               &vl_mc_fs_consts,
-               sizeof(struct VL_MC_FS_CONSTS)
-       );
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer);
-       
-       return 0;
-}
-
-static int vlInitMC(struct VL_CONTEXT *context)
-{      
-       struct pipe_context             *pipe;
-       struct pipe_sampler_state       sampler;
-       struct pipe_texture             template;
-       unsigned int                    filters[5];
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       
-       /* For MC we render to textures, which are rounded up to nearest POT */
-       context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width);
-       context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height);
-       context->states.mc.viewport.scale[2] = 1;
-       context->states.mc.viewport.scale[3] = 1;
-       context->states.mc.viewport.translate[0] = 0;
-       context->states.mc.viewport.translate[1] = 0;
-       context->states.mc.viewport.translate[2] = 0;
-       context->states.mc.viewport.translate[3] = 0;
-       
-       context->states.mc.render_target.width = vlRoundUpPOT(context->video_width);
-       context->states.mc.render_target.height = vlRoundUpPOT(context->video_height);
-       context->states.mc.render_target.num_cbufs = 1;
-       /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */
-       context->states.mc.render_target.zsbuf = NULL;
-       
-       filters[0] = PIPE_TEX_FILTER_NEAREST;
-       filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
-       filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
-       filters[3] = PIPE_TEX_FILTER_LINEAR;
-       filters[4] = PIPE_TEX_FILTER_LINEAR;
-       
-       for (i = 0; i < 5; ++i)
-       {
-               sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-               sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-               sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-               sampler.min_img_filter = filters[i];
-               sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-               sampler.mag_img_filter = filters[i];
-               sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-               sampler.compare_func = PIPE_FUNC_ALWAYS;
-               sampler.normalized_coords = 1;
-               /*sampler.prefilter = ;*/
-               /*sampler.shadow_ambient = ;*/
-               /*sampler.lod_bias = ;*/
-               sampler.min_lod = 0;
-               /*sampler.max_lod = ;*/
-               /*sampler.border_color[i] = ;*/
-               /*sampler.max_anisotropy = ;*/
-               context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler);
-       }
-       
-       memset(&template, 0, sizeof(struct pipe_texture));
-       template.target = PIPE_TEXTURE_2D;
-       template.format = PIPE_FORMAT_R16_SNORM;
-       template.last_level = 0;
-       template.width[0] = 8;
-       template.height[0] = 8 * 4;
-       template.depth[0] = 1;
-       template.compressed = 0;
-       pf_get_block(template.format, &template.block);
-       
-       context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template);
-       
-       if (context->video_format == VL_FORMAT_YCBCR_420)
-               template.height[0] = 8;
-       else if (context->video_format == VL_FORMAT_YCBCR_422)
-               template.height[0] = 8 * 2;
-       else if (context->video_format == VL_FORMAT_YCBCR_444)
-               template.height[0] = 8 * 4;
-       else
-               assert(0);
-               
-       context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template);
-       context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template);
-       
-       /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */
-       
-       vlCreateVertexShaderIMC(context);
-       vlCreateFragmentShaderIMC(context);
-       vlCreateVertexShaderFramePMC(context);
-       vlCreateVertexShaderFieldPMC(context);
-       vlCreateFragmentShaderFramePMC(context);
-       vlCreateFragmentShaderFieldPMC(context);
-       vlCreateVertexShaderFrameBMC(context);
-       vlCreateVertexShaderFieldBMC(context);
-       vlCreateFragmentShaderFrameBMC(context);
-       vlCreateFragmentShaderFieldBMC(context);
-       vlCreateDataBufsMC(context);
-       
-       return 0;
-}
-
-static int vlDestroyMC(struct VL_CONTEXT *context)
-{
-       unsigned int i;
-       
-       assert(context);
-       
-       for (i = 0; i < 5; ++i)
-               context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]);
-       
-       for (i = 0; i < 3; ++i)
-               context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer);
-       
-       /* Textures 3 & 4 are not created directly, no need to release them here */
-       for (i = 0; i < 3; ++i)
-               pipe_texture_release(&context->states.mc.textures[i]);
-       
-       context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs);
-       context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs);
-       
-       for (i = 0; i < 2; ++i)
-       {
-               context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]);
-               context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]);
-               context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]);
-               context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]);
-       }
-       
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer);
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer);
-       
-       return 0;
-}
-
-static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 50;
-       
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-       
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-       ti = 3;
-
-       /*
-        * decl i0              ; Vertex pos
-        * decl i1              ; Vertex texcoords
-        */
-       for (i = 0; i < 2; i++)
-       {
-               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /*
-        * decl c0              ; Scaling vector to scale texcoord rect to source size
-        * decl c1              ; Translation vector to move texcoord rect into position
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /*
-        * decl o0              ; Vertex pos
-        * decl o1              ; Vertex texcoords
-        */
-       for (i = 0; i < 2; i++)
-       {
-               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       }
-       
-       /* decl t0 */
-       decl = vl_decl_temps(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* mov o0, i0           ; Move pos in to pos out */
-       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* mul t0, i1, c0       ; Scale unit texcoord rect to source size */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* add o1, t0, c1       ; Translate texcoord rect into position */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       vs.tokens = tokens;
-       context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs);
-       //free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context)
-{
-       const unsigned int              max_tokens = 50;
-
-       struct pipe_context             *pipe;
-       struct pipe_shader_state        fs;
-       struct tgsi_token               *tokens;
-       struct tgsi_header              *header;
-
-       struct tgsi_full_declaration    decl;
-       struct tgsi_full_instruction    inst;
-       
-       unsigned int                    ti;
-       unsigned int                    i;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
-
-       /* Version */
-       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-       /* Header */
-       header = (struct tgsi_header*)&tokens[1];
-       *header = tgsi_build_header();
-       /* Processor */
-       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-       ti = 3;
-
-       /* decl i0              ; Texcoords for s0 */
-       decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * decl c0              ; Bias vector for CSC
-        * decl c1-c4           ; CSC matrix c1-c4
-        */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-       /* decl o0              ; Fragment color */
-       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl t0 */
-       decl = vl_decl_temps(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* decl s0              ; Sampler for tex containing picture to display */
-       decl = vl_decl_samplers(0, 0);
-       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-       
-       /* tex2d t0, i0, s0     ; Read src pixel */
-       inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c0       ; Subtract bias vector from pixel */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /*
-        * dp4 o0.x, t0, c1     ; Multiply pixel by the color conversion matrix
-        * dp4 o0.y, t0, c2
-        * dp4 o0.z, t0, c3
-        * dp4 o0.w, t0, c4     ; XXX: Don't need 4th coefficient
-        */
-       for (i = 0; i < 4; ++i)
-       {
-               inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       }
-
-       /* end */
-       inst = vl_end();
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-       fs.tokens = tokens;
-       context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs);
-       //free(tokens);
-       
-       return 0;
-}
-
-static int vlCreateDataBufsCSC(struct VL_CONTEXT *context)
-{
-       struct pipe_context *pipe;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       
-       /*
-       Create our vertex buffer and vertex buffer element
-       VB contains 4 vertices that render a quad covering the entire window
-       to display a rendered surface
-       Quad is rendered as a tri strip
-       */
-       context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F);
-       context->states.csc.vertex_bufs[0].max_index = 3;
-       context->states.csc.vertex_bufs[0].buffer_offset = 0;
-       context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_VERTEX,
-               sizeof(struct VL_VERTEX2F) * 4
-       );
-       
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               vl_surface_vertex_positions,
-               sizeof(struct VL_VERTEX2F) * 4
-       );
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer);
-       
-       context->states.csc.vertex_buf_elems[0].src_offset = 0;
-       context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0;
-       context->states.csc.vertex_buf_elems[0].nr_components = 2;
-       context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-       
-       /*
-       Create our texcoord buffer and texcoord buffer element
-       Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
-       */
-       context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F);
-       context->states.csc.vertex_bufs[1].max_index = 3;
-       context->states.csc.vertex_bufs[1].buffer_offset = 0;
-       context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_VERTEX,
-               sizeof(struct VL_TEXCOORD2F) * 4
-       );
-       
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               vl_surface_texcoords,
-               sizeof(struct VL_TEXCOORD2F) * 4
-       );
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer);
-       
-       context->states.csc.vertex_buf_elems[1].src_offset = 0;
-       context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1;
-       context->states.csc.vertex_buf_elems[1].nr_components = 2;
-       context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-       
-       /*
-       Create our vertex shader's constant buffer
-       Const buffer contains scaling and translation vectors
-       */
-       context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS);
-       context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_CONSTANT,
-               context->states.csc.vs_const_buf.size
-       );
-       
-       /*
-       Create our fragment shader's constant buffer
-       Const buffer contains the color conversion matrix and bias vectors
-       */
-       context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS);
-       context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create
-       (
-               pipe->winsys,
-               1,
-               PIPE_BUFFER_USAGE_CONSTANT,
-               context->states.csc.fs_const_buf.size
-       );
-       
-       /*
-       TODO: Refactor this into a seperate function,
-       allow changing the CSC matrix at runtime to switch between regular & full versions
-       */
-       memcpy
-       (
-               pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-               &vl_csc_fs_consts_601,
-               sizeof(struct VL_CSC_FS_CONSTS)
-       );
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer);
-       
-       return 0;
-}
-
-static int vlInitCSC(struct VL_CONTEXT *context)
-{      
-       struct pipe_context             *pipe;
-       struct pipe_sampler_state       sampler;
-       
-       assert(context);
-       
-       pipe = context->pipe;
-       
-       /* Delay creating the FB until vlPutSurface() so we know window size */
-       context->states.csc.framebuffer.num_cbufs = 1;
-       context->states.csc.framebuffer.cbufs[0] = NULL;
-       context->states.csc.framebuffer.zsbuf = NULL;
-
-       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-       sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
-       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-       sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
-       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-       sampler.compare_func = PIPE_FUNC_ALWAYS;
-       sampler.normalized_coords = 1;
-       /*sampler.prefilter = ;*/
-       /*sampler.shadow_ambient = ;*/
-       /*sampler.lod_bias = ;*/
-       /*sampler.min_lod = ;*/
-       /*sampler.max_lod = ;*/
-       /*sampler.border_color[i] = ;*/
-       /*sampler.max_anisotropy = ;*/
-       context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler);
-       
-       vlCreateVertexShaderCSC(context);
-       vlCreateFragmentShaderCSC(context);
-       vlCreateDataBufsCSC(context);
-       
-       return 0;
-}
-
-static int vlDestroyCSC(struct VL_CONTEXT *context)
-{
-       assert(context);
-       
-       /*
-       Since we create the final FB when we display our first surface,
-       it may not be created if vlPutSurface() is never called
-       */
-       if (context->states.csc.framebuffer.cbufs[0])
-               context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]);
-       context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler);
-       context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader);
-       context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader);
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer);
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer);
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer);
-       context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer);
-       
-       return 0;
-}
-
-static int vlInitCommon(struct VL_CONTEXT *context)
+static int vlInitCommon(struct vlContext *context) /* Create and bind the context's rasterizer, blend and depth/stencil/alpha state objects; returns 0 */
 {
        struct pipe_context                     *pipe;
        struct pipe_rasterizer_state            rast;
        struct pipe_blend_state                 blend;
        struct pipe_depth_stencil_alpha_state   dsa;
        unsigned int                            i;
-       
+
        assert(context);
-       
+
        pipe = context->pipe;
-       
+
        rast.flatshade = 1;
        rast.flatshade_first = 0;
        rast.light_twoside = 0;
@@ -2113,9 +51,9 @@ static int vlInitCommon(struct VL_CONTEXT *context)
        rast.offset_units = 1;
        rast.offset_scale = 1;
        /*rast.sprite_coord_mode[i] = ;*/
-       context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast);
-       pipe->bind_rasterizer_state(pipe, context->states.common.raster);
-       
+       context->raster = pipe->create_rasterizer_state(pipe, &rast); /* handle is kept on the context; vlDestroyContext() deletes it */
+       pipe->bind_rasterizer_state(pipe, context->raster); /* bound immediately after creation */
+
        blend.blend_enable = 0;
        blend.rgb_func = PIPE_BLEND_ADD;
        blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
@@ -2128,9 +66,9 @@ static int vlInitCommon(struct VL_CONTEXT *context)
        /* Needed to allow color writes to FB, even if blending disabled */
        blend.colormask = PIPE_MASK_RGBA;
        blend.dither = 0;
-       context->states.common.blend = pipe->create_blend_state(pipe, &blend);
-       pipe->bind_blend_state(pipe, context->states.common.blend);
-       
+       context->blend = pipe->create_blend_state(pipe, &blend); /* handle is kept on the context; vlDestroyContext() deletes it */
+       pipe->bind_blend_state(pipe, context->blend); /* bound immediately after creation */
+
        dsa.depth.enabled = 0;
        dsa.depth.writemask = 0;
        dsa.depth.func = PIPE_FUNC_ALWAYS;
@@ -2149,134 +87,122 @@ static int vlInitCommon(struct VL_CONTEXT *context)
        dsa.alpha.enabled = 0;
        dsa.alpha.func = PIPE_FUNC_ALWAYS;
        dsa.alpha.ref = 0;
-       context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa);
-       pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa);
-       
-       return 0;
-}
+       context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); /* handle is kept on the context; vlDestroyContext() deletes it */
+       pipe->bind_depth_stencil_alpha_state(pipe, context->dsa); /* bound immediately after creation */
 
-static int vlDestroyCommon(struct VL_CONTEXT *context)
-{
-       assert(context);
-       
-       context->pipe->delete_blend_state(context->pipe, context->states.common.blend);
-       context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster);
-       context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa);
-       
        return 0;
 }
 
-static int vlInit(struct VL_CONTEXT *context)
+int vlCreateContext /* Allocate and initialize a vlContext; returns 0 on success, 1 if the allocation fails */
+(
+       struct vlScreen *screen, /* stored on the context; asserted non-NULL */
+       struct pipe_context *pipe, /* stored on the context and used to create all pipe state; asserted non-NULL */
+       unsigned int picture_width, /* stored on the context and forwarded to vlCreateR16SNormMC() */
+       unsigned int picture_height, /* stored on the context and forwarded to vlCreateR16SNormMC() */
+       enum vlFormat picture_format, /* stored on the context and forwarded to vlCreateR16SNormMC() */
+       enum vlProfile profile, /* only stored on the context by this function */
+       enum vlEntryPoint entry_point, /* only stored on the context by this function */
+       struct vlContext **context /* out: receives the new context on success; left untouched if allocation fails */
+)
 {
+       struct vlContext *ctx;
+
+       assert(screen);
        assert(context);
-       
-       vlInitCommon(context);
-       vlInitCSC(context);
-       vlInitMC(context);
-       vlInitIDCT(context);
-       
+       assert(pipe);
+
+       ctx = calloc(1, sizeof(struct vlContext)); /* zero-filled, so fields not assigned below start out as zero */
+
+       if (!ctx) /* out of memory: nothing has been created yet, so there is nothing to undo */
+               return 1;
+
+       ctx->screen = screen;
+       ctx->pipe = pipe;
+       ctx->picture_width = picture_width;
+       ctx->picture_height = picture_height;
+       ctx->picture_format = picture_format;
+       ctx->profile = profile;
+       ctx->entry_point = entry_point;
+
+       vlInitCommon(ctx); /* creates and binds the rasterizer, blend and depth/stencil/alpha state */
+
+       vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render); /* NOTE(review): return value ignored; vlDestroyContext() calls ctx->render->vlDestroy unconditionally - confirm this cannot fail or add a check */
+       vlCreateBasicCSC(pipe, &ctx->csc); /* NOTE(review): return value ignored here as well; ctx->csc is likewise dereferenced unconditionally in vlDestroyContext() */
+
+       *context = ctx; /* hand the context to the caller; it is released with vlDestroyContext() */
+
        return 0;
 }
 
-static int vlDestroy(struct VL_CONTEXT *context)
+int vlDestroyContext /* Release the renderer, CSC and common state objects held by the context, then free it; returns 0 */
+(
+       struct vlContext *context /* context from vlCreateContext(); asserted non-NULL and invalid once this returns */
+)
 {
        assert(context);
-       
+
        /* XXX: Must unbind shaders before we can delete them for some reason */
        context->pipe->bind_vs_state(context->pipe, NULL);
        context->pipe->bind_fs_state(context->pipe, NULL);
-       
-       vlDestroyCommon(context);
-       vlDestroyCSC(context);
-       vlDestroyMC(context);
-       vlDestroyIDCT(context);
-       
+
+       context->render->vlDestroy(context->render); /* renderer is torn down through its own vlDestroy hook */
+       context->csc->vlDestroy(context->csc); /* CSC stage is torn down through its own vlDestroy hook */
+
+       context->pipe->delete_blend_state(context->pipe, context->blend); /* state objects created in vlInitCommon() */
+       context->pipe->delete_rasterizer_state(context->pipe, context->raster);
+       context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa);
+
+       free(context); /* the struct itself was calloc'ed in vlCreateContext() */
+
        return 0;
 }
 
-int vlCreateContext
+struct vlScreen* vlContextGetScreen /* Accessor: returns the screen pointer stored on the context */
 (
-       Display *display,
-       struct pipe_context *pipe,
-       unsigned int video_width,
-       unsigned int video_height,
-       enum VL_FORMAT video_format,
-       struct VL_CONTEXT **context
+       struct vlContext *context /* asserted non-NULL */
 )
 {
-       struct VL_CONTEXT *ctx;
-       
-       assert(display);
-       assert(pipe);
        assert(context);
-       
-       ctx = calloc(1, sizeof(struct VL_CONTEXT));
-       
-       ctx->display = display;
-       ctx->pipe = pipe;
-       ctx->video_width = video_width;
-       ctx->video_height = video_height;
-       ctx->video_format = video_format;
-       
-       vlInit(ctx);
-       
-       /* Since we only change states in vlPutSurface() we need to start in render mode */
-       vlBeginRender(ctx);
-       
-       *context = ctx;
-       
-       return 0;
+
+       return context->screen; /* the value passed as 'screen' to vlCreateContext() */
 }
 
-int vlDestroyContext(struct VL_CONTEXT *context)
+struct pipe_context* vlGetPipeContext /* Accessor: returns the pipe context stored on the context */
+(
+       struct vlContext *context /* asserted non-NULL */
+)
 {
        assert(context);
-       
-       vlDestroy(context);
-       
-       free(context);
-       
-       return 0;
+
+       return context->pipe; /* the value passed as 'pipe' to vlCreateContext() */
 }
 
-int vlBeginRender(struct VL_CONTEXT *context)
+unsigned int vlGetPictureWidth /* Accessor: returns the picture width stored on the context */
+(
+       struct vlContext *context /* asserted non-NULL */
+)
 {
-       struct pipe_context     *pipe;
-       
        assert(context);
-       
-       pipe = context->pipe;
-       
-       /* Frame buffer set in vlRender*Macroblock() */
-       /* Shaders, samplers, textures set in vlRender*Macroblock() */
-       pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs);
-       pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems);
-       pipe->set_viewport_state(pipe, &context->states.mc.viewport);
-       pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf);
-       pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf);
-       
-       return 0;
+
+       return context->picture_width; /* the value passed as 'picture_width' to vlCreateContext() */
 }
 
-int vlEndRender(struct VL_CONTEXT *context)
+unsigned int vlGetPictureHeight /* Accessor: returns the picture height stored on the context */
+(
+       struct vlContext *context /* asserted non-NULL */
+)
 {
-       struct pipe_context *pipe;
-       
        assert(context);
-       
-       pipe = context->pipe;
-       
-       pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer);
-       pipe->set_viewport_state(pipe, &context->states.csc.viewport);
-       pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler);
-       /* Source texture set in vlPutSurface() */
-       pipe->bind_vs_state(pipe, context->states.csc.vertex_shader);
-       pipe->bind_fs_state(pipe, context->states.csc.fragment_shader);
-       pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs);
-       pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems);
-       pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf);
-       pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf);
-       
-       return 0;
+
+       return context->picture_height; /* the value passed as 'picture_height' to vlCreateContext() */
 }
 
+enum vlFormat vlGetPictureFormat /* Accessor: returns the picture format stored on the context */
+(
+       struct vlContext *context /* asserted non-NULL */
+)
+{
+       assert(context);
+
+       return context->picture_format; /* the value passed as 'picture_format' to vlCreateContext() */
+}
index bff318854aaa245b496063f90133fcdf0151a161..3d14634c44e7d314cfb7d7d227e19520d09f898d 100644 (file)
@@ -1,83 +1,73 @@
 #ifndef vl_context_h
 #define vl_context_h
 
-#include <X11/Xlib.h>
-#include <pipe/p_state.h>
 #include "vl_types.h"
 
 struct pipe_context;
 
-struct VL_CONTEXT
+#ifdef VL_INTERNAL
+struct vlRender;
+struct vlCSC;
+
+struct vlContext
 {
-       Display                 *display;
+       struct vlScreen         *screen;
        struct pipe_context     *pipe;
-       unsigned int            video_width;
-       unsigned int            video_height;
-       enum VL_FORMAT          video_format;
-       
-       struct
-       {
-               struct
-               {
-                       struct pipe_rasterizer_state            *raster;
-                       struct pipe_depth_stencil_alpha_state   *dsa;
-                       struct pipe_blend_state                 *blend;
-               } common;
-               
-               struct
-               {
-                       struct pipe_viewport_state              viewport;
-                       struct pipe_framebuffer_state           render_target;
-                       struct pipe_sampler_state               *sampler;
-                       struct pipe_texture                     *texture;
-                       struct pipe_texture                     *basis;
-                       struct pipe_shader_state                *frame_vs;
-                       struct pipe_shader_state                *frame_fs;
-                       struct pipe_vertex_buffer               *vertex_bufs[2];
-                       struct pipe_vertex_element              *vertex_buf_elems[2];
-                       //struct pipe_constant_buffer           vs_const_buf, fs_const_buf;
-               } idct;
-               
-               struct
-               {
-                       struct pipe_viewport_state              viewport;
-                       struct pipe_framebuffer_state           render_target;
-                       struct pipe_sampler_state               *samplers[5];
-                       struct pipe_texture                     *textures[5];
-                       struct pipe_shader_state                *i_vs, *p_vs[2], *b_vs[2];
-                       struct pipe_shader_state                *i_fs, *p_fs[2], *b_fs[2];
-                       struct pipe_vertex_buffer               vertex_bufs[3];
-                       struct pipe_vertex_element              vertex_buf_elems[3];
-                       struct pipe_constant_buffer             vs_const_buf, fs_const_buf;
-               } mc;
-               
-               struct
-               {
-                       struct pipe_viewport_state              viewport;
-                       struct pipe_framebuffer_state           framebuffer;
-                       struct pipe_sampler_state               *sampler;
-                       struct pipe_shader_state                *vertex_shader, *fragment_shader;
-                       struct pipe_vertex_buffer               vertex_bufs[2];
-                       struct pipe_vertex_element              vertex_buf_elems[2];
-                       struct pipe_constant_buffer             vs_const_buf, fs_const_buf;
-               } csc;
-       } states;
+       unsigned int            picture_width;
+       unsigned int            picture_height;
+       enum vlFormat           picture_format;
+       enum vlProfile          profile;
+       enum vlEntryPoint       entry_point;
+
+       void                    *raster;
+       void                    *dsa;
+       void                    *blend;
+
+       struct vlRender         *render;
+       struct vlCSC            *csc;
 };
+#endif
 
 int vlCreateContext
 (
-       Display *display,
+       struct vlScreen *screen,
        struct pipe_context *pipe,
-       unsigned int video_width,
-       unsigned int video_height,
-       enum VL_FORMAT video_format,
-       struct VL_CONTEXT **context
+       unsigned int picture_width,
+       unsigned int picture_height,
+       enum vlFormat picture_format,
+       enum vlProfile profile,
+       enum vlEntryPoint entry_point,
+       struct vlContext **context
 );
 
-int vlDestroyContext(struct VL_CONTEXT *context);
+int vlDestroyContext
+(
+       struct vlContext *context
+);
 
-int vlBeginRender(struct VL_CONTEXT *context);
-int vlEndRender(struct VL_CONTEXT *context);
+struct vlScreen* vlContextGetScreen
+(
+       struct vlContext *context
+);
 
-#endif
+struct pipe_context* vlGetPipeContext
+(
+       struct vlContext *context
+);
 
+unsigned int vlGetPictureWidth
+(
+       struct vlContext *context
+);
+
+unsigned int vlGetPictureHeight
+(
+       struct vlContext *context
+);
+
+enum vlFormat vlGetPictureFormat
+(
+       struct vlContext *context
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_csc.h b/src/gallium/state_trackers/g3dvl/vl_csc.h
new file mode 100644 (file)
index 0000000..36417a2
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef vl_csc_h
+#define vl_csc_h
+
+#include "vl_types.h"
+
+struct pipe_surface;
+
+struct vlCSC /* Table of function pointers for one CSC stage; every entry receives the vlCSC object itself as its first argument. */
+{
+       int (*vlResizeFrameBuffer) /* NOTE(review): presumably resizes the stage's output to width x height -- implementation not visible from this header. */
+       (
+               struct vlCSC *csc,
+               unsigned int width,
+               unsigned int height
+       );
+
+       int (*vlBegin) /* NOTE(review): presumably paired with vlEnd around vlPutPicture calls -- confirm against an implementation. */
+       (
+               struct vlCSC *csc
+       );
+
+       int (*vlPutPicture) /* Takes a source surface plus a source and a destination rectangle; NOTE(review): units and coordinate space are not stated here. */
+       (
+               struct vlCSC *csc,
+               struct vlSurface *surface,
+               int srcx,
+               int srcy,
+               int srcw,
+               int srch,
+               int destx,
+               int desty,
+               int destw,
+               int desth,
+               enum vlPictureType picture_type
+       );
+
+       int (*vlEnd)
+       (
+               struct vlCSC *csc
+       );
+
+       struct pipe_surface* (*vlGetFrameBuffer) /* Returns a pipe_surface pointer; NOTE(review): ownership of the returned surface is not documented here. */
+       (
+               struct vlCSC *csc
+       );
+
+       int (*vlDestroy)
+       (
+               struct vlCSC *csc
+       );
+};
+
+#endif
index 0e5c8c77f90aa7b48740ae129ee84cef053742c5..f2476dbf1ee9141da8dbb6bbd2174d7124d83e17 100644 (file)
@@ -6,17 +6,17 @@
  * Need to be scaled to cover mbW*mbH macroblock pixels and translated into
  * position on target surface.
  */
-const struct VL_VERTEX2F vl_mb_vertex_positions[24] =
+const struct vlVertex2f macroblock_verts[24] =
 {
        {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f},
        {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f},
-       
+
        {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f},
        {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f},
-       
+
        {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f},
        {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f},
-       
+
        {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f},
        {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f}
 };
@@ -26,17 +26,17 @@ const struct VL_VERTEX2F vl_mb_vertex_positions[24] =
  * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at
  * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH.
  */
-const struct VL_TEXCOORD2F vl_luma_texcoords[24] =
+const struct vlVertex2f macroblock_luma_texcoords[24] =
 {
        {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f},
        {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f},
-       
+
        {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f},
        {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f},
-       
+
        {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f},
        {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f},
-       
+
        {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f},
        {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f}
 };
@@ -45,7 +45,7 @@ const struct VL_TEXCOORD2F vl_luma_texcoords[24] =
  * Represents texcoords for the above for rendering 1 chroma block.
  * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors.
  */
-const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions;
+const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts;
 
 /*
  * Represents texcoords for the above for rendering 2 chroma blocks arranged
@@ -53,30 +53,13 @@ const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2
  * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping.
  * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors.
  */
-const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions;
+const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts;
 
 /*
  * Represents texcoords for the above for rendering 4 chroma blocks.
  * Same case as 4 luma blocks.
  */
-const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords;
-
-/*
- * Represents 2 triangles in a strip in normalized coords.
- * Used to render the surface onto the frame buffer.
- */
-const struct VL_VERTEX2F vl_surface_vertex_positions[4] =
-{
-       {0.0f, 0.0f},
-       {0.0f, 1.0f},
-       {1.0f, 0.0f},
-       {1.0f, 1.0f}
-};
-
-/*
- * Represents texcoords for the above. We can use the position values directly.
- */
-const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_surface_vertex_positions;
+const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords;
 
 /*
  * Used when rendering P and B macroblocks, multiplier is applied to the A channel,
@@ -84,97 +67,10 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)
  * get back the differential. The differential is then added to the samples from the
  * reference surface(s).
  */
+#if 0
 const struct VL_MC_FS_CONSTS vl_mc_fs_consts =
 {
        {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
        {0.5f, 2.0f, 0.0f, 0.0f}
 };
-
-/*
- * Identity color conversion constants, for debugging
- */
-const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity =
-{
-       {
-               0.0f, 0.0f, 0.0f, 0.0f
-       },
-       {
-               1.0f, 0.0f, 0.0f, 0.0f,
-               0.0f, 1.0f, 0.0f, 0.0f,
-               0.0f, 0.0f, 1.0f, 0.0f,
-               0.0f, 0.0f, 0.0f, 1.0f
-       }
-};
-
-/*
- * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
- * Y is in [16,235], Cb and Cr are in [16,240]
- * R, G, and B are in [16,235]
- */
-const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601 =
-{
-       {
-               0.0f,           0.501960784f,   0.501960784f,   0.0f
-       },
-       {
-               1.0f,           0.0f,           1.371f,         0.0f,
-               1.0f,           -0.336f,        -0.698f,        0.0f,
-               1.0f,           1.732f,         0.0f,           0.0f,
-               0.0f,           0.0f,           0.0f,           1.0f
-       }
-};
-
-/*
- * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
- * Y is in [16,235], Cb and Cr are in [16,240]
- * R, G, and B are in [0,255]
- */
-const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full =
-{
-       {
-               0.062745098f,   0.501960784f,   0.501960784f,   0.0f
-       },
-       {
-               1.164f,         0.0f,           1.596f,         0.0f,
-               1.164f,         -0.391f,        -0.813f,        0.0f,
-               1.164f,         2.018f,         0.0f,           0.0f,
-               0.0f,           0.0f,           0.0f,           1.0f
-       }
-};
-
-/*
- * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
- * Y is in [16,235], Cb and Cr are in [16,240]
- * R, G, and B are in [16,235]
- */
-const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709 =
-{
-       {
-               0.0f,           0.501960784f,   0.501960784f,   0.0f
-       },
-       {
-               1.0f,           0.0f,           1.540f,         0.0f,
-               1.0f,           -0.183f,        -0.459f,        0.0f,
-               1.0f,           1.816f,         0.0f,           0.0f,
-               0.0f,           0.0f,           0.0f,           1.0f
-       }
-};
-
-/*
- * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
- * Y is in [16,235], Cb and Cr are in [16,240]
- * R, G, and B are in [0,255]
- */
-const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full =
-{
-       {
-               0.062745098f,   0.501960784f,   0.501960784f,   0.0f
-       },
-       {
-               1.164f,         0.0f,           1.793f,         0.0f,
-               1.164f,         -0.213f,        -0.534f,        0.0f,
-               1.164f,         2.115f,         0.0f,           0.0f,
-               0.0f,           0.0f,           0.0f,           1.0f
-       }
-};
-
+#endif
index 8f347273ad79af2c25fd40f70e178296542b5672..f0de2e976ce18df3134c681fe44c37d3e167cfc4 100644 (file)
@@ -3,15 +3,18 @@
 
 #include "vl_types.h"
 
-extern const struct VL_VERTEX2F vl_mb_vertex_positions[24];
-extern const struct VL_TEXCOORD2F vl_luma_texcoords[24];
-extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords;
-extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords;
-extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords;
+/* TODO: Needs to be rolled into the appropriate stage */
 
-extern const struct VL_VERTEX2F vl_surface_vertex_positions[4];
-extern const struct VL_TEXCOORD2F *vl_surface_texcoords;
+extern const struct vlVertex2f macroblock_verts[24];
+extern const struct vlVertex2f macroblock_luma_texcoords[24];
+extern const struct vlVertex2f *macroblock_chroma_420_texcoords;
+extern const struct vlVertex2f *macroblock_chroma_422_texcoords;
+extern const struct vlVertex2f *macroblock_chroma_444_texcoords;
 
+extern const struct vlVertex2f surface_verts[4]; /* NOTE(review): the old definition (vl_surface_vertex_positions) is deleted from vl_data.c in this change and no new one is added there -- confirm another translation unit defines this symbol. */
+extern const struct vlVertex2f *surface_texcoords; /* NOTE(review): same concern -- the old vl_surface_texcoords definition is deleted from vl_data.c. */
+
+/*
 extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts;
 
 extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity;
@@ -19,6 +22,6 @@ extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601;
 extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full;
 extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709;
 extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full;
+*/
 
 #endif
-
index e668a7a10e0c66f33938f41ab40dba8d73f48a54..d612d02502f039a0038d614ad0f40101f5f4349c 100644 (file)
@@ -9,4 +9,3 @@
 #define VL_MACROBLOCK_SIZE     (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT)
 
 #endif
-
diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c
new file mode 100644 (file)
index 0000000..af80faa
--- /dev/null
@@ -0,0 +1,48 @@
+#define VL_INTERNAL
+#include "vl_display.h"
+#include <assert.h>
+#include <stdlib.h>
+
+/*
+ * Wraps a native display handle in a newly allocated, zero-initialised
+ * vlDisplay.
+ *
+ * native_display: handle to store; must be non-zero (asserted).
+ * display:        out-parameter that receives the new object; must be
+ *                 non-NULL (asserted). It is left untouched on failure.
+ *
+ * Returns 0 on success and 1 when the allocation fails.
+ */
+int vlCreateDisplay
+(
+       vlNativeDisplay native_display,
+       struct vlDisplay **display
+)
+{
+       struct vlDisplay *wrapper;
+
+       assert(native_display);
+       assert(display);
+
+       wrapper = calloc(1, sizeof *wrapper);
+
+       if (wrapper)
+       {
+               wrapper->native = native_display;
+               *display = wrapper;
+
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Releases the memory of a vlDisplay. Only the wrapper object is freed;
+ * nothing is done with the native handle stored inside it.
+ *
+ * display must be non-NULL (asserted). Always returns 0.
+ */
+int vlDestroyDisplay
+(
+       struct vlDisplay *display
+)
+{
+       struct vlDisplay *doomed = display;
+
+       assert(doomed);
+       free(doomed);
+
+       return 0;
+}
+
+/*
+ * Returns the native display handle that was stored in the wrapper.
+ * display must be non-NULL (asserted).
+ */
+vlNativeDisplay vlGetNativeDisplay
+(
+       struct vlDisplay *display
+)
+{
+       vlNativeDisplay handle;
+
+       assert(display);
+
+       handle = display->native;
+
+       return handle;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_display.h b/src/gallium/state_trackers/g3dvl/vl_display.h
new file mode 100644 (file)
index 0000000..e11fd40
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef vl_display_h
+#define vl_display_h
+
+#include "vl_types.h"
+
+#ifdef VL_INTERNAL
+struct vlDisplay /* Wrapper around a native display handle; the layout is only visible to code that defines VL_INTERNAL. */
+{
+       vlNativeDisplay native; /* Handle stored by vlCreateDisplay() and handed back by vlGetNativeDisplay(). */
+};
+#endif
+
+int vlCreateDisplay
+(
+       vlNativeDisplay native_display,
+       struct vlDisplay **display
+);
+
+int vlDestroyDisplay
+(
+       struct vlDisplay *display
+);
+
+vlNativeDisplay vlGetNativeDisplay
+(
+       struct vlDisplay *display
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c
new file mode 100644 (file)
index 0000000..4fae224
--- /dev/null
@@ -0,0 +1,2315 @@
+#define VL_INTERNAL
+#include "vl_r16snorm_mc.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pipe/p_context.h>
+#include <pipe/p_winsys.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include "vl_render.h"
+#include "vl_shader_build.h"
+#include "vl_surface.h"
+#include "vl_util.h"
+#include "vl_types.h"
+#include "vl_defs.h"
+
+struct vlVertexShaderConsts /* CPU-side view of mc->vs_const_buf, which vlBegin() binds as vertex shader constant buffer 0. Field order is the buffer layout. */
+{
+       /*struct vlVertex4f scale;
+       struct vlVertex4f denorm;*/
+       struct vlVertex4f       scale; /* Macroblock width/height divided by the target texture's width/height; z and w are written as 1. */
+       struct vlVertex4f       mb_pos_trans; /* Macroblock pixel origin divided by the target texture size; z and w are written as 0. */
+       struct vlVertex4f       denorm; /* Target texture width/height; in the visible code only x and y are written, and only on the field-motion path. */
+       struct
+       {
+               struct vlVertex4f       top_field; /* Macroblock pixel origin plus 0.5 * motion vector, divided by the texture size. */
+               struct vlVertex4f       bottom_field; /* Same for the bottom-field vector; written only for field motion. */
+       } mb_tc_trans[2]; /* vlRenderPMacroBlock() fills index 0. NOTE(review): index 1 presumably serves the second reference of B macroblocks -- confirm. */
+};
+
+struct vlFragmentShaderConsts /* NOTE(review): not referenced in the visible part of this file; presumably the layout of mc->fs_const_buf -- confirm. */
+{
+       struct vlVertex4f multiplier;
+       struct vlVertex4f div;
+};
+
+struct vlR16SnormMC /* State of the motion-compensation renderer implemented in this file. */
+{
+       struct vlRender                         base; /* Must stay the first member: vlBegin() casts a struct vlRender* to struct vlR16SnormMC*. */
+
+       unsigned int                            video_width, video_height;
+       enum vlFormat                           video_format;
+
+       struct pipe_context                     *pipe; /* All state changes and draws are issued through this context. */
+       struct pipe_viewport_state              viewport; /* Bound once per vlBegin(). */
+       struct pipe_framebuffer_state           render_target; /* cbufs[0] is re-pointed at the destination surface for every macroblock. */
+       struct pipe_sampler_state               *samplers[5]; /* The first 3 are bound for I macroblocks, the first 4 for P macroblocks. */
+       struct pipe_texture                     *textures[5]; /* [0]..[2] are filled by vlGrabBlocks(); [3] is set to the reference surface's texture by vlRenderPMacroBlock(). */
+       void                                    *i_vs, *p_vs[2], *b_vs[2]; /* p_vs[0] is bound for frame motion, p_vs[1] for field motion. */
+       void                                    *i_fs, *p_fs[2], *b_fs[2]; /* Indexed like the vertex shaders above. */
+       struct pipe_vertex_buffer               vertex_bufs[3]; /* Bound once per vlBegin(). */
+       struct pipe_vertex_element              vertex_elems[3]; /* Bound once per vlBegin(). */
+       struct pipe_constant_buffer             vs_const_buf, fs_const_buf; /* vs_const_buf.buffer is mapped as struct vlVertexShaderConsts for every macroblock. */
+};
+
+/*
+ * Binds the pipe state that is identical for every macroblock: the three
+ * vertex buffers and vertex elements, the viewport and both constant
+ * buffers. The frame buffer, shaders, samplers and textures are bound per
+ * macroblock by the vlRender*MacroBlock() functions.
+ *
+ * render must be non-NULL (asserted) and must be the vlRender embedded at
+ * the start of a vlR16SnormMC. Always returns 0.
+ */
+int vlBegin
+(
+       struct vlRender *render
+)
+{
+       struct vlR16SnormMC     *renderer;
+       struct pipe_context     *ctx;
+
+       assert(render);
+
+       renderer = (struct vlR16SnormMC*)render;
+       ctx = renderer->pipe;
+
+       ctx->set_vertex_buffers(ctx, 3, renderer->vertex_bufs);
+       ctx->set_vertex_elements(ctx, 3, renderer->vertex_elems);
+       ctx->set_viewport_state(ctx, &renderer->viewport);
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 0, &renderer->vs_const_buf);
+       ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 0, &renderer->fs_const_buf);
+
+       return 0;
+}
+
+/*static int vlGrabMacroBlock
+(
+       struct vlR16SnormMC *mc,
+       struct vlMpeg2MacroBlock *macroblock
+)
+{
+       assert(mc);
+       assert(macroblock);
+
+
+
+       return 0;
+}*/
+
+/*#define DO_IDCT*/
+
+#ifdef DO_IDCT
+static int vlTransformBlock(short *src, short *dst, short bias) /* Two-pass transform of one block from src into dst using the basis table below; bias is added to every output sample. Always returns 0. Only compiled when DO_IDCT is defined. */
+{
+       static const float basis[8][8] =
+       {
+               {0.3536,   0.4904,   0.4619,   0.4157,   0.3536,   0.2778,   0.1913,   0.0975},
+               {0.3536,   0.4157,   0.1913,  -0.0975,  -0.3536,  -0.4904,  -0.4619,  -0.2778},
+               {0.3536,   0.2778,  -0.1913,  -0.4904,  -0.3536,   0.0975,   0.4619,   0.4157},
+               {0.3536,   0.0975,  -0.4619,  -0.2778,   0.3536,   0.4157,  -0.1913,  -0.4904},
+               {0.3536,  -0.0975,  -0.4619,   0.2778,   0.3536,  -0.4157,  -0.1913,   0.4904},
+               {0.3536,  -0.2778,  -0.1913,   0.4904,  -0.3536,  -0.0975,   0.4619,  -0.4157},
+               {0.3536,  -0.4157,   0.1913,   0.0975,  -0.3536,   0.4904,  -0.4619,   0.2778},
+               {0.3536,  -0.4904,   0.4619,  -0.4157,   0.3536,  -0.2778,   0.1913,  -0.0975}
+       };
+
+       unsigned int    x, y;
+       short           tmp[64]; /* Holds the first-pass result, so src and dst may be the same buffer (vlGrabBlocks() passes one pointer for both). NOTE(review): the size and the eight hand-written terms below assume 8x8 blocks -- confirm against vl_defs.h. */
+
+       for (y = 0; y < VL_BLOCK_HEIGHT; ++y) /* Pass 1: tmp[y][x] = sum over k of src[y][k] * basis[x][k], truncated to short. */
+               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
+                       tmp[y * VL_BLOCK_WIDTH + x] = (short)
+                       (
+                               src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] +
+                               src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] +
+                               src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] +
+                               src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] +
+                               src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] +
+                               src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] +
+                               src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] +
+                               src[y * VL_BLOCK_WIDTH + 7] * basis[x][7]
+                       );
+
+       for (x = 0; x < VL_BLOCK_WIDTH; ++x) /* Pass 2: dst[y][x] = bias + sum over k of tmp[k][x] * basis[y][k], then clamped. */
+               for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
+               {
+                       dst[y * VL_BLOCK_WIDTH + x] = bias + (short)
+                       (
+                               tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] +
+                               tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] +
+                               tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] +
+                               tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] +
+                               tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] +
+                               tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] +
+                               tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] +
+                               tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7]
+                       );
+                       if (dst[y * VL_BLOCK_WIDTH + x] > 255) /* The upper clamp is applied whatever the bias is. */
+                               dst[y * VL_BLOCK_WIDTH + x] = 255;
+                       else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) /* The lower clamp is applied only when a positive bias was requested. */
+                               dst[y * VL_BLOCK_WIDTH + x] = 0;
+               }
+       return 0;
+}
+#endif
+
+/*
+ * Copies one block, stored as VL_BLOCK_HEIGHT contiguous rows of
+ * VL_BLOCK_WIDTH shorts in src, into consecutive rows of dst.
+ *
+ * dst_pitch is the distance between two destination rows, counted in
+ * shorts. Each row copy transfers VL_BLOCK_WIDTH * 2 bytes.
+ * Always returns 0.
+ */
+static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int row = 0;
+
+       while (row < VL_BLOCK_HEIGHT)
+       {
+               short *to = dst + row * dst_pitch;
+               short *from = src + row * VL_BLOCK_WIDTH;
+
+               memcpy(to, from, VL_BLOCK_WIDTH * 2);
+               ++row;
+       }
+
+       return 0;
+}
+
+/*
+ * Copies one block from src into every second row of dst.
+ *
+ * Source row r lands on destination row 2 * r. Rows in the second half of
+ * the block (r >= VL_BLOCK_HEIGHT / 2) are shifted down by a further
+ * VL_BLOCK_HEIGHT rows. dst_pitch is the distance between two destination
+ * rows, counted in shorts. Each row copy transfers VL_BLOCK_WIDTH * 2 bytes.
+ * Always returns 0.
+ */
+static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+       {
+               unsigned int dst_offset = row * dst_pitch * 2;
+
+               /* Second half of the block continues VL_BLOCK_HEIGHT rows further down */
+               if (row >= VL_BLOCK_HEIGHT / 2)
+                       dst_offset += VL_BLOCK_HEIGHT * dst_pitch;
+
+               memcpy
+               (
+                       dst + dst_offset,
+                       src + row * VL_BLOCK_WIDTH,
+                       VL_BLOCK_WIDTH * 2
+               );
+       }
+
+       return 0;
+}
+
+/*
+ * Zero-fills the destination slot of a block that carries no data.
+ *
+ * Clears VL_BLOCK_WIDTH * 2 bytes in each of VL_BLOCK_HEIGHT consecutive
+ * rows of dst; dst_pitch is the distance between rows, counted in shorts.
+ * Always returns 0.
+ */
+static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
+{
+       unsigned int row = 0;
+
+       while (row < VL_BLOCK_HEIGHT)
+       {
+               short *row_start = dst + row * dst_pitch;
+
+               memset(row_start, 0, VL_BLOCK_WIDTH * 2);
+               ++row;
+       }
+
+       return 0;
+}
+
+enum vlSampleType /* Passed to vlGrabBlocks(); only consulted when DO_IDCT is defined, where it selects the bias given to vlTransformBlock(). */
+{
+       vlSampleTypeFull, /* Used for I macroblocks; selects a bias of 128. */
+       vlSampleTypeDiff /* Used for P and B macroblocks; selects a bias of 0. */
+};
+
+static int vlGrabBlocks /* Uploads the blocks of one macroblock into mc->textures[0..2], zero-filling the slots of blocks that are not coded. Always returns 0. */
+(
+       struct vlR16SnormMC *mc,
+       unsigned int coded_block_pattern,
+       enum vlDCTType dct_type,
+       enum vlSampleType sample_type,
+       short *blocks
+)
+{
+       struct pipe_surface     *tex_surface;
+       short                   *texels;
+       unsigned int            tex_pitch;
+       unsigned int            tb, sb = 0; /* tb: destination block slot; sb: index of the next block read from `blocks`, which holds coded blocks only. */
+
+       assert(mc);
+       assert(blocks);
+
+       tex_surface = mc->pipe->screen->get_tex_surface /* NOTE(review): this surface is never released in this function -- confirm the ownership rules of get_tex_surface. */
+       (
+               mc->pipe->screen,
+               mc->textures[0],
+               0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
+       );
+
+       texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); /* NOTE(review): the mapping result is used without a NULL check. */
+       tex_pitch = tex_surface->stride / tex_surface->block.size; /* NOTE(review): assumes the quotient is the row pitch in shorts -- confirm. */
+
+       for (tb = 0; tb < 4; ++tb) /* Bits 5..2 of coded_block_pattern select the four blocks stacked in textures[0]. */
+       {
+               if ((coded_block_pattern >> (5 - tb)) & 1)
+               {
+                       short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
+
+#ifdef DO_IDCT
+                       vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0);
+#endif
+
+                       if (dct_type == vlDCTTypeFrameCoded)
+                               vlGrabFrameCodedBlock
+                               (
+                                       cur_block,
+                                       texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
+                                       tex_pitch
+                               );
+                       else
+                               vlGrabFieldCodedBlock /* tb % 2 starts VL_BLOCK_HEIGHT rows further down, tb / 2 starts one row further down, so blocks 2 and 3 fill the rows skipped by blocks 0 and 1. */
+                               (
+                                       cur_block,
+                                       texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
+                                       tex_pitch
+                               );
+
+                       ++sb;
+               }
+               else
+                       vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); /* Slot tb is cleared with the frame-coded layout whatever dct_type is. */
+       }
+
+       pipe_surface_unmap(tex_surface);
+
+       /* TODO: Implement 422, 444 */
+       for (tb = 0; tb < 2; ++tb) /* Bits 1 and 0 of coded_block_pattern select the single blocks of textures[1] and textures[2]. */
+       {
+               tex_surface = mc->pipe->screen->get_tex_surface
+               (
+                       mc->pipe->screen,
+                       mc->textures[tb + 1],
+                       0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
+               );
+
+               texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
+               tex_pitch = tex_surface->stride / tex_surface->block.size;
+
+               if ((coded_block_pattern >> (1 - tb)) & 1)
+               {
+                       short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
+
+#ifdef DO_IDCT
+                       vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0);
+#endif
+
+                       vlGrabFrameCodedBlock /* These blocks are always copied with the frame-coded layout; dct_type is not consulted here. */
+                       (
+                               cur_block,
+                               texels,
+                               tex_pitch
+                       );
+
+                       ++sb;
+               }
+               else
+                       vlGrabNoBlock(texels, tex_pitch);
+
+               pipe_surface_unmap(tex_surface);
+       }
+
+       return 0;
+}
+
+/*
+ * Renders one intra-coded (I) macroblock into `surface`.
+ *
+ * mc:            renderer state; must be non-NULL (asserted).
+ * picture_type:  anything other than vlPictureTypeFrame is not implemented
+ *                yet and returns 0 without drawing.
+ * field_order:   currently unused.
+ * mbx, mby:      macroblock position, in macroblock units.
+ * coded_block_pattern, dct_type, blocks: forwarded to vlGrabBlocks();
+ *                blocks must be non-NULL (asserted).
+ * surface:       render target; must be non-NULL (asserted).
+ *
+ * Always returns 0.
+ */
+int vlRenderIMacroBlock
+(
+       struct vlR16SnormMC *mc,
+       enum vlPictureType picture_type,
+       enum vlFieldOrder field_order,
+       unsigned int mbx,
+       unsigned int mby,
+       unsigned int coded_block_pattern,
+       enum vlDCTType dct_type,
+       short *blocks,
+       struct vlSurface *surface
+)
+{
+       struct pipe_context             *pipe;
+       struct vlVertexShaderConsts     *vs_consts;
+       float                           tex_width, tex_height;
+
+       /* mc is dereferenced below, so check it like the other pointer arguments */
+       assert(mc);
+       assert(blocks);
+       assert(surface);
+
+       /* TODO: Implement interlaced rendering */
+       if (picture_type != vlPictureTypeFrame)
+               return 0;
+
+       vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks);
+
+       pipe = mc->pipe;
+       tex_width = (float)surface->texture->width[0];
+       tex_height = (float)surface->texture->height[0];
+
+       vs_consts = pipe->winsys->buffer_map
+       (
+               pipe->winsys,
+               mc->vs_const_buf.buffer,
+               PIPE_BUFFER_USAGE_CPU_WRITE
+       );
+
+       /* Size and origin of the macroblock, normalised by the target size */
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / tex_width;
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / tex_height;
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / tex_width;
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / tex_height;
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
+
+       pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
+
+       mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
+       (
+               pipe->screen,
+               surface->texture,
+               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+       );
+       pipe->set_framebuffer_state(pipe, &mc->render_target);
+       pipe->set_sampler_textures(pipe, 3, mc->textures);
+       pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
+       pipe->bind_vs_state(pipe, mc->i_vs);
+       pipe->bind_fs_state(pipe, mc->i_fs);
+
+       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
+
+       return 0;
+}
+
+/*
+ * Renders one forward-predicted (P) macroblock into `surface`, sampling
+ * the prediction from `ref_surface`.
+ *
+ * mc:                  renderer state; must be non-NULL (asserted).
+ * picture_type:        anything other than vlPictureTypeFrame is not
+ *                      implemented yet and returns 0 without drawing.
+ * field_order:         currently unused.
+ * mbx, mby:            macroblock position, in macroblock units.
+ * mc_type:             vlMotionTypeFrame or vlMotionTypeField; any other
+ *                      value returns 0 without drawing.
+ * top_x, top_y:        frame/top-field motion vector; scaled by 0.5 before
+ *                      being added to the macroblock's pixel origin.
+ * bottom_x, bottom_y:  bottom-field motion vector; only read for
+ *                      vlMotionTypeField.
+ * coded_block_pattern, dct_type, blocks: forwarded to vlGrabBlocks();
+ *                      blocks must be non-NULL (asserted).
+ * ref_surface:         its texture is bound as sampler texture 3; must be
+ *                      non-NULL (asserted).
+ * surface:             render target; must be non-NULL (asserted).
+ *
+ * Always returns 0.
+ */
+int vlRenderPMacroBlock
+(
+       struct vlR16SnormMC *mc,
+       enum vlPictureType picture_type,
+       enum vlFieldOrder field_order,
+       unsigned int mbx,
+       unsigned int mby,
+       enum vlMotionType mc_type,
+       short top_x,
+       short top_y,
+       short bottom_x,
+       short bottom_y,
+       unsigned int coded_block_pattern,
+       enum vlDCTType dct_type,
+       short *blocks,
+       struct vlSurface *ref_surface,
+       struct vlSurface *surface
+)
+{
+       struct pipe_context             *pipe;
+       struct vlVertexShaderConsts     *vs_consts;
+       float                           tex_width, tex_height;
+
+       /*
+        * The motion vectors are passed by value, so there is no vector
+        * pointer to check; mc is the pointer that is dereferenced below.
+        */
+       assert(mc);
+       assert(blocks);
+       assert(ref_surface);
+       assert(surface);
+
+       /* TODO: Implement interlaced rendering */
+       if (picture_type != vlPictureTypeFrame)
+               return 0;
+       /* TODO: Implement other MC types */
+       if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField)
+               return 0;
+
+       vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks);
+
+       pipe = mc->pipe;
+       tex_width = (float)surface->texture->width[0];
+       tex_height = (float)surface->texture->height[0];
+
+       vs_consts = pipe->winsys->buffer_map
+       (
+               pipe->winsys,
+               mc->vs_const_buf.buffer,
+               PIPE_BUFFER_USAGE_CPU_WRITE
+       );
+
+       /* Size and origin of the macroblock, normalised by the target size */
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / tex_width;
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / tex_height;
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / tex_width;
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / tex_height;
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
+
+       /* Reference texcoord origin: macroblock origin displaced by the motion vector */
+       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / tex_width;
+       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / tex_height;
+       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
+
+       if (mc_type == vlMotionTypeField)
+       {
+               vs_consts->denorm.x = tex_width;
+               vs_consts->denorm.y = tex_height;
+
+               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / tex_width;
+               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / tex_height;
+               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
+
+               pipe->bind_vs_state(pipe, mc->p_vs[1]);
+               pipe->bind_fs_state(pipe, mc->p_fs[1]);
+       }
+       else
+       {
+               pipe->bind_vs_state(pipe, mc->p_vs[0]);
+               pipe->bind_fs_state(pipe, mc->p_fs[0]);
+       }
+
+       pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
+
+       mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
+       (
+               pipe->screen,
+               surface->texture,
+               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+       );
+       pipe->set_framebuffer_state(pipe, &mc->render_target);
+
+       mc->textures[3] = ref_surface->texture;
+       pipe->set_sampler_textures(pipe, 4, mc->textures);
+       pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
+
+       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
+
+       return 0;
+}
+
+/*
+ * Renders one bi-directionally predicted (B) macroblock into `surface`,
+ * sampling both `past_surface` and `future_surface` as references.
+ *
+ * mc                   Motion compensation renderer state (pipe, shaders, buffers).
+ * picture_type         Only vlPictureTypeFrame is handled; anything else returns early.
+ * field_order          Currently unused by this function.
+ * mbx, mby             Macroblock position, in macroblock units.
+ * mc_type              vlMotionTypeFrame or vlMotionTypeField; anything else returns early.
+ * top/bottom_past_*    Motion vector components applied to the past reference.
+ * top/bottom_future_*  Motion vector components applied to the future reference.
+ *                      The bottom_* vectors are only read for field motion.
+ * coded_block_pattern, dct_type, blocks
+ *                      Forwarded to vlGrabBlocks() as differential samples.
+ * past_surface, future_surface
+ *                      Reference surfaces, bound as textures 3 and 4.
+ * surface              Render target.
+ *
+ * Always returns 0, including for the unimplemented picture/motion types,
+ * in which case nothing is drawn.
+ */
+int vlRenderBMacroBlock
+(
+       struct vlR16SnormMC *mc,
+       enum vlPictureType picture_type,
+       enum vlFieldOrder field_order,
+       unsigned int mbx,
+       unsigned int mby,
+       enum vlMotionType mc_type,
+       short top_past_x,
+       short top_past_y,
+       short bottom_past_x,
+       short bottom_past_y,
+       short top_future_x,
+       short top_future_y,
+       short bottom_future_x,
+       short bottom_future_y,
+       unsigned int coded_block_pattern,
+       enum vlDCTType dct_type,
+       short *blocks,
+       struct vlSurface *past_surface,
+       struct vlSurface *future_surface,
+       struct vlSurface *surface
+)
+{
+       struct pipe_context             *pipe;
+       struct vlVertexShaderConsts     *vs_consts;
+
+       /* Check the pointers this function actually receives and dereferences. */
+       assert(mc);
+       assert(blocks);
+       assert(past_surface);
+       assert(future_surface);
+       assert(surface);
+
+       /* TODO: Implement interlaced rendering */
+       if (picture_type != vlPictureTypeFrame)
+               return 0;
+       /* TODO: Implement other MC types */
+       if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField)
+               return 0;
+
+       vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks);
+
+       pipe = mc->pipe;
+
+       vs_consts = pipe->winsys->buffer_map
+       (
+               pipe->winsys,
+               mc->vs_const_buf.buffer,
+               PIPE_BUFFER_USAGE_CPU_WRITE
+       );
+
+       /* Macroblock size and position, normalized by the target texture dimensions */
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0];
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0];
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0];
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0];
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
+       /* Reference texcoord offsets: index 0 is the past reference, index 1 the future one */
+       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0];
+       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0];
+       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
+       vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0];
+       vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0];
+       vs_consts->mb_tc_trans[1].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[1].top_field.w = 0.0f;
+
+       if (mc_type == vlMotionTypeField)
+       {
+               /* Field motion additionally needs the denorm factors and the bottom field offsets */
+               vs_consts->denorm.x = (float)surface->texture->width[0];
+               vs_consts->denorm.y = (float)surface->texture->height[0];
+
+               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0];
+               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0];
+               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
+               vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0];
+               vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0];
+               vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f;
+
+               pipe->bind_vs_state(pipe, mc->b_vs[1]);
+               pipe->bind_fs_state(pipe, mc->b_fs[1]);
+       }
+       else
+       {
+               pipe->bind_vs_state(pipe, mc->b_vs[0]);
+               pipe->bind_fs_state(pipe, mc->b_fs[0]);
+       }
+
+       pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
+
+       mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
+       (
+               pipe->screen,
+               surface->texture,
+               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+       );
+       pipe->set_framebuffer_state(pipe, &mc->render_target);
+
+       /* Texture slots 3 and 4 hold the two reference surfaces */
+       mc->textures[3] = past_surface->texture;
+       mc->textures[4] = future_surface->texture;
+       pipe->set_sampler_textures(pipe, 5, mc->textures);
+       pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
+
+       /* 24 vertices: one draw of triangles covering the whole macroblock */
+       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
+
+       return 0;
+}
+
+/*
+ * Renders every macroblock of an MPEG-2 batch into `surface`, dispatching
+ * on each macroblock's mb_type:
+ *   - intra:               vlRenderIMacroBlock
+ *   - forward predicted:   vlRenderPMacroBlock with PMV[0][0], PMV[1][0] and batch->past_surface
+ *   - backward predicted:  vlRenderPMacroBlock with PMV[0][1], PMV[1][1] and batch->future_surface
+ *   - bi-predicted:        vlRenderBMacroBlock with all four vectors and both reference surfaces
+ *
+ * `render` is cast to struct vlR16SnormMC*. The return values of the
+ * per-macroblock renderers are ignored; this function always returns 0.
+ * An unknown mb_type trips assert(0).
+ */
+int vlRenderMacroBlocksMpeg2R16Snorm
+(
+       struct vlRender *render,
+       struct vlMpeg2MacroBlockBatch *batch,
+       struct vlSurface *surface
+)
+{
+       struct vlR16SnormMC     *mc;
+       unsigned int            i;
+
+       assert(render);
+
+       mc = (struct vlR16SnormMC*)render;
+
+       /*for (i = 0; i < batch->num_macroblocks; ++i)
+               vlGrabMacroBlock(batch->macroblocks[i]);*/
+
+       for (i = 0; i < batch->num_macroblocks; ++i)
+       {
+               switch (batch->macroblocks[i].mb_type)
+               {
+                       case vlMacroBlockTypeIntra:
+                       {
+                               vlRenderIMacroBlock
+                               (
+                                       mc,
+                                       batch->picture_type,
+                                       batch->field_order,
+                                       batch->macroblocks[i].mbx,
+                                       batch->macroblocks[i].mby,
+                                       batch->macroblocks[i].cbp,
+                                       batch->macroblocks[i].dct_type,
+                                       batch->macroblocks[i].blocks,
+                                       surface
+                               );
+                               break;
+                       }
+                       case vlMacroBlockTypeFwdPredicted:
+                       {
+                               /* Second PMV index 0 selects the vectors paired with the past surface */
+                               vlRenderPMacroBlock
+                               (
+                                       mc,
+                                       batch->picture_type,
+                                       batch->field_order,
+                                       batch->macroblocks[i].mbx,
+                                       batch->macroblocks[i].mby,
+                                       batch->macroblocks[i].mo_type,
+                                       batch->macroblocks[i].PMV[0][0][0],
+                                       batch->macroblocks[i].PMV[0][0][1],
+                                       batch->macroblocks[i].PMV[1][0][0],
+                                       batch->macroblocks[i].PMV[1][0][1],
+                                       batch->macroblocks[i].cbp,
+                                       batch->macroblocks[i].dct_type,
+                                       batch->macroblocks[i].blocks,
+                                       batch->past_surface,
+                                       surface
+                               );
+                               break;
+                       }
+                       case vlMacroBlockTypeBkwdPredicted:
+                       {
+                               /* Second PMV index 1 selects the vectors paired with the future surface */
+                               vlRenderPMacroBlock
+                               (
+                                       mc,
+                                       batch->picture_type,
+                                       batch->field_order,
+                                       batch->macroblocks[i].mbx,
+                                       batch->macroblocks[i].mby,
+                                       batch->macroblocks[i].mo_type,
+                                       batch->macroblocks[i].PMV[0][1][0],
+                                       batch->macroblocks[i].PMV[0][1][1],
+                                       batch->macroblocks[i].PMV[1][1][0],
+                                       batch->macroblocks[i].PMV[1][1][1],
+                                       batch->macroblocks[i].cbp,
+                                       batch->macroblocks[i].dct_type,
+                                       batch->macroblocks[i].blocks,
+                                       batch->future_surface,
+                                       surface
+                               );
+                               break;
+                       }
+                       case vlMacroBlockTypeBiPredicted:
+                       {
+                               /* Past vectors first, then future vectors, matching vlRenderBMacroBlock's parameter order */
+                               vlRenderBMacroBlock
+                               (
+                                       mc,
+                                       batch->picture_type,
+                                       batch->field_order,
+                                       batch->macroblocks[i].mbx,
+                                       batch->macroblocks[i].mby,
+                                       batch->macroblocks[i].mo_type,
+                                       batch->macroblocks[i].PMV[0][0][0],
+                                       batch->macroblocks[i].PMV[0][0][1],
+                                       batch->macroblocks[i].PMV[1][0][0],
+                                       batch->macroblocks[i].PMV[1][0][1],
+                                       batch->macroblocks[i].PMV[0][1][0],
+                                       batch->macroblocks[i].PMV[0][1][1],
+                                       batch->macroblocks[i].PMV[1][1][0],
+                                       batch->macroblocks[i].PMV[1][1][1],
+                                       batch->macroblocks[i].cbp,
+                                       batch->macroblocks[i].dct_type,
+                                       batch->macroblocks[i].blocks,
+                                       batch->past_surface,
+                                       batch->future_surface,
+                                       surface
+                               );
+                               break;
+                       }
+                       default:
+                               assert(0);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * End-of-rendering hook for this renderer.
+ * Currently only asserts that `render` is non-NULL; no other work is done.
+ * Always returns 0.
+ */
+int vlEnd
+(
+       struct vlRender *render
+)
+{
+       assert(render);
+
+       return 0;
+}
+
+/*
+ * Destroys a renderer, treating `render` as a struct vlR16SnormMC.
+ * Deletes the 5 sampler states, destroys the 3 vertex buffers, releases
+ * textures 0-2, deletes the I/P/B vertex and fragment shader states,
+ * destroys both constant buffers and finally frees the structure itself.
+ * `render` must not be used after this call. Always returns 0.
+ */
+int vlDestroy
+(
+       struct vlRender *render
+)
+{
+       struct vlR16SnormMC     *mc;
+       struct pipe_context     *pipe;
+       unsigned int            i;
+
+       assert(render);
+
+       mc = (struct vlR16SnormMC*)render;
+       pipe = mc->pipe;
+
+       for (i = 0; i < 5; ++i)
+               pipe->delete_sampler_state(pipe, mc->samplers[i]);
+
+       for (i = 0; i < 3; ++i)
+               pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer);
+
+       /* Textures 3 & 4 are not created directly, no need to release them here */
+       for (i = 0; i < 3; ++i)
+               pipe_texture_release(&mc->textures[i]);
+
+       pipe->delete_vs_state(pipe, mc->i_vs);
+       pipe->delete_fs_state(pipe, mc->i_fs);
+
+       /* Index 0 and 1 are the two shader variants bound by the P/B render paths */
+       for (i = 0; i < 2; ++i)
+       {
+               pipe->delete_vs_state(pipe, mc->p_vs[i]);
+               pipe->delete_fs_state(pipe, mc->p_fs[i]);
+               pipe->delete_vs_state(pipe, mc->b_vs[i]);
+               pipe->delete_fs_state(pipe, mc->b_fs[i]);
+       }
+
+       pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
+       pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
+
+       free(mc);
+
+       return 0;
+}
+
+/*
+ * Represents 8 triangles (4 quads, 1 per block) in normalized coords
+ * that render a macroblock.
+ * Need to be scaled to cover mbW*mbH macroblock pixels and translated into
+ * position on target surface.
+ * Quads are listed in the order (0,0)->(0.5,0.5), (0.5,0)->(1,0.5),
+ * (0,0.5)->(0.5,1), (0.5,0.5)->(1,1), two triangles (6 vertices) each.
+ */
+const struct vlVertex2f macroblock_verts[24] =
+{
+       {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f},
+       {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f},
+
+       {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f},
+       {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f},
+
+       {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f},
+       {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f},
+
+       {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f},
+       {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f}
+};
+
+/*
+ * Represents texcoords for the above for rendering 4 luma blocks arranged
+ * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at
+ * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH.
+ * Each group of 6 entries spans the full texture width and one quarter of
+ * its height, and follows the same per-quad vertex order as macroblock_verts.
+ */
+const struct vlVertex2f macroblock_luma_texcoords[24] =
+{
+       {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f},
+       {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f},
+
+       {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f},
+       {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f},
+
+       {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f},
+       {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f},
+
+       {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f},
+       {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f}
+};
+
+/*
+ * Represents texcoords for the above for rendering 1 chroma block (4:2:0).
+ * Straightforward 0,0->1,1 mapping, so this simply aliases the macroblock
+ * position vectors in macroblock_verts.
+ */
+const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts;
+
+/*
+ * Represents texcoords for the above for rendering 2 chroma blocks (4:2:2)
+ * arranged in a bW*(bH*2) texture. First chroma block located at
+ * 0,0->bW,bH; second at 0,bH->bW,2bH.
+ * This can be rendered with a straightforward 0,0->1,1 mapping, so this
+ * aliases the macroblock position vectors in macroblock_verts.
+ */
+const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts;
+
+/*
+ * Represents texcoords for the above for rendering 4 chroma blocks (4:4:4).
+ * Same case as 4 luma blocks, so this aliases macroblock_luma_texcoords.
+ */
+const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords;
+
+/*
+ * Fragment shader constants.
+ * Used when rendering P and B macroblocks, multiplier is applied to the A channel,
+ * which is then added to the L channel, then the bias is subtracted from that to
+ * get back the differential. The differential is then added to the samples from the
+ * reference surface(s).
+ *
+ * The first vector (32767/255 in x, y, z) corresponds to the c0 scaling
+ * factor that the fragment shaders in this file multiply sampled texels by.
+ * NOTE(review): the I-macroblock fragment shader in this file only performs
+ * that multiply; confirm that the A/L-channel description above and the
+ * second vector {0.5, 2.0, 0, 0} still apply to the R16 SNORM path.
+ */
+const struct vlFragmentShaderConsts fs_consts =
+{
+       {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
+       {0.5f, 2.0f, 0.0f, 0.0f}
+};
+
+/*
+ * Builds the vertex shader used for intra-coded macroblocks and stores the
+ * resulting state object in mc->i_vs.
+ *
+ * The shader scales the unit-rect vertex position by c0, translates it by
+ * c1 and passes the luma and chroma texcoords through unchanged.
+ *
+ * Returns 0 on success, 1 if the temporary token buffer cannot be allocated
+ * (in which case mc->i_vs is left untouched).
+ */
+static int vlCreateVertexShaderIMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 50;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+       /* Every write below goes through this buffer; fail instead of dereferencing NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* Version, header and processor occupy the first three tokens */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Luma texcoords
+        * decl i2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale unit rect to macroblock size
+        * decl c1              ; Translation vector to move macroblock into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Luma texcoords
+        * decl o2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0 */
+       decl = vl_decl_temps(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, c1       ; Translate rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * mov o1, i1           ; Move input luma texcoords to output
+        * mov o2, i2           ; Move input chroma texcoords to output
+        */
+       for (i = 1; i < 3; ++i)
+       {
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       vs.tokens = tokens;
+       mc->i_vs = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the fragment shader used for intra-coded macroblocks and stores
+ * the resulting state object in mc->i_fs.
+ *
+ * The shader samples the luma, Cb and Cr textures (s0..s2), packs the .x
+ * component of each sample into t0.x/.y/.z respectively, and writes
+ * t0 * c0 as the fragment color.
+ *
+ * Returns 0 on success, 1 if the temporary token buffer cannot be allocated
+ * (in which case mc->i_fs is left untouched).
+ */
+static int vlCreateFragmentShaderIMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+       /* Every write below goes through this buffer; fail instead of dereferencing NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       /* Version, header and processor occupy the first three tokens */
+       ti = 3;
+
+       /*
+        * decl i0                      ; Texcoords for s0
+        * decl i1                      ; Texcoords for s1, s2
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0                      ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0, t1 */
+       decl = vl_decl_temps(0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl s0                      ; Sampler for luma texture
+        * decl s1                      ; Sampler for chroma Cb texture
+        * decl s2                      ; Sampler for chroma Cr texture
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               decl = vl_decl_samplers(i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               /* Luma (i == 0) uses texcoords i0, both chroma planes share i1 */
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+               /* Replicate t1.x and write only component i of t0 (x, y, then z) */
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* mul o0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       fs.tokens = tokens;
+       mc->i_fs = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the vertex shader used for frame-motion P macroblocks and stores
+ * the resulting state object in mc->p_vs[0].
+ *
+ * In addition to the position and pass-through texcoords emitted by the
+ * intra shader, it outputs o3 = (i0 * c0) + c3, the texcoords into the
+ * reference surface.
+ *
+ * Returns 0 on success, 1 if the temporary token buffer cannot be allocated
+ * (in which case mc->p_vs[0] is left untouched).
+ */
+static int vlCreateVertexShaderFramePMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+       /* Every write below goes through this buffer; fail instead of dereferencing NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* Version, header and processor occupy the first three tokens */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Luma texcoords
+        * decl i2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale unit rect to macroblock size
+        * decl c1              ; Translation vector to move macroblock into position
+        * decl c2              ; Unused
+        * decl c3              ; Translation vector to move ref macroblock texcoords into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Luma texcoords
+        * decl o2              ; Chroma texcoords
+        * decl o3              ; Ref macroblock texcoords
+        */
+       for (i = 0; i < 4; i++)
+       {
+               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0 */
+       decl = vl_decl_temps(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, c1       ; Translate rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * mov o1, i1           ; Move input luma texcoords to output
+        * mov o2, i2           ; Move input chroma texcoords to output
+        */
+       for (i = 1; i < 3; ++i)
+       {
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* add o3, t0, c3       ; Translate rect into position on ref macroblock */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       vs.tokens = tokens;
+       mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the vertex shader used for field-motion P macroblocks and stores
+ * the resulting state object in mc->p_vs[1].
+ *
+ * Outputs the translated vertex position (o0), pass-through luma/chroma
+ * texcoords (o1, o2), top and bottom field reference texcoords
+ * (o3 = t0 + c3, o4 = t0 + c4) and the vertex position multiplied by the
+ * denorm coefficients in c2 (o5).
+ *
+ * Returns 0 on success, 1 if the temporary token buffer cannot be allocated
+ * (in which case mc->p_vs[1] is left untouched).
+ */
+static int vlCreateVertexShaderFieldPMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+       /* Every write below goes through this buffer; fail instead of dereferencing NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* Version, header and processor occupy the first three tokens */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Luma texcoords
+        * decl i2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale unit rect to macroblock size
+        * decl c1              ; Translation vector to move macroblock into position
+        * decl c2              ; Denorm coefficients
+        * decl c3              ; Translation vector to move top field ref macroblock texcoords into position
+        * decl c4              ; Translation vector to move bottom field ref macroblock texcoords into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Luma texcoords
+        * decl o2              ; Chroma texcoords
+        * decl o3              ; Top field ref macroblock texcoords
+        * decl o4              ; Bottom field ref macroblock texcoords
+        * decl o5              ; Denormalized vertex pos
+        */
+       for (i = 0; i < 6; i++)
+       {
+               decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0, t1 */
+       decl = vl_decl_temps(0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add t1, t0, c1       ; Translate rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mov o0, t1           ; Move vertex pos to output */
+       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * mov o1, i1           ; Move input luma texcoords to output
+        * mov o2, i2           ; Move input chroma texcoords to output
+        */
+       for (i = 1; i < 3; ++i)
+       {
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * add o3, t0, c3       ; Translate top field rect into position on ref macroblock
+        * add o4, t0, c4       ; Translate bottom field rect into position on ref macroblock
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* mul o5, t1, c2       ; Denorm vertex pos */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       vs.tokens = tokens;
+       mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the "frame PMB" fragment shader and stores the state object
+ * returned by pipe->create_fs_state() in mc->p_fs[0].
+ *
+ * The generated program fetches one sample each from the luma, Cb and Cr
+ * samplers into t0.xyz, multiplies t0 by constant c0, fetches one texel from
+ * the reference sampler (s3) and writes the sum of both to the color output.
+ *
+ * mc: renderer whose pipe context is used to create the shader; must not be
+ *     NULL (asserted).
+ *
+ * Returns 0 on success, or 1 if the temporary token buffer could not be
+ * allocated (mc->p_fs[0] is left untouched in that case).
+ */
+static int vlCreateFragmentShaderFramePMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Without this check the version/header writes below go through NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       /* The first three tokens (version, header, processor) are now in place */
+       ti = 3;
+
+       /*
+        * decl i0                      ; Texcoords for s0
+        * decl i1                      ; Texcoords for s1, s2
+        * decl i2                      ; Texcoords for s3
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0                      ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0, t1 */
+       decl = vl_decl_temps(0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl s0                      ; Sampler for luma texture
+        * decl s1                      ; Sampler for chroma Cb texture
+        * decl s2                      ; Sampler for chroma Cr texture
+        * decl s3                      ; Sampler for ref surface texture
+        */
+       for (i = 0; i < 4; ++i)
+       {
+               decl = vl_decl_samplers(i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               /* Luma uses input 0; both chroma planes share input 1 */
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+               /* Replicate t1.x and write only channel i (x, y or z) of t0 */
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       }
+
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* tex2d t1, i2, s3             ; Read texel from ref macroblock */
+       inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, t1               ; Add ref and differential to form final output */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* NOTE(review): tokens are freed right after creation; assumes the driver keeps its own copy - confirm */
+       fs.tokens = tokens;
+       mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the "field PMB" fragment shader and stores the state object
+ * returned by pipe->create_fs_state() in mc->p_fs[1].
+ *
+ * The generated program fetches the luma, Cb and Cr samples into t0.xyz and
+ * multiplies t0 by c0. It then fetches two texels from the reference sampler
+ * (s3) using inputs i2 and i3, derives a weight from the Y component of
+ * input i4 (y - floor(y * c1.x) * c1.y, after subtracting c1.x), uses that
+ * weight in a LERP between the two reference texels and adds the result to
+ * t0 to form the color output.
+ *
+ * mc: renderer whose pipe context is used to create the shader; must not be
+ *     NULL (asserted).
+ *
+ * Returns 0 on success, or 1 if the temporary token buffer could not be
+ * allocated (mc->p_fs[1] is left untouched in that case).
+ */
+static int vlCreateFragmentShaderFieldPMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 200;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Without this check the version/header writes below go through NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       /* The first three tokens (version, header, processor) are now in place */
+       ti = 3;
+
+       /*
+        * decl i0                      ; Texcoords for s0
+        * decl i1                      ; Texcoords for s1, s2
+        * decl i2                      ; Texcoords for s3
+        * decl i3                      ; Texcoords for s3
+        * decl i4                      ; Denormalized vertex pos
+        */
+       for (i = 0; i < 5; ++i)
+       {
+               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0                      ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0-t4 */
+       decl = vl_decl_temps(0, 4);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl s0                      ; Sampler for luma texture
+        * decl s1                      ; Sampler for chroma Cb texture
+        * decl s2                      ; Sampler for chroma Cr texture
+        * decl s3                      ; Sampler for ref surface texture
+        */
+       for (i = 0; i < 4; ++i)
+       {
+               decl = vl_decl_samplers(i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               /* Luma uses input 0; both chroma planes share input 1 */
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+               /* Replicate t1.x and write only channel i (x, y or z) of t0 */
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       }
+
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * tex2d t1, i2, s3             ; Read texel from ref macroblock top field
+        * tex2d t2, i3, s3             ; Read texel from ref macroblock bottom field
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* XXX: Pos values off by 0.5? */
+       /* sub t4, i4.y, c1.x           ; Sub 0.5 from denormalized pos */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* floor t3, t3                 ; Get rid of fractional part */
+       inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t3, t3, c1.y             ; Multiply by 2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* sub t3, t4, t3               ; Subtract from original Y to get Y % 2 */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
+       /* lerp t1, t3, t1, t2          ; Choose between top and bottom fields based on Y % 2 */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, t1               ; Add ref and differential to form final output */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* NOTE(review): tokens are freed right after creation; assumes the driver keeps its own copy - confirm */
+       fs.tokens = tokens;
+       mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the "frame BMB" vertex shader and stores the state object returned
+ * by pipe->create_vs_state() in mc->b_vs[0].
+ *
+ * The generated program computes t0 = i0 * c0, outputs t0 + c1 as the vertex
+ * position, passes inputs i1 and i2 through to o1 and o2, and outputs
+ * t0 + c3 and t0 + c5 as the two reference texcoord sets (o3, o4).
+ *
+ * mc: renderer whose pipe context is used to create the shader; must not be
+ *     NULL (asserted).
+ *
+ * Returns 0 on success, or 1 if the temporary token buffer could not be
+ * allocated (mc->b_vs[0] is left untouched in that case).
+ */
+static int vlCreateVertexShaderFrameBMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Without this check the version/header writes below go through NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* The first three tokens (version, header, processor) are now in place */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Luma texcoords
+        * decl i2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale unit rect to macroblock size
+        * decl c1              ; Translation vector to move macroblock into position
+        * decl c2              ; Unused
+        * decl c3              ; Translation vector to move past ref macroblock texcoords into position
+        * decl c4              ; Unused
+        * decl c5              ; Translation vector to move future ref macroblock texcoords into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Luma texcoords
+        * decl o2              ; Chroma texcoords
+        * decl o3              ; Past ref macroblock texcoords
+        * decl o4              ; Future ref macroblock texcoords
+        */
+       for (i = 0; i < 5; i++)
+       {
+               decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0 */
+       decl = vl_decl_temps(0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, c1       ; Translate rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * mov o1, i1           ; Move input luma texcoords to output
+        * mov o2, i2           ; Move input chroma texcoords to output
+        */
+       for (i = 1; i < 3; ++i)
+       {
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* add o3, t0, c3       ; Translate rect into position on past ref macroblock
+          add o4, t0, c5       ; Translate rect into position on future ref macroblock */
+       for (i = 0; i < 2; ++i)
+       {
+               /* i * 2 + 3 selects c3 then c5, skipping the unused c4 slot */
+               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* NOTE(review): tokens are freed right after creation; assumes the driver keeps its own copy - confirm */
+       vs.tokens = tokens;
+       mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the "field BMB" vertex shader and stores the state object returned
+ * by pipe->create_vs_state() in mc->b_vs[1].
+ *
+ * The generated program computes t0 = i0 * c0 and t1 = t0 + c1, outputs t1
+ * as the vertex position, passes inputs i1 and i2 through to o1 and o2,
+ * outputs t0 + c3..c6 as four reference texcoord sets (o3..o6) and outputs
+ * t1 * c2 in o7.
+ *
+ * mc: renderer whose pipe context is used to create the shader; must not be
+ *     NULL (asserted).
+ *
+ * Returns 0 on success, or 1 if the temporary token buffer could not be
+ * allocated (mc->b_vs[1] is left untouched in that case).
+ */
+static int vlCreateVertexShaderFieldBMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Without this check the version/header writes below go through NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+       /* The first three tokens (version, header, processor) are now in place */
+       ti = 3;
+
+       /*
+        * decl i0              ; Vertex pos
+        * decl i1              ; Luma texcoords
+        * decl i2              ; Chroma texcoords
+        */
+       for (i = 0; i < 3; i++)
+       {
+               decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0              ; Scaling vector to scale unit rect to macroblock size
+        * decl c1              ; Translation vector to move macroblock into position
+        * decl c2              ; Denorm coefficients
+        * decl c3              ; Translation vector to move top field past ref macroblock texcoords into position
+        * decl c4              ; Translation vector to move bottom field past ref macroblock texcoords into position
+        * decl c5              ; Translation vector to move top field future ref macroblock texcoords into position
+        * decl c6              ; Translation vector to move bottom field future ref macroblock texcoords into position
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl o0              ; Vertex pos
+        * decl o1              ; Luma texcoords
+        * decl o2              ; Chroma texcoords
+        * decl o3              ; Top field past ref macroblock texcoords
+        * decl o4              ; Bottom field past ref macroblock texcoords
+        * decl o5              ; Top field future ref macroblock texcoords
+        * decl o6              ; Bottom field future ref macroblock texcoords
+        * decl o7              ; Denormalized vertex pos
+        */
+       for (i = 0; i < 8; i++)
+       {
+               /*
+                * NOTE(review): o7 is declared with the POSITION semantic, while the
+                * fragment shaders in this file declare every input as GENERIC -
+                * confirm the mismatch is intentional.
+                */
+               decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* decl t0, t1 */
+       decl = vl_decl_temps(0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t0, i0, c0       ; Scale unit rect to normalized MB size */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add t1, t0, c1       ; Translate rect into position */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mov o0, t1           ; Move vertex pos to output */
+       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * mov o1, i1           ; Move input luma texcoords to output
+        * mov o2, i2           ; Move input chroma texcoords to output
+        */
+       for (i = 1; i < 3; ++i)
+       {
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * add o3, t0, c3       ; Translate top field rect into position on past ref macroblock
+        * add o4, t0, c4       ; Translate bottom field rect into position on past ref macroblock
+        * add o5, t0, c5       ; Translate top field rect into position on future ref macroblock
+        * add o6, t0, c6       ; Translate bottom field rect into position on future ref macroblock
+        */
+       for (i = 0; i < 4; ++i)
+       {
+               inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* mul o7, t1, c2       ; Denorm vertex pos */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* NOTE(review): tokens are freed right after creation; assumes the driver keeps its own copy - confirm */
+       vs.tokens = tokens;
+       mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Builds the "frame BMB" fragment shader and stores the state object
+ * returned by pipe->create_fs_state() in mc->b_fs[0].
+ *
+ * The generated program fetches the luma, Cb and Cr samples into t0.xyz and
+ * multiplies t0 by c0. It then fetches one texel from each of the two
+ * reference samplers (s3 with i2, s4 with i3), combines them with a LERP
+ * weighted by c1.x and adds the result to t0 to form the color output.
+ *
+ * mc: renderer whose pipe context is used to create the shader; must not be
+ *     NULL (asserted).
+ *
+ * Returns 0 on success, or 1 if the temporary token buffer could not be
+ * allocated (mc->b_fs[0] is left untouched in that case).
+ */
+static int vlCreateFragmentShaderFrameBMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 100;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Without this check the version/header writes below go through NULL */
+       if (!tokens)
+               return 1;
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       /* The first three tokens (version, header, processor) are now in place */
+       ti = 3;
+
+       /*
+        * decl i0                      ; Texcoords for s0
+        * decl i1                      ; Texcoords for s1, s2
+        * decl i2                      ; Texcoords for s3
+        * decl i3                      ; Texcoords for s4
+        */
+       for (i = 0; i < 4; ++i)
+       {
+               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0                      ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0-t2 */
+       decl = vl_decl_temps(0, 2);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl s0                      ; Sampler for luma texture
+        * decl s1                      ; Sampler for chroma Cb texture
+        * decl s2                      ; Sampler for chroma Cr texture
+        * decl s3                      ; Sampler for past ref surface texture
+        * decl s4                      ; Sampler for future ref surface texture
+        */
+       for (i = 0; i < 5; ++i)
+       {
+               decl = vl_decl_samplers(i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               /* Luma uses input 0; both chroma planes share input 1 */
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+               /* Replicate t1.x and write only channel i (x, y or z) of t0 */
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       }
+
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * tex2d t1, i2, s3             ; Read texel from past ref macroblock
+        * tex2d t2, i3, s4             ; Read texel from future ref macroblock
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* NOTE(review): tokens are freed right after creation; assumes the driver keeps its own copy - confirm */
+       fs.tokens = tokens;
+       mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+static int vlCreateFragmentShaderFieldBMB
+(
+       struct vlR16SnormMC *mc
+)
+{
+       const unsigned int              max_tokens = 200;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+
+       unsigned int                    ti;
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+       /* Processor */
+       *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+       ti = 3;
+
+       /*
+        * decl i0                      ; Texcoords for s0
+        * decl i1                      ; Texcoords for s1, s2
+        * decl i2                      ; Texcoords for s3
+        * decl i3                      ; Texcoords for s3
+        * decl i4                      ; Texcoords for s4
+        * decl i5                      ; Texcoords for s4
+        * decl i6                      ; Denormalized vertex pos
+        */
+       for (i = 0; i < 7; ++i)
+       {
+               decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
+        *                              ; and for Y-mod-2 top/bottom field selection
+        */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl o0                      ; Fragment color */
+       decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /* decl t0-t5 */
+       decl = vl_decl_temps(0, 5);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * decl s0                      ; Sampler for luma texture
+        * decl s1                      ; Sampler for chroma Cb texture
+        * decl s2                      ; Sampler for chroma Cr texture
+        * decl s3                      ; Sampler for past ref surface texture
+        * decl s4                      ; Sampler for future ref surface texture
+        */
+       for (i = 0; i < 5; ++i)
+       {
+               decl = vl_decl_samplers(i, i);
+               ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /*
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
+       for (i = 0; i < 3; ++i)
+       {
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       }
+
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* XXX: Pos values off by 0.5? */
+       /* sub t4, i6.y, c1.x           ; Sub 0.5 from denormalized pos */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* floor t3, t3                 ; Get rid of fractional part */
+       inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* mul t3, t3, c1.y             ; Multiply by 2 */
+       inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* sub t3, t4, t3               ; Subtract from original Y to get Y % 2 */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * tex2d t1, i2, s3             ; Read texel from past ref macroblock top field
+        * tex2d t2, i3, s3             ; Read texel from past ref macroblock bottom field
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
+       /* lerp t1, t3, t1, t2          ; Choose between top and bottom fields based on Y % 2 */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /*
+        * tex2d t4, i4, s4             ; Read texel from future ref macroblock top field
+        * tex2d t5, i5, s4             ; Read texel from future ref macroblock bottom field
+        */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+       }
+
+       /* TODO: Move to conditional tex fetch on t3 instead of lerp */
+       /* lerp t2, t3, t4, t5          ; Choose between top and bottom fields based on Y % 2 */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
+       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       /* end */
+       inst = vl_end();
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+       fs.tokens = tokens;
+       mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
+       free(tokens);
+
+       return 0;
+}
+
+/*
+ * Creates the buffers used by the MC stage and uploads their static contents:
+ * three vertex streams (macroblock_verts, macroblock_luma_texcoords and
+ * macroblock_chroma_420_texcoords) plus one constant buffer each for the
+ * vertex and fragment shaders. Only the fragment shader constants are
+ * filled in here. Buffer creation and mapping results are not checked.
+ * Always returns 0.
+ */
+int vlCreateDataBufs
+(
+       struct vlR16SnormMC *mc
+)
+{
+       enum { NUM_VERTEX_STREAMS = 3, VERTS_PER_STREAM = 24 };
+
+       struct pipe_context     *pipe;
+       const void              *stream_data[NUM_VERTEX_STREAMS];
+       size_t                  stream_size;
+       unsigned int            s;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+       stream_size = sizeof(struct vlVertex2f) * VERTS_PER_STREAM;
+
+       /* Stream 0 carries the vertices, streams 1 and 2 carry the texcoords */
+       stream_data[0] = macroblock_verts;
+       stream_data[1] = macroblock_luma_texcoords;
+       /* TODO: Accommodate 422, 444 */
+       stream_data[2] = macroblock_chroma_420_texcoords;
+
+       /* Create each vertex buffer together with the vertex element that reads from it */
+       for (s = 0; s < NUM_VERTEX_STREAMS; ++s)
+       {
+               mc->vertex_bufs[s].pitch = sizeof(struct vlVertex2f);
+               mc->vertex_bufs[s].max_index = VERTS_PER_STREAM - 1;
+               mc->vertex_bufs[s].buffer_offset = 0;
+               mc->vertex_bufs[s].buffer = pipe->winsys->buffer_create
+               (
+                       pipe->winsys,
+                       1,
+                       PIPE_BUFFER_USAGE_VERTEX,
+                       stream_size
+               );
+
+               mc->vertex_elems[s].src_offset = 0;
+               mc->vertex_elems[s].vertex_buffer_index = s;
+               mc->vertex_elems[s].nr_components = 2;
+               mc->vertex_elems[s].src_format = PIPE_FORMAT_R32G32_FLOAT;
+       }
+
+       /* Fill buffers; all streams are mapped and written before any is unmapped */
+       for (s = 0; s < NUM_VERTEX_STREAMS; ++s)
+       {
+               void *dst = pipe->winsys->buffer_map
+               (
+                       pipe->winsys,
+                       mc->vertex_bufs[s].buffer,
+                       PIPE_BUFFER_USAGE_CPU_WRITE
+               );
+
+               memcpy(dst, stream_data[s], stream_size);
+       }
+
+       for (s = 0; s < NUM_VERTEX_STREAMS; ++s)
+               pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[s].buffer);
+
+       /* Vertex shader constant buffer: created here but not written */
+       mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
+       mc->vs_const_buf.buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_CONSTANT,
+               mc->vs_const_buf.size
+       );
+
+       /* Fragment shader constant buffer: created and loaded from fs_consts */
+       mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
+       mc->fs_const_buf.buffer = pipe->winsys->buffer_create
+       (
+               pipe->winsys,
+               1,
+               PIPE_BUFFER_USAGE_CONSTANT,
+               mc->fs_const_buf.size
+       );
+
+       memcpy
+       (
+               pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+               &fs_consts,
+               sizeof(struct vlFragmentShaderConsts)
+       );
+
+       pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
+
+       return 0;
+}
+
+/*
+ * One-time setup of a freshly allocated vlR16SnormMC: viewport and render
+ * target dimensions, the five sampler states, the three R16_SNORM block
+ * textures, every MC shader variant and the vertex/constant buffers.
+ *
+ * Reads mc->pipe, mc->video_width, mc->video_height and mc->video_format,
+ * so those must be set by the caller beforehand.
+ * Return values of the shader and buffer helpers are ignored.
+ * Always returns 0.
+ */
+static int vlInit
+(
+       struct vlR16SnormMC *mc
+)
+{
+       struct pipe_context             *pipe;
+       struct pipe_sampler_state       sampler;
+       struct pipe_texture             template;
+       unsigned int                    filters[5];
+       unsigned int                    i;
+
+       assert(mc);
+
+       pipe = mc->pipe;
+
+       /* For MC we render to textures, which are rounded up to nearest POT */
+       mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width);
+       mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height);
+       mc->viewport.scale[2] = 1;
+       mc->viewport.scale[3] = 1;
+       mc->viewport.translate[0] = 0;
+       mc->viewport.translate[1] = 0;
+       mc->viewport.translate[2] = 0;
+       mc->viewport.translate[3] = 0;
+
+       mc->render_target.width = vlRoundUpPOT(mc->video_width);
+       mc->render_target.height = vlRoundUpPOT(mc->video_height);
+       mc->render_target.num_cbufs = 1;
+       /* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */
+       mc->render_target.zsbuf = NULL;
+
+       /*
+        * Sampler 0 uses nearest filtering; samplers 1 and 2 use nearest only
+        * for 4:4:4 video and linear otherwise; samplers 3 and 4 use linear.
+        */
+       filters[0] = PIPE_TEX_FILTER_NEAREST;
+       filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+       filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+       filters[3] = PIPE_TEX_FILTER_LINEAR;
+       filters[4] = PIPE_TEX_FILTER_LINEAR;
+
+       for (i = 0; i < 5; ++i)
+       {
+               /*
+                * Start from an all-zero state so that the fields not assigned
+                * below (prefilter, shadow_ambient, lod_bias, max_lod,
+                * border_color, max_anisotropy) hold defined values instead of
+                * stack garbage when handed to the driver.
+                */
+               memset(&sampler, 0, sizeof(struct pipe_sampler_state));
+               sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+               sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+               sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+               sampler.min_img_filter = filters[i];
+               sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+               sampler.mag_img_filter = filters[i];
+               sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+               sampler.compare_func = PIPE_FUNC_ALWAYS;
+               sampler.normalized_coords = 1;
+               sampler.min_lod = 0;
+               mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
+       }
+
+       /* textures[0] is 8 texels wide and 8 * 4 texels high */
+       memset(&template, 0, sizeof(struct pipe_texture));
+       template.target = PIPE_TEXTURE_2D;
+       template.format = PIPE_FORMAT_R16_SNORM;
+       template.last_level = 0;
+       template.width[0] = 8;
+       template.height[0] = 8 * 4;
+       template.depth[0] = 1;
+       template.compressed = 0;
+       pf_get_block(template.format, &template.block);
+
+       mc->textures[0] = pipe->screen->texture_create(pipe->screen, &template);
+
+       /* textures[1] and textures[2] share the template; only the height depends on the video format */
+       if (mc->video_format == vlFormatYCbCr420)
+               template.height[0] = 8;
+       else if (mc->video_format == vlFormatYCbCr422)
+               template.height[0] = 8 * 2;
+       else if (mc->video_format == vlFormatYCbCr444)
+               template.height[0] = 8 * 4;
+       else
+               assert(0);
+
+       mc->textures[1] = pipe->screen->texture_create(pipe->screen, &template);
+       mc->textures[2] = pipe->screen->texture_create(pipe->screen, &template);
+
+       /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
+
+       vlCreateVertexShaderIMB(mc);
+       vlCreateFragmentShaderIMB(mc);
+       vlCreateVertexShaderFramePMB(mc);
+       vlCreateVertexShaderFieldPMB(mc);
+       vlCreateFragmentShaderFramePMB(mc);
+       vlCreateFragmentShaderFieldPMB(mc);
+       vlCreateVertexShaderFrameBMB(mc);
+       vlCreateVertexShaderFieldBMB(mc);
+       vlCreateFragmentShaderFrameBMB(mc);
+       vlCreateFragmentShaderFieldBMB(mc);
+       vlCreateDataBufs(mc);
+
+       return 0;
+}
+
+/*
+ * Allocates and initializes an R16_SNORM motion compensation renderer.
+ *
+ * pipe          Pipe context the renderer issues its work through.
+ * video_width   Width of the video.
+ * video_height  Height of the video.
+ * video_format  Chroma format of the video; vlInit() uses it to choose
+ *               chroma sampler filters and chroma texture heights.
+ * render        Receives the renderer's vlRender interface on success.
+ *
+ * Returns 0 on success, 1 if the renderer could not be allocated
+ * (*render is left untouched in that case).
+ */
+int vlCreateR16SNormMC
+(
+       struct pipe_context *pipe,
+       unsigned int video_width,
+       unsigned int video_height,
+       enum vlFormat video_format,
+       struct vlRender **render
+)
+{
+       struct vlR16SnormMC *mc;
+
+       assert(pipe);
+       assert(render);
+
+       mc = calloc(1, sizeof(struct vlR16SnormMC));
+
+       if (!mc)
+               return 1;
+
+       mc->base.vlBegin = &vlBegin;
+       mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm;
+       mc->base.vlEnd = &vlEnd;
+       mc->base.vlDestroy = &vlDestroy;
+       mc->pipe = pipe;
+       mc->video_width = video_width;
+       mc->video_height = video_height;
+       /* Must be stored before vlInit(), which reads mc->video_format */
+       mc->video_format = video_format;
+
+       vlInit(mc);
+
+       *render = &mc->base;
+
+       return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h
new file mode 100644 (file)
index 0000000..a6eecf0
--- /dev/null
@@ -0,0 +1,18 @@
+/* NOTE(review): the guard below is vl_mc_h while this file is vl_r16snorm_mc.h -- confirm no other header uses the same guard */
+#ifndef vl_mc_h
+#define vl_mc_h
+
+#include "vl_types.h"
+
+struct pipe_context;
+struct vlRender;
+
+/*
+ * Creates an R16_SNORM motion compensation renderer for the given pipe
+ * context, video dimensions and video format, and returns its vlRender
+ * interface through render. Returns 0 on success.
+ */
+int vlCreateR16SNormMC
+(
+       struct pipe_context *pipe,
+       unsigned int video_width,
+       unsigned int video_height,
+       enum vlFormat video_format,
+       struct vlRender **render
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h
new file mode 100644 (file)
index 0000000..63016b5
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef vl_render_h
+#define vl_render_h
+
+#include "vl_types.h"
+
+/* Forward declaration; nothing else in this header refers to it */
+struct pipe_surface;
+
+/*
+ * Table of function pointers through which a renderer is driven.
+ * Every entry receives the vlRender it belongs to as its first argument
+ * and returns an int (presumably 0 on success -- TODO confirm against
+ * the implementations).
+ */
+struct vlRender
+{
+       /* presumably called before a series of render calls -- confirm against implementations */
+       int (*vlBegin)
+       (
+               struct vlRender *render
+       );
+
+       /* Takes a batch of MPEG-2 macroblocks and a surface; presumably renders the batch to that surface -- confirm */
+       int (*vlRenderMacroBlocksMpeg2)
+       (
+               struct vlRender *render,
+               struct vlMpeg2MacroBlockBatch *batch,
+               struct vlSurface *surface
+       );
+
+       /* presumably the counterpart of vlBegin -- confirm against implementations */
+       int (*vlEnd)
+       (
+               struct vlRender *render
+       );
+
+       /* presumably releases the renderer and whatever it owns -- confirm against implementations */
+       int (*vlDestroy)
+       (
+               struct vlRender *render
+       );
+};
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c
new file mode 100644 (file)
index 0000000..484f63b
--- /dev/null
@@ -0,0 +1,115 @@
+#define VL_INTERNAL
+#include "vl_screen.h"
+#include <assert.h>
+#include <stdlib.h>
+
+/*
+ * Allocates a vlScreen recording the given display, screen number and
+ * pipe screen, and returns it through vl_screen.
+ * Returns 0 on success, 1 if the allocation fails (*vl_screen is then
+ * left untouched).
+ */
+int vlCreateScreen
+(
+       struct vlDisplay *display,
+       int screen,
+       struct pipe_screen *pscreen,
+       struct vlScreen **vl_screen
+)
+{
+       struct vlScreen *new_screen;
+
+       assert(display);
+       assert(pscreen);
+       assert(vl_screen);
+
+       new_screen = calloc(1, sizeof(struct vlScreen));
+
+       if (new_screen)
+       {
+               new_screen->display = display;
+               new_screen->ordinal = screen;
+               new_screen->pscreen = pscreen;
+               *vl_screen = new_screen;
+
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Frees the vlScreen itself; the display and pipe screen it points to
+ * are not touched. Always returns 0.
+ */
+int vlDestroyScreen
+(
+       struct vlScreen *screen
+)
+{
+       assert(screen);
+       free(screen);
+       return 0;
+}
+
+/* Returns the display that was passed to vlCreateScreen() for this screen. */
+struct vlDisplay* vlGetDisplay
+(
+       struct vlScreen *screen
+)
+{
+       struct vlDisplay *owner;
+
+       assert(screen);
+
+       owner = screen->display;
+
+       return owner;
+}
+
+/* Returns the pipe screen that was passed to vlCreateScreen() for this screen. */
+struct pipe_screen* vlGetPipeScreen
+(
+       struct vlScreen *screen
+)
+{
+       struct pipe_screen *backing;
+
+       assert(screen);
+
+       backing = screen->pscreen;
+
+       return backing;
+}
+
+/*
+ * Returns vlProfileCount regardless of which screen is passed; the
+ * screen argument is only checked for being non-NULL.
+ */
+unsigned int vlGetMaxProfiles
+(
+       struct vlScreen *screen
+)
+{
+       const unsigned int max_profiles = vlProfileCount;
+
+       assert(screen);
+
+       return max_profiles;
+}
+
+/*
+ * Writes the two profiles reported for every screen (MPEG-2 Simple and
+ * MPEG-2 Main) into profiles[0] and profiles[1]. The caller must supply
+ * room for at least two entries. Always returns 0.
+ */
+int vlQueryProfiles
+(
+       struct vlScreen *screen,
+       enum vlProfile *profiles
+)
+{
+       enum vlProfile *out = profiles;
+
+       assert(screen);
+       assert(profiles);
+
+       *out++ = vlProfileMpeg2Simple;
+       *out = vlProfileMpeg2Main;
+
+       return 0;
+}
+
+/*
+ * Returns vlEntryPointCount regardless of which screen is passed; the
+ * screen argument is only checked for being non-NULL.
+ */
+unsigned int vlGetMaxEntryPoints
+(
+       struct vlScreen *screen
+)
+{
+       const unsigned int max_entry_points = vlEntryPointCount;
+
+       assert(screen);
+
+       return max_entry_points;
+}
+
+/*
+ * Writes the three entry points reported for every screen (IDCT, MC and
+ * CSC, in that order) into entry_points[0..2]. The profile argument is
+ * not consulted. The caller must supply room for at least three entries.
+ * Always returns 0.
+ */
+int vlQueryEntryPoints
+(
+       struct vlScreen *screen,
+       enum vlProfile profile,
+       enum vlEntryPoint *entry_points
+)
+{
+       enum vlEntryPoint *out = entry_points;
+
+       (void)profile;
+
+       assert(screen);
+       assert(entry_points);
+
+       *out++ = vlEntryPointIDCT;
+       *out++ = vlEntryPointMC;
+       *out = vlEntryPointCSC;
+
+       return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.h b/src/gallium/state_trackers/g3dvl/vl_screen.h
new file mode 100644 (file)
index 0000000..98f3d42
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef vl_screen_h
+#define vl_screen_h
+
+#include "vl_types.h"
+
+struct pipe_screen;
+
+/* The layout of vlScreen is only visible to files that define VL_INTERNAL before including this header */
+#ifdef VL_INTERNAL
+struct vlScreen
+{
+       struct vlDisplay        *display;       /* Display given to vlCreateScreen() */
+       unsigned int            ordinal;        /* Screen number given to vlCreateScreen() */
+       struct pipe_screen      *pscreen;       /* Pipe screen given to vlCreateScreen() */
+};
+#endif
+
+/* Allocates a vlScreen and returns it through vl_screen; returns 0 on success, 1 if allocation fails */
+int vlCreateScreen
+(
+       struct vlDisplay *display,
+       int screen,
+       struct pipe_screen *pscreen,
+       struct vlScreen **vl_screen
+);
+
+/* Frees a vlScreen; returns 0 */
+int vlDestroyScreen
+(
+       struct vlScreen *screen
+);
+
+/* Returns the display the screen was created with */
+struct vlDisplay* vlGetDisplay
+(
+       struct vlScreen *screen
+);
+
+/* Returns the pipe screen the screen was created with */
+struct pipe_screen* vlGetPipeScreen
+(
+       struct vlScreen *screen
+);
+
+/* Returns vlProfileCount; presumably the array size to pass to vlQueryProfiles() -- confirm */
+unsigned int vlGetMaxProfiles
+(
+       struct vlScreen *screen
+);
+
+/* Fills profiles[0] and profiles[1] with the reported profiles; returns 0 */
+int vlQueryProfiles
+(
+       struct vlScreen *screen,
+       enum vlProfile *profiles
+);
+
+/* Returns vlEntryPointCount; presumably the array size to pass to vlQueryEntryPoints() -- confirm */
+unsigned int vlGetMaxEntryPoints
+(
+       struct vlScreen *screen
+);
+
+/* Fills entry_points[0..2] with the reported entry points; profile is currently not consulted; returns 0 */
+int vlQueryEntryPoints
+(
+       struct vlScreen *screen,
+       enum vlProfile profile,
+       enum vlEntryPoint *entry_points
+);
+
+#endif
index 5f30e23ff8e12c173f4fb09538a97505af55f477..51f1721a3325321a0df8e0460754dddc99d25eff 100644 (file)
@@ -13,7 +13,7 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index
        decl.Semantic.SemanticIndex = index;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
@@ -27,7 +27,7 @@ struct tgsi_full_declaration vl_decl_interpolated_input
 )
 {
        struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-       
+
        assert
        (
                interpolation == TGSI_INTERPOLATE_CONSTANT ||
@@ -42,21 +42,21 @@ struct tgsi_full_declaration vl_decl_interpolated_input
        decl.Declaration.Interpolate = interpolation;;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
 struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
 {
        struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-       
+
        decl.Declaration.File = TGSI_FILE_CONSTANT;
        decl.Declaration.Semantic = 1;
        decl.Semantic.SemanticName = name;
        decl.Semantic.SemanticIndex = index;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
@@ -70,7 +70,7 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde
        decl.Semantic.SemanticIndex = index;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
@@ -82,7 +82,7 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last
        decl.Declaration.File = TGSI_FILE_TEMPORARY;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
@@ -94,7 +94,7 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l
        decl.Declaration.File = TGSI_FILE_SAMPLER;
        decl.DeclarationRange.First = first;
        decl.DeclarationRange.Last = last;
-       
+
        return decl;
 }
 
@@ -108,7 +108,7 @@ struct tgsi_full_instruction vl_inst2
 )
 {
        struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-       
+
        inst.Instruction.Opcode = opcode;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = dst_file;
@@ -116,7 +116,7 @@ struct tgsi_full_instruction vl_inst2
        inst.Instruction.NumSrcRegs = 1;
        inst.FullSrcRegisters[0].SrcRegister.File = src_file;
        inst.FullSrcRegisters[0].SrcRegister.Index = src_index;
-       
+
        return inst;
 }
 
@@ -132,7 +132,7 @@ struct tgsi_full_instruction vl_inst3
 )
 {
        struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-       
+
        inst.Instruction.Opcode = opcode;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = dst_file;
@@ -142,7 +142,7 @@ struct tgsi_full_instruction vl_inst3
        inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
        inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
        inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-       
+
        return inst;
 }
 
@@ -158,7 +158,7 @@ struct tgsi_full_instruction vl_tex
 )
 {
        struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-       
+
        inst.Instruction.Opcode = TGSI_OPCODE_TEX;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = dst_file;
@@ -169,7 +169,7 @@ struct tgsi_full_instruction vl_tex
        inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
        inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
        inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-       
+
        return inst;
 }
 
@@ -187,7 +187,7 @@ struct tgsi_full_instruction vl_inst4
 )
 {
        struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-       
+
        inst.Instruction.Opcode = opcode;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = dst_file;
@@ -199,18 +199,17 @@ struct tgsi_full_instruction vl_inst4
        inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
        inst.FullSrcRegisters[2].SrcRegister.File = src3_file;
        inst.FullSrcRegisters[2].SrcRegister.Index = src3_index;
-       
+
        return inst;
 }
 
 struct tgsi_full_instruction vl_end(void)
 {
        struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-       
+
        inst.Instruction.Opcode = TGSI_OPCODE_END;
        inst.Instruction.NumDstRegs = 0;
        inst.Instruction.NumSrcRegs = 0;
-       
+
        return inst;
 }
-
index 878d7e2c45740adbc71bc7bc43752da6bfcf0e91..dc615cb1566fc252c4e69501abe3f570493947ef 100644 (file)
@@ -59,4 +59,3 @@ struct tgsi_full_instruction vl_inst4
 struct tgsi_full_instruction vl_end(void);
 
 #endif
-
index 1386b1107cc559f4fa618114a64031763b789a74..ffc81221728271b2f041ea105f92b8ba27ea109e 100644 (file)
+#define VL_INTERNAL
 #include "vl_surface.h"
 #include <assert.h>
 #include <stdlib.h>
-#include <pipe/p_context.h>
+#include <string.h>
+#include <pipe/p_screen.h>
 #include <pipe/p_state.h>
-#include <pipe/p_format.h>
 #include <pipe/p_inlines.h>
 #include <vl_winsys.h>
+#include "vl_screen.h"
 #include "vl_context.h"
-#include "vl_defs.h"
+#include "vl_render.h"
+#include "vl_csc.h"
 #include "vl_util.h"
 
-/*#define DO_IDCT*/
-
-#ifdef DO_IDCT
-static int vlTransformBlock(short *src, short *dst, short bias)
+int vlCreateSurface
+(
+       struct vlScreen *screen,
+       unsigned int width,
+       unsigned int height,
+       enum vlFormat format,
+       struct vlSurface **surface
+)
 {
-       static const float basis[8][8] =
-       {
-               {0.3536,   0.4904,   0.4619,   0.4157,   0.3536,   0.2778,   0.1913,   0.0975},
-               {0.3536,   0.4157,   0.1913,  -0.0975,  -0.3536,  -0.4904,  -0.4619,  -0.2778},
-               {0.3536,   0.2778,  -0.1913,  -0.4904,  -0.3536,   0.0975,   0.4619,   0.4157},
-               {0.3536,   0.0975,  -0.4619,  -0.2778,   0.3536,   0.4157,  -0.1913,  -0.4904},
-               {0.3536,  -0.0975,  -0.4619,   0.2778,   0.3536,  -0.4157,  -0.1913,   0.4904},
-               {0.3536,  -0.2778,  -0.1913,   0.4904,  -0.3536,  -0.0975,   0.4619,  -0.4157},
-               {0.3536,  -0.4157,   0.1913,   0.0975,  -0.3536,   0.4904,  -0.4619,   0.2778},
-               {0.3536,  -0.4904,   0.4619,  -0.4157,   0.3536,  -0.2778,   0.1913,  -0.0975}
-       };
-       
-       unsigned int    x, y;
-       short           tmp[64];
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                       tmp[y * VL_BLOCK_WIDTH + x] = (short)
-                       (
-                               src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] +
-                               src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] +
-                               src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] +
-                               src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] +
-                               src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] +
-                               src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] +
-                               src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] +
-                               src[y * VL_BLOCK_WIDTH + 7] * basis[x][7]
-                       );
-
-       for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-               for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               {
-                       dst[y * VL_BLOCK_WIDTH + x] = bias + (short)
-                       (
-                               tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] +
-                               tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] +
-                               tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] +
-                               tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] +
-                               tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] +
-                               tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] +
-                               tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] +
-                               tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7]
-                       );
-                       if (dst[y * VL_BLOCK_WIDTH + x] > 255)
-                               dst[y * VL_BLOCK_WIDTH + x] = 255;
-                       else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0)
-                               dst[y * VL_BLOCK_WIDTH + x] = 0;
-               }
-       return 0;
-}
-#endif
+       struct vlSurface        *sfc;
+       struct pipe_texture     template;
 
-static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
-{
-       unsigned int y;
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               memcpy
-               (
-                       dst + y * dst_pitch,
-                       src + y * VL_BLOCK_WIDTH,
-                       VL_BLOCK_WIDTH * 2
-               );
-       
-       return 0;
-}
+       assert(screen);
+       assert(surface);
 
-static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
-{
-       unsigned int y;
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
-               memcpy
-               (
-                       dst + y * dst_pitch * 2,
-                       src + y * VL_BLOCK_WIDTH,
-                       VL_BLOCK_WIDTH * 2
-               );
-       
-       dst += VL_BLOCK_HEIGHT * dst_pitch;
-       
-       for (; y < VL_BLOCK_HEIGHT; ++y)
-               memcpy
-               (
-                       dst + y * dst_pitch * 2,
-                       src + y * VL_BLOCK_WIDTH,
-                       VL_BLOCK_WIDTH * 2
-               );
-       
-       return 0;
-}
+       sfc = calloc(1, sizeof(struct vlSurface));
 
-static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
-{
-       unsigned int y;
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               memset
-               (
-                       dst + y * dst_pitch,
-                       0,
-                       VL_BLOCK_WIDTH * 2
-               );
-       
-       return 0;
-}
+       if (!sfc)
+               return 1;
 
-static int vlGrabBlocks
-(
-       struct VL_CONTEXT *context,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       enum VL_SAMPLE_TYPE sample_type,
-       short *blocks
-)
-{
-       struct pipe_surface     *tex_surface;
-       short                   *texels;
-       unsigned int            tex_pitch;
-       unsigned int            tb, sb = 0;
-       
-       assert(context);
-       assert(blocks);
-       
-       tex_surface = context->pipe->screen->get_tex_surface
-       (
-               context->pipe->screen,
-               context->states.mc.textures[0],
-               0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
-       );
-       
-       texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
-       tex_pitch = tex_surface->stride / tex_surface->block.size;
-       
-       for (tb = 0; tb < 4; ++tb)
-       {
-               if ((coded_block_pattern >> (5 - tb)) & 1)
-               {
-                       short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
-                       
-#ifdef DO_IDCT
-                       vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0);
-#endif
-                       
-                       if (dct_type == VL_DCT_FRAME_CODED)
-                               vlGrabFrameCodedBlock
-                               (
-                                       cur_block,
-                                       texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
-                                       tex_pitch
-                               );
-                       else
-                               vlGrabFieldCodedBlock
-                               (
-                                       cur_block,
-                                       texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
-                                       tex_pitch
-                               );
-                       
-                       ++sb;
-               }
-               else
-                       vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch);
-       }
-       
-       pipe_surface_unmap(tex_surface);
-       
-       /* TODO: Implement 422, 444 */
-       for (tb = 0; tb < 2; ++tb)
-       {
-               tex_surface = context->pipe->screen->get_tex_surface
-                       (
-                               context->pipe->screen,
-                               context->states.mc.textures[tb + 1],
-                               0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
-                       );
-       
-               texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
-               tex_pitch = tex_surface->stride / tex_surface->block.size;
-               
-               if ((coded_block_pattern >> (1 - tb)) & 1)
-               {
-                       short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
-                       
-#ifdef DO_IDCT
-                       vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0);
-#endif
-                       
-                       vlGrabFrameCodedBlock
-                       (
-                               cur_block,
-                               texels,
-                               tex_pitch
-                       );
-                       
-                       ++sb;
-               }
-               else
-                       vlGrabNoBlock(texels, tex_pitch);
-               
-               pipe_surface_unmap(tex_surface);
-       }
-       
-       return 0;
-}
+       sfc->screen = screen;
+       sfc->width = width;
+       sfc->height = height;
+       sfc->format = format;
 
-int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface)
-{
-       struct pipe_context     *pipe;
-       struct pipe_texture     template;
-       struct VL_SURFACE       *sfc;
-       
-       assert(context);
-       assert(surface);
-       
-       pipe = context->pipe;
-       
-       sfc = calloc(1, sizeof(struct VL_SURFACE));
-       
-       sfc->context = context;
-       sfc->width = vlRoundUpPOT(context->video_width);
-       sfc->height = vlRoundUpPOT(context->video_height);
-       sfc->format = context->video_format;
-       
        memset(&template, 0, sizeof(struct pipe_texture));
        template.target = PIPE_TEXTURE_2D;
        template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
        template.last_level = 0;
-       template.width[0] = sfc->width;
-       template.height[0] = sfc->height;
+       template.width[0] = vlRoundUpPOT(sfc->width);
+       template.height[0] = vlRoundUpPOT(sfc->height);
        template.depth[0] = 1;
        template.compressed = 0;
        pf_get_block(template.format, &template.block);
-       /* XXX: Needed? */
        template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
-       
-       sfc->texture = pipe->screen->texture_create(pipe->screen, &template);
-       
+
+       sfc->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &template);
+
        *surface = sfc;
-       
+
        return 0;
 }
 
-int vlDestroySurface(struct VL_SURFACE *surface)
+int vlDestroySurface /* Calls pipe_texture_release() on the surface's texture, then frees the surface; always returns 0. */
+(
+       struct vlSurface *surface
+)
 {
        assert(surface);
+       /* The texture is released first, while the struct holding its pointer is still alive. */
        pipe_texture_release(&surface->texture);
        free(surface);
-       
+       /* There is no failure path; 0 is the only value returned. */
        return 0;
 }
 
-int vlRenderIMacroBlock
+int vlRenderMacroBlocksMpeg2 /* Forwards 'batch' and 'surface' to the vlRenderMacroBlocksMpeg2 hook of surface->context->render; always returns 0. */
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *surface
+       struct vlMpeg2MacroBlockBatch *batch,
+       struct vlSurface *surface
 )
 {
-       struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vs_consts;
-       
-       assert(blocks);
+       assert(batch);
        assert(surface);
-       
-       /* TODO: Implement interlaced rendering */
-       if (picture_type != VL_FRAME_PICTURE)
-               return 0;
-       
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
-       
-       pipe = surface->context->pipe;
-       
-       vs_consts = pipe->winsys->buffer_map
-       (
-               pipe->winsys,
-               surface->context->states.mc.vs_const_buf.buffer,
-               PIPE_BUFFER_USAGE_CPU_WRITE
-       );
-       
-       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vs_consts->scale.z = 1.0f;
-       vs_consts->scale.w = 1.0f;
-       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vs_consts->mb_pos_trans.z = 0.0f;
-       vs_consts->mb_pos_trans.w = 0.0f;
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
-       
-       surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
+       /* NOTE(review): surface->context is dereferenced without a check; presumably vlBindToContext() must have run first - confirm. */
+       surface->context->render->vlBegin(surface->context->render);
+       /* The whole batch goes to the renderer in one call, bracketed by its vlBegin/vlEnd hooks. */
+       surface->context->render->vlRenderMacroBlocksMpeg2
        (
-               pipe->screen,
-               surface->texture,
-               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+               surface->context->render,
+               batch,
+               surface
        );
-       pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target);
-       pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures);
-       pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers);
-       pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs);
-       pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs);
-       
-       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
-       
+
+       surface->context->render->vlEnd(surface->context->render);
+       /* Whatever the hooks return is not inspected; success (0) is reported unconditionally. */
        return 0;
 }
 
-int vlRenderPMacroBlock
+int vlPutPicture /* Runs 'surface' through the bound context's CSC stage and pushes the CSC framebuffer to 'drawable' via the winsys; always returns 0. */
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       enum VL_MC_TYPE mc_type,
-       struct VL_MOTION_VECTOR *motion_vector,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *ref_surface,
-       struct VL_SURFACE *surface
+       struct vlSurface *surface,
+       vlNativeDrawable drawable,
+       int srcx,
+       int srcy,
+       int srcw,
+       int srch,
+       int destx,
+       int desty,
+       int destw,
+       int desth,
+       enum vlPictureType picture_type
 )
 {
+       struct vlCSC            *csc;
        struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vs_consts;
-       
-       assert(motion_vectors);
-       assert(blocks);
-       assert(ref_surface);
+
        assert(surface);
-       
-       /* TODO: Implement interlaced rendering */
-       if (picture_type != VL_FRAME_PICTURE)
-               return 0;
-       /* TODO: Implement other MC types */
-       if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
-               return 0;
-       
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-       
+       assert(surface->context);
+       /* Both the CSC object and the pipe come from the context the surface is currently bound to. */
+       csc = surface->context->csc;
        pipe = surface->context->pipe;
-       
-       vs_consts = pipe->winsys->buffer_map
+       /* The CSC framebuffer is resized to the destination size on every call, before anything is drawn. */
+       csc->vlResizeFrameBuffer(csc, destw, desth);
+
+       csc->vlBegin(csc);
+       /* All rectangle parameters and the picture type are passed through to the CSC hook unchanged. */
+       csc->vlPutPicture
        (
-               pipe->winsys,
-               surface->context->states.mc.vs_const_buf.buffer,
-               PIPE_BUFFER_USAGE_CPU_WRITE
+               csc,
+               surface,
+               srcx,
+               srcy,
+               srcw,
+               srch,
+               destx,
+               desty,
+               destw,
+               desth,
+               picture_type
        );
-       
-       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vs_consts->scale.z = 1.0f;
-       vs_consts->scale.w = 1.0f;
-       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vs_consts->mb_pos_trans.z = 0.0f;
-       vs_consts->mb_pos_trans.w = 0.0f;
-       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width;
-       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height;
-       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
-       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
-       
-       if (mc_type == VL_FIELD_MC)
-       {
-               vs_consts->denorm.x = (float)surface->width;
-               vs_consts->denorm.y = (float)surface->height;
-               
-               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width;
-               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height;
-               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
-               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
-               
-               pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]);
-               pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]);
-       }
-       else
-       {
-               pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]);
-               pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]);
-       }
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
-       
-       surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
+
+       csc->vlEnd(csc);
+       /* Presentation: flush the render cache, bind the target drawable, then hand the CSC framebuffer to the winsys. */
+       pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+       bind_pipe_drawable(pipe, drawable);
+       /* TODO: Need to take destx, desty into consideration */
+       pipe->winsys->flush_frontbuffer
        (
-               pipe->screen,
-               surface->texture,
-               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+               pipe->winsys,
+               csc->vlGetFrameBuffer(csc),
+               pipe->priv
        );
-       pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target);
-       
-       surface->context->states.mc.textures[3] = ref_surface->texture;
-       pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures);
-       pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers);
-       
-       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
-       
+       /* None of the hook results are checked; 0 is returned unconditionally. */
        return 0;
 }
 
-int vlRenderBMacroBlock
+struct vlScreen* vlSurfaceGetScreen /* Accessor: returns the screen pointer stored in 'surface'. */
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       enum VL_MC_TYPE mc_type,
-       struct VL_MOTION_VECTOR *motion_vector,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *past_surface,
-       struct VL_SURFACE *future_surface,
-       struct VL_SURFACE *surface
+       struct vlSurface *surface
 )
 {
-       struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vs_consts;
-       
-       assert(motion_vectors);
-       assert(blocks);
-       assert(ref_surface);
        assert(surface);
-       
-       /* TODO: Implement interlaced rendering */
-       if (picture_type != VL_FRAME_PICTURE)
-               return 0;
-       /* TODO: Implement other MC types */
-       if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
-               return 0;
-       
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-       
-       pipe = surface->context->pipe;
-       
-       vs_consts = pipe->winsys->buffer_map
-       (
-               pipe->winsys,
-               surface->context->states.mc.vs_const_buf.buffer,
-               PIPE_BUFFER_USAGE_CPU_WRITE
-       );
-       
-       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vs_consts->scale.z = 1.0f;
-       vs_consts->scale.w = 1.0f;
-       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vs_consts->mb_pos_trans.z = 0.0f;
-       vs_consts->mb_pos_trans.w = 0.0f;
-       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width;
-       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height;
-       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
-       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
-       vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width;
-       vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height;
-       vs_consts->mb_tc_trans[1].top_field.z = 0.0f;
-       vs_consts->mb_tc_trans[1].top_field.w = 0.0f;
-       
-       if (mc_type == VL_FIELD_MC)
-       {
-               vs_consts->denorm.x = (float)surface->width;
-               vs_consts->denorm.y = (float)surface->height;
-               
-               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width;
-               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height;
-               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
-               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
-               vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width;
-               vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height;
-               vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f;
-               vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f;
-               
-               pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]);
-               pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]);
-       }
-       else
-       {
-               pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]);
-               pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]);
-       }
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
-       
-       surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
-       (
-               pipe->screen,
-               surface->texture,
-               0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
-       );
-       pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target);
-       
-       surface->context->states.mc.textures[3] = past_surface->texture;
-       surface->context->states.mc.textures[4] = future_surface->texture;
-       pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures);
-       pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers);
-       
-       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
-       
-       return 0;
+       /* This is the pointer vlCreateSurface() stored in sfc->screen; ownership is not transferred. */
+       return surface->screen;
 }
 
-int vlPutSurface
+struct vlContext* vlBindToContext /* Points 'surface' at 'context' and returns the context it was bound to before the call. */
 (
-       struct VL_SURFACE *surface,
-       Drawable drawable,
-       unsigned int srcx,
-       unsigned int srcy,
-       unsigned int srcw,
-       unsigned int srch,
-       unsigned int destx,
-       unsigned int desty,
-       unsigned int destw,
-       unsigned int desth,
-       enum VL_PICTURE picture_type
+       struct vlSurface *surface,
+       struct vlContext *context
 )
 {
-       unsigned int            create_fb = 0;
-       struct pipe_context     *pipe;
-       struct VL_CSC_VS_CONSTS *vs_consts;
-       
+       struct vlContext *old;
+       /* Only 'surface' is asserted. NOTE(review): a NULL 'context' looks like the way to unbind - confirm with callers. */
        assert(surface);
-       
-       pipe = surface->context->pipe;
-       
-       if (!surface->context->states.csc.framebuffer.cbufs[0])
-               create_fb = 1;
-       else if
-       (
-               surface->context->states.csc.framebuffer.width != destw ||
-               surface->context->states.csc.framebuffer.height != desth
-       )
-       {
-               pipe->winsys->surface_release
-               (
-                       pipe->winsys,
-                       &surface->context->states.csc.framebuffer.cbufs[0]
-               );
-               
-               create_fb = 1;
-       }
-       
-       if (create_fb)
-       {
-               surface->context->states.csc.viewport.scale[0] = destw;
-               surface->context->states.csc.viewport.scale[1] = desth;
-               surface->context->states.csc.viewport.scale[2] = 1;
-               surface->context->states.csc.viewport.scale[3] = 1;
-               surface->context->states.csc.viewport.translate[0] = 0;
-               surface->context->states.csc.viewport.translate[1] = 0;
-               surface->context->states.csc.viewport.translate[2] = 0;
-               surface->context->states.csc.viewport.translate[3] = 0;
-               
-               surface->context->states.csc.framebuffer.width = destw;
-               surface->context->states.csc.framebuffer.height = desth;
-               surface->context->states.csc.framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys);
-               pipe->winsys->surface_alloc_storage
-               (
-                       pipe->winsys,
-                       surface->context->states.csc.framebuffer.cbufs[0],
-                       destw,
-                       desth,
-                       PIPE_FORMAT_A8R8G8B8_UNORM,
-                       /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */
-                       PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE,
-                       0
-               );
-       }
-       
-       vlEndRender(surface->context);
-       
-       vs_consts = pipe->winsys->buffer_map
-       (
-               pipe->winsys,
-               surface->context->states.csc.vs_const_buf.buffer,
-               PIPE_BUFFER_USAGE_CPU_WRITE
-       );
-       
-       vs_consts->src_scale.x = srcw / (float)surface->width;
-       vs_consts->src_scale.y = srch / (float)surface->height;
-       vs_consts->src_scale.z = 1;
-       vs_consts->src_scale.w = 1;
-       vs_consts->src_trans.x = srcx / (float)surface->width;
-       vs_consts->src_trans.y = srcy / (float)surface->height;
-       vs_consts->src_trans.z = 0;
-       vs_consts->src_trans.w = 0;
-       
-       pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer);
-       
-       pipe->set_sampler_textures(pipe, 1, &surface->texture);
-       pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
-       pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-       bind_pipe_drawable(pipe, drawable);
-       /* TODO: Need to take destx, desty into consideration */
-       pipe->winsys->flush_frontbuffer
-       (
-               pipe->winsys,
-               surface->context->states.csc.framebuffer.cbufs[0],
-               pipe->priv
-       );
-       
-       vlBeginRender(surface->context);
-       
-       return 0;
-}
 
+       old = surface->context;
+       surface->context = context;
+       /* The previous binding is handed back so the caller can restore it later if needed. */
+       return old;
+}
index 9f56b77e1e70b7d2580c3aa1085e98becd4eb529..b975e131fa60e72a9860a6b6524edfdb626fa6b3 100644 (file)
@@ -1,81 +1,66 @@
 #ifndef vl_surface_h
 #define vl_surface_h
 
-#include <X11/Xlib.h>
 #include "vl_types.h"
 
+#ifdef VL_INTERNAL
 struct pipe_texture;
 
-struct VL_SURFACE
+struct vlSurface /* Layout is only visible to code that defines VL_INTERNAL before including this header. */
 {
-       struct VL_CONTEXT       *context;
+       struct vlScreen         *screen; /* screen passed to vlCreateSurface(); returned by vlSurfaceGetScreen() */
+       struct vlContext        *context; /* current binding, swapped by vlBindToContext() */
        unsigned int            width;
        unsigned int            height;
-       enum VL_FORMAT          format;
+       enum vlFormat           format; /* YCbCr format passed to vlCreateSurface() */
        struct pipe_texture     *texture;
 };
+#endif
 
-int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface);
+int vlCreateSurface
+(
+       struct vlScreen *screen,
+       unsigned int width,
+       unsigned int height,
+       enum vlFormat format,
+       struct vlSurface **surface
+);
 
-int vlDestroySurface(struct VL_SURFACE *surface);
+int vlDestroySurface
+(
+       struct vlSurface *surface
+);
 
-int vlRenderIMacroBlock
+int vlRenderMacroBlocksMpeg2
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *surface
+       struct vlMpeg2MacroBlockBatch *batch,
+       struct vlSurface *surface
 );
 
-int vlRenderPMacroBlock
+int vlPutPicture
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       enum VL_MC_TYPE mc_type,
-       struct VL_MOTION_VECTOR *motion_vector,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *ref_surface,
-       struct VL_SURFACE *surface
+       struct vlSurface *surface,
+       vlNativeDrawable drawable,
+       int srcx,
+       int srcy,
+       int srcw,
+       int srch,
+       int destx,
+       int desty,
+       int destw,
+       int desth,
+       enum vlPictureType picture_type
 );
 
-int vlRenderBMacroBlock
+struct vlScreen* vlSurfaceGetScreen
 (
-       enum VL_PICTURE picture_type,
-       enum VL_FIELD_ORDER field_order,
-       unsigned int mbx,
-       unsigned int mby,
-       enum VL_MC_TYPE mc_type,
-       struct VL_MOTION_VECTOR *motion_vector,
-       unsigned int coded_block_pattern,
-       enum VL_DCT_TYPE dct_type,
-       short *blocks,
-       struct VL_SURFACE *past_surface,
-       struct VL_SURFACE *future_surface,
-       struct VL_SURFACE *surface
+       struct vlSurface *surface
 );
 
-int vlPutSurface
+struct vlContext* vlBindToContext
 (
-       struct VL_SURFACE *surface,
-       Drawable drawable,
-       unsigned int srcx,
-       unsigned int srcy,
-       unsigned int srcw,
-       unsigned int srch,
-       unsigned int destx,
-       unsigned int desty,
-       unsigned int destw,
-       unsigned int desth,
-       enum VL_PICTURE picture_type
+       struct vlSurface *surface,
+       struct vlContext *context
 );
 
 #endif
-
index 4d210c9e0aa9e1c7357e8426f8edebd1a1f24bfb..504ba8ac81a027971072ddf04a35272696127167 100644 (file)
 #ifndef vl_types_h
 #define vl_types_h
 
-enum VL_FORMAT
-{
-       VL_FORMAT_YCBCR_420,
-       VL_FORMAT_YCBCR_422,
-       VL_FORMAT_YCBCR_444
-};
+#if 1 /*#ifdef X11*/
+#include <X11/Xlib.h>
 
-enum VL_PICTURE
-{
-       VL_TOP_FIELD,
-       VL_BOTTOM_FIELD,
-       VL_FRAME_PICTURE
-};
+typedef Display* vlNativeDisplay; /* native display handle: an Xlib Display pointer */
+typedef Drawable vlNativeDrawable; /* native drawable handle: an Xlib Drawable (XID) */
+#endif
+/* Forward declarations, so these types can be passed around by pointer without their definitions. */
+struct vlDisplay;
+struct vlScreen;
+struct vlContext;
+struct vlSurface;
 
-enum VL_FIELD_ORDER
+enum vlProfile /* MPEG-2 profile identifiers: Simple and Main. */
 {
-       VL_FIELD_FIRST,
-       VL_FIELD_SECOND
+       vlProfileMpeg2Simple,
+       vlProfileMpeg2Main,
+       /* Being last, vlProfileCount equals the number of profiles listed above. */
+       vlProfileCount
 };
 
-enum VL_DCT_TYPE
+enum vlEntryPoint /* Entry points named IDCT, MC and CSC; NOTE(review): how callers choose one is not visible here. */
 {
-       VL_DCT_FIELD_CODED,
-       VL_DCT_FRAME_CODED
+       vlEntryPointIDCT,
+       vlEntryPointMC,
+       vlEntryPointCSC,
+       /* Being last, vlEntryPointCount equals the number of entry points listed above. */
+       vlEntryPointCount
 };
 
-enum VL_SAMPLE_TYPE
+enum vlFormat /* YCbCr surface formats, distinguished by chroma subsampling. */
 {
-       VL_FULL_SAMPLE,
-       VL_DIFFERENCE_SAMPLE
+       vlFormatYCbCr420, /* 4:2:0 */
+       vlFormatYCbCr422, /* 4:2:2 */
+       vlFormatYCbCr444 /* 4:4:4 */
 };
 
-enum VL_MC_TYPE
+enum vlPictureType /* Picture structure: a single top or bottom field, or a whole frame. */
 {
-       VL_FIELD_MC,
-       VL_FRAME_MC,
-       VL_DUAL_PRIME_MC,
-       VL_16x8_MC = VL_FRAME_MC
+       vlPictureTypeTopField,
+       vlPictureTypeBottomField,
+       vlPictureTypeFrame
 };
 
-struct VL_VERTEX4F
+enum vlMotionType /* Motion compensation modes; takes over from the removed VL_MC_TYPE. */
 {
-       float x, y, z, w;
+       vlMotionTypeField,
+       vlMotionTypeFrame,
+       vlMotionTypeDualPrime,
+       vlMotionType16x8 /* NOTE(review): now a distinct value (3); the old VL_16x8_MC aliased VL_FRAME_MC - confirm nothing relied on that */
 };
 
-struct VL_VERTEX2F
+enum vlFieldOrder /* Whether a field is the first or the second one of its frame. */
 {
-       float x, y;
+       vlFieldOrderFirst,
+       vlFieldOrderSecond
 };
 
-struct VL_TEXCOORD2F
+enum vlDCTType /* NOTE(review): numeric order is swapped versus the removed VL_DCT_TYPE, where field-coded was 0. */
 {
-       float s, t;
+       vlDCTTypeFrameCoded, /* 0 */
+       vlDCTTypeFieldCoded /* 1 */
 };
 
-struct VL_MC_VS_CONSTS
+struct vlVertex2f /* Two-component float vector. */
 {
-       struct VL_VERTEX4F      scale;
-       struct VL_VERTEX4F      mb_pos_trans;
-       struct VL_VERTEX4F      denorm;
-       struct
-       {
-               struct VL_VERTEX4F      top_field;
-               struct VL_VERTEX4F      bottom_field;
-       } mb_tc_trans[2];
+       float x, y;
 };
 
-struct VL_MC_FS_CONSTS
+struct vlVertex4f /* Four-component float vector. */
 {
-       struct VL_VERTEX4F      multiplier;
-       struct VL_VERTEX4F      bias;
-       struct VL_VERTEX4F      y_divider;
+       float x, y, z, w;
 };
 
-struct VL_CSC_VS_CONSTS
+enum vlMacroBlockType /* Prediction kind of a macroblock: intra, forward-, backward- or bi-predicted. */
 {
-       struct VL_VERTEX4F      src_scale;
-       struct VL_VERTEX4F      src_trans;
+       vlMacroBlockTypeIntra,
+       vlMacroBlockTypeFwdPredicted,
+       vlMacroBlockTypeBkwdPredicted,
+       vlMacroBlockTypeBiPredicted
 };
 
-struct VL_CSC_FS_CONSTS
+struct vlMpeg2MacroBlock /* Description of one MPEG-2 macroblock, as stored in a vlMpeg2MacroBlockBatch. */
 {
-       struct VL_VERTEX4F      bias;
-       float                   matrix[16];
+       unsigned int            mbx, mby; /* macroblock position; presumably in macroblock units, not pixels - TODO confirm */
+       enum vlMacroBlockType   mb_type;
+       enum vlMotionType       mo_type;
+       enum vlDCTType          dct_type;
+       int                     PMV[2][2][2]; /* motion vectors; the meaning of each index is not visible here - TODO confirm */
+       unsigned int            cbp; /* presumably the coded block pattern bitmask - confirm against the renderer */
+       short                   *blocks; /* block data for this macroblock; ownership is not established here */
 };
 
-struct VL_MOTION_VECTOR
+struct vlMpeg2MacroBlockBatch /* Argument bundle handed to vlRenderMacroBlocksMpeg2(). */
 {
-       struct
-       {
-               int x, y;
-       } top_field, bottom_field;
+       struct vlSurface                *past_surface; /* presumably the forward-prediction reference - TODO confirm */
+       struct vlSurface                *future_surface; /* presumably the backward-prediction reference - TODO confirm */
+       enum vlPictureType              picture_type;
+       enum vlFieldOrder               field_order;
+       unsigned int                    num_macroblocks; /* presumably the element count of 'macroblocks' - TODO confirm */
+       struct vlMpeg2MacroBlock        *macroblocks;
 };
 
-struct VL_CONTEXT;
-struct VL_SURFACE;
-
 #endif
-
index 2421ae22101ac224eff8c38f514574030aba4b26..50aa9af66f2684d7ae44b86e92f1e3ea089bc8f2 100644 (file)
@@ -4,14 +4,13 @@
 unsigned int vlRoundUpPOT(unsigned int x)
 {
        unsigned int i;
-       
+       /* Rounds x up to the nearest power of two (x itself if it already is one); x must be non-zero. */
        assert(x > 0);
-       
+       /* Decrement first so that an exact power of two maps to itself. */
        --x;
-       
+       /* Smear the highest set bit into every lower bit position (shifts of 1, 2, 4, ...). */
        for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1)
                x |= x >> i;
-       
+       /* All ones below the top bit, plus one, is the result; it wraps to 0 if x exceeded the largest representable power of two. */
        return x + 1;
 }
-
index e4b72c4f87003b1fb26c1f66c60f2e02c3c72400..bc98e79df479da0f4ec73aab3d216772a6edc869 100644 (file)
@@ -4,4 +4,3 @@
 unsigned int vlRoundUpPOT(unsigned int x);
 
 #endif
-
index deca305bdcf92543abaf32276fde871b65ebb296..328b035576b4575bf14493f78a1e7da09bd3a2ec 100644 (file)
@@ -2,83 +2,78 @@
 #include <stdlib.h>
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMC.h>
+#include <vl_display.h>
+#include <vl_screen.h>
 #include <vl_context.h>
 
-/*
- * XvMC defines 64 element blocks (8x8 elements).
- * Elements are 8 bits when they represent color values,
- * 9 bits when they reprecent DCT coefficients, we
- * store them in 2 bytes in either case.
- */
 #define BLOCK_SIZE (64 * 2)
 
 Status XvMCCreateBlocks(Display *display, XvMCContext *context, unsigned int num_blocks, XvMCBlockArray *blocks)
 {
-       struct vl_context *vl_ctx;
-       
+       struct vlContext *vl_ctx;
+       /* Allocates num_blocks * BLOCK_SIZE bytes of block storage and fills in the caller's XvMCBlockArray. */
        assert(display);
-       
+       /* A missing context or a zero count is reported as an error code rather than asserted. */
        if (!context)
                return XvMCBadContext;
        if (num_blocks == 0)
                return BadValue;
-       
+
        assert(blocks);
-       
+       /* Assert-enabled builds check that 'display' matches the native display behind the context's screen. */
        vl_ctx = context->privData;
-       assert(display == vl_ctx->display);
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx))));
 
        blocks->context_id = context->context_id;
        blocks->num_blocks = num_blocks;
        blocks->blocks = malloc(BLOCK_SIZE * num_blocks);
        /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */
        blocks->privData = display;
-       
+       /* NOTE(review): the malloc() result is not checked; Success is returned even if the allocation failed. */
        return Success;
 }
 
 Status XvMCDestroyBlocks(Display *display, XvMCBlockArray *blocks)
-{      
+{ /* Frees blocks->blocks only; the XvMCBlockArray struct itself is left untouched. */
        assert(display);
        assert(blocks);
        assert(display == blocks->privData);
        free(blocks->blocks);
-       
+       /* blocks->blocks is not reset after the free; Success is returned unconditionally. */
        return Success;
 }
 
 Status XvMCCreateMacroBlocks(Display *display, XvMCContext *context, unsigned int num_blocks, XvMCMacroBlockArray *blocks)
 {
-       struct vl_context *vl_ctx;
-       
+       struct vlContext *vl_ctx;
+       /* Allocates an array of num_blocks XvMCMacroBlock entries and fills in the caller's XvMCMacroBlockArray. */
        assert(display);
-       
+       /* A missing context or a zero count is reported as an error code rather than asserted. */
        if (!context)
                return XvMCBadContext;
        if (num_blocks == 0)
                return BadValue;
-       
+
        assert(blocks);
-       
+       /* Assert-enabled builds check that 'display' matches the native display behind the context's screen. */
        vl_ctx = context->privData;
-       assert(display == vl_ctx->display);
-       
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx))));
+
        blocks->context_id = context->context_id;
        blocks->num_blocks = num_blocks;
        blocks->macro_blocks = malloc(sizeof(XvMCMacroBlock) * num_blocks);
        /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */
        blocks->privData = display;
-       
+       /* NOTE(review): the malloc() result is not checked; Success is returned even if the allocation failed. */
        return Success;
 }
 
 Status XvMCDestroyMacroBlocks(Display *display, XvMCMacroBlockArray *blocks)
-{      
+{ /* Frees blocks->macro_blocks only; the XvMCMacroBlockArray struct itself is left untouched. */
        assert(display);
        assert(blocks);
        assert(display == blocks->privData);
        free(blocks->macro_blocks);
-       
+       /* blocks->macro_blocks is not reset after the free; Success is returned unconditionally. */
        return Success;
 }
-
index 9cf654d6bb211069765833cf889c32fa7a9898e6..760e012d1a807167382cf2e1d4a48604a3a4e51f 100644 (file)
@@ -1,10 +1,23 @@
 #include <assert.h>
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMClib.h>
+#include <pipe/p_context.h>
+#include <vl_display.h>
+#include <vl_screen.h>
 #include <vl_context.h>
 #include <vl_winsys.h>
 
-static Status Validate(Display *display, XvPortID port, int surface_type_id, unsigned int width, unsigned int height, int flags, int *chroma_format)
+static Status Validate
+(
+       Display *display,
+       XvPortID port,
+       int surface_type_id,
+       unsigned int width,
+       unsigned int height,
+       int flags,
+       int *chroma_format,
+       int *mc_type
+)
 {
        unsigned int    found_port = 0;
        unsigned int    found_surface = 0;
@@ -14,13 +27,13 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns
        unsigned int    max_width, max_height;
        Status          ret;
        unsigned int    i, j, k;
-       
+
        assert(display && chroma_format);
-       
+
        ret = XvQueryAdaptors(display, XDefaultRootWindow(display), &num_adaptors, &adaptor_info);
        if (ret != Success)
                return ret;
-       
+
        /* Scan through all adaptors looking for this port and surface */
        for (i = 0; i < num_adaptors && !found_port; ++i)
        {
@@ -31,10 +44,10 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns
                        if (adaptor_info[i].base_id + j == port)
                        {
                                XvMCSurfaceInfo *surface_info;
-                               
+
                                found_port = 1;
                                surface_info = XvMCListSurfaceTypes(display, adaptor_info[i].base_id, &num_types);
-                               
+
                                if (surface_info)
                                {
                                        for (k = 0; k < num_types && !found_surface; ++k)
@@ -45,9 +58,10 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns
                                                        max_width = surface_info[k].max_width;
                                                        max_height = surface_info[k].max_height;
                                                        *chroma_format = surface_info[k].chroma_format;
+                                                       *mc_type = surface_info[k].mc_type;
                                                }
                                        }
-                                       
+
                                        XFree(surface_info);
                                }
                                else
@@ -58,9 +72,9 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns
                        }
                }
        }
-       
+
        XvFreeAdaptorInfo(adaptor_info);
-       
+
        if (!found_port)
                return XvBadPort;
        if (!found_surface)
@@ -69,60 +83,86 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns
                return BadValue;
        if (flags != XVMC_DIRECT && flags != 0)
                return BadValue;
-       
+
        return Success;
 }
 
-static enum VL_FORMAT FormatToVL(int xvmc_format)
+static enum vlProfile ProfileToVL(int xvmc_profile)
+{
+       if (xvmc_profile & XVMC_MPEG_1)
+               assert(0);
+       else if (xvmc_profile & XVMC_MPEG_2)
+               return vlProfileMpeg2Main;
+       else if (xvmc_profile & XVMC_H263)
+               assert(0);
+       else if (xvmc_profile & XVMC_MPEG_4)
+               assert(0);
+       else
+               assert(0);
+
+       return -1;
+}
+
+static enum vlEntryPoint EntryToVL(int xvmc_entry)
+{
+       return xvmc_entry & XVMC_IDCT ? vlEntryPointIDCT : vlEntryPointMC;
+}
+
+static enum vlFormat FormatToVL(int xvmc_format)
 {
-       enum VL_FORMAT vl_format;
-       
        switch (xvmc_format)
        {
                case XVMC_CHROMA_FORMAT_420:
-               {
-                       vl_format = VL_FORMAT_YCBCR_420;
-                       break;
-               }
+                       return vlFormatYCbCr420;
                case XVMC_CHROMA_FORMAT_422:
-               {
-                       vl_format = VL_FORMAT_YCBCR_422;
-                       break;
-               }
+                       return vlFormatYCbCr422;
                case XVMC_CHROMA_FORMAT_444:
-               {
-                       vl_format = VL_FORMAT_YCBCR_444;
-                       break;
-               }
+                       return vlFormatYCbCr444;
                default:
                        assert(0);
        }
-       
-       return vl_format;
+
+       return -1;
 }
 
 Status XvMCCreateContext(Display *display, XvPortID port, int surface_type_id, int width, int height, int flags, XvMCContext *context)
 {
        int                     chroma_format;
+       int                     mc_type;
        Status                  ret;
-       struct VL_CONTEXT       *vl_ctx;
+       struct vlDisplay        *vl_dpy;
+       struct vlScreen         *vl_scrn;
+       struct vlContext        *vl_ctx;
        struct pipe_context     *pipe;
-       
+
        assert(display);
-       
+
        if (!context)
                return XvMCBadContext;
-       
-       ret = Validate(display, port, surface_type_id, width, height, flags, &chroma_format);
+
+       ret = Validate(display, port, surface_type_id, width, height, flags, &chroma_format, &mc_type);
        if (ret != Success)
                return ret;
-       
+
+       /* XXX: Assumes the default screen; should check which screen the port is on */
        pipe = create_pipe_context(display, XDefaultScreen(display));
-       
+
        assert(pipe);
-       
-       vlCreateContext(display, pipe, width, height, FormatToVL(chroma_format), &vl_ctx);
-       
+
+       vlCreateDisplay(display, &vl_dpy);
+       vlCreateScreen(vl_dpy, XDefaultScreen(display), pipe->screen, &vl_scrn);
+       vlCreateContext
+       (
+               vl_scrn,
+               pipe,
+               width,
+               height,
+               FormatToVL(chroma_format),
+               ProfileToVL(mc_type),
+               EntryToVL(mc_type),
+               &vl_ctx
+       );
+
        context->context_id = XAllocID(display);
        context->surface_type_id = surface_type_id;
        context->width = width;
@@ -130,89 +170,27 @@ Status XvMCCreateContext(Display *display, XvPortID port, int surface_type_id, i
        context->flags = flags;
        context->port = port;
        context->privData = vl_ctx;
-       
+
        return Success;
 }
 
 Status XvMCDestroyContext(Display *display, XvMCContext *context)
 {
-       struct VL_CONTEXT       *vl_ctx;
+       struct vlContext        *vl_ctx;
        struct pipe_context     *pipe;
-       
+
        assert(display);
-       
+
        if (!context)
                return XvMCBadContext;
-       
+
        vl_ctx = context->privData;
-       
-       assert(display == vl_ctx->display);
-       
-       pipe = vl_ctx->pipe;
-       vlDestroyContext(vl_ctx);
-       destroy_pipe_context(pipe);
-       
-       return Success;
-}
 
-/* XXX: The following are here temporarily, need to be implemented in the DDX driver */
-/* TODO: Figure out which of these need to be in DDX, which are better off in DDX, which can stay */
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx))));
 
-Bool XvMCQueryExtension(Display *display, int *event_base, int *err_base)
-{
-       *event_base = 0;
-       *err_base = 0;
-       
-       return True;
-}
+       pipe = vlGetPipeContext(vl_ctx);
+       vlDestroyContext(vl_ctx);
+       destroy_pipe_context(pipe);
 
-Status XvMCQueryVersion(Display *display, int *major, int *minor)
-{
-       *major = 1;
-       *minor = 0;
-       
        return Success;
 }
-
-XvMCSurfaceInfo* XvMCListSurfaceTypes(Display *display, XvPortID port, int *num)
-{
-       XvMCSurfaceInfo *surface_info = calloc(1, sizeof(XvMCSurfaceInfo));
-       
-       *num = 1;
-       
-       surface_info->chroma_format = XVMC_CHROMA_FORMAT_420;
-       surface_info->max_width = 2048;
-       surface_info->max_height = 2048;
-       surface_info->subpicture_max_width = 2048;
-       surface_info->subpicture_max_height = 2048;
-       surface_info->mc_type = XVMC_IDCT | XVMC_MPEG_2;
-       surface_info->surface_type_id = 123; /* FIXME: XAllocID(display)*/;
-       surface_info->flags = XVMC_INTRA_UNSIGNED | XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE;
-       
-       return surface_info;
-}
-
-XvImageFormatValues* XvMCListSubpictureTypes(Display* display, XvPortID port, int surface_type_id, int *count_return)
-{
-       XvImageFormatValues *image_formats = calloc(1, sizeof(XvImageFormatValues));
-       
-       *count_return = 1;
-       
-       image_formats[0].id = 123;
-       image_formats[0].type = XvRGB;
-       image_formats[0].byte_order = LSBFirst;
-       image_formats[0].bits_per_pixel = 8;
-       image_formats[0].format = XvPacked;
-       image_formats[0].num_planes = 1;
-       image_formats[0].depth = 8;
-       image_formats[0].red_mask = 0x0000FF;
-       image_formats[0].green_mask = 0x00FF00;
-       image_formats[0].blue_mask = 0xFF0000;
-       image_formats[0].component_order[0] = 'R';
-       image_formats[0].component_order[0] = 'G';
-       image_formats[0].component_order[0] = 'B';
-       image_formats[0].scanline_order = XvTopToBottom;
-       
-       return image_formats;
-}
-
index 1c07220e84437539fba3c4ace65f77f3b0862e80..038befc297b42b3f87f94140deb2c382f3a376dd 100644 (file)
@@ -1,90 +1,96 @@
 #include <assert.h>
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMC.h>
+#include <vl_display.h>
+#include <vl_screen.h>
 #include <vl_context.h>
 #include <vl_surface.h>
+#include <vl_types.h>
 
-static enum VL_PICTURE PictureToVL(int xvmc_pic)
+static enum vlMacroBlockType TypeToVL(int xvmc_mb_type)
+{
+       if (xvmc_mb_type & XVMC_MB_TYPE_INTRA)
+               return vlMacroBlockTypeIntra;
+       if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_FORWARD)
+               return vlMacroBlockTypeFwdPredicted;
+       if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_BACKWARD)
+               return vlMacroBlockTypeBkwdPredicted;
+       if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
+               return vlMacroBlockTypeBiPredicted;
+
+       assert(0);
+
+       return -1;
+}
+
+static enum vlPictureType PictureToVL(int xvmc_pic)
 {
-       enum VL_PICTURE vl_pic;
-       
        switch (xvmc_pic)
        {
                case XVMC_TOP_FIELD:
-               {
-                       vl_pic = VL_TOP_FIELD;
-                       break;
-               }
+                       return vlPictureTypeTopField;
                case XVMC_BOTTOM_FIELD:
-               {
-                       vl_pic = VL_BOTTOM_FIELD;
-                       break;
-               }
+                       return vlPictureTypeBottomField;
                case XVMC_FRAME_PICTURE:
-               {
-                       vl_pic = VL_FRAME_PICTURE;
-                       break;
-               }
+                       return vlPictureTypeFrame;
                default:
                        assert(0);
        }
-       
-       return vl_pic;
+
+       return -1;
 }
 
-static enum VL_MC_TYPE MotionToVL(int xvmc_motion_type)
+static enum vlMotionType MotionToVL(int xvmc_motion_type)
 {
-       enum VL_MC_TYPE vl_mc_type;
-       
        switch (xvmc_motion_type)
        {
                case XVMC_PREDICTION_FRAME:
-               {
-                       vl_mc_type = VL_FRAME_MC;
-                       break;
-               }
+                       return vlMotionTypeFrame;
                case XVMC_PREDICTION_FIELD:
-               {
-                       vl_mc_type = VL_FIELD_MC;
-                       break;
-               }
+                       return vlMotionTypeField;
                case XVMC_PREDICTION_DUAL_PRIME:
-               {
-                       vl_mc_type = VL_DUAL_PRIME_MC;
-                       break;
-               }
+                       return vlMotionTypeDualPrime;
                default:
                        assert(0);
        }
-       
-       return vl_mc_type;
+
+       return -1;
 }
 
 Status XvMCCreateSurface(Display *display, XvMCContext *context, XvMCSurface *surface)
 {
-       struct VL_CONTEXT *vl_ctx;
-       struct VL_SURFACE *vl_sfc;
-       
+       struct vlContext *vl_ctx;
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!context)
                return XvMCBadContext;
        if (!surface)
                return XvMCBadSurface;
-       
+
        vl_ctx = context->privData;
-       
-       assert(display == vl_ctx->display);
-       
-       vlCreateSurface(vl_ctx, &vl_sfc);
-       
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx))));
+
+       vlCreateSurface
+       (
+               vlContextGetScreen(vl_ctx),
+               context->width,
+               context->height,
+               vlGetPictureFormat(vl_ctx),
+               &vl_sfc
+       );
+
+       vlBindToContext(vl_sfc, vl_ctx);
+
        surface->surface_id = XAllocID(display);
        surface->context_id = context->context_id;
        surface->surface_type_id = context->surface_type_id;
        surface->width = context->width;
        surface->height = context->height;
        surface->privData = vl_sfc;
-       
+
        return Success;
 }
 
@@ -103,19 +109,21 @@ Status XvMCRenderSurface
        XvMCBlockArray *blocks
 )
 {
-       struct VL_CONTEXT       *vl_ctx;
-       struct VL_SURFACE       *target_vl_surface;
-       struct VL_SURFACE       *past_vl_surface;
-       struct VL_SURFACE       *future_vl_surface;
-       unsigned int            i;
-       
+       struct vlContext                *vl_ctx;
+       struct vlSurface                *target_vl_surface;
+       struct vlSurface                *past_vl_surface;
+       struct vlSurface                *future_vl_surface;
+       struct vlMpeg2MacroBlockBatch   batch;
+       struct vlMpeg2MacroBlock        vl_macroblocks[num_macroblocks];
+       unsigned int                    i;
+
        assert(display);
-       
+
        if (!context)
                return XvMCBadContext;
        if (!target_surface)
                return XvMCBadSurface;
-       
+
        if
        (
                picture_structure != XVMC_TOP_FIELD &&
@@ -125,178 +133,94 @@ Status XvMCRenderSurface
                return BadValue;
        if (future_surface && !past_surface)
                return BadMatch;
-       
+
        vl_ctx = context->privData;
-       
-       assert(display == vl_ctx->display);
-       
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx))));
+
        target_vl_surface = target_surface->privData;
        past_vl_surface = past_surface ? past_surface->privData : NULL;
        future_vl_surface = future_surface ? future_surface->privData : NULL;
-       
-       assert(vl_ctx == target_vl_surface->context);
-       assert(!past_vl_surface || vl_ctx == past_vl_surface->context);
-       assert(!future_vl_surface || vl_ctx == future_vl_surface->context);
-       
+
+       assert(context->context_id == target_surface->context_id);
+       assert(!past_surface || context->context_id == past_surface->context_id);
+       assert(!future_surface || context->context_id == future_surface->context_id);
+
        assert(macroblocks);
        assert(blocks);
-       
+
        assert(macroblocks->context_id == context->context_id);
        assert(blocks->context_id == context->context_id);
-       
+
        assert(flags == 0 || flags == XVMC_SECOND_FIELD);
-       
-       /* TODO: Batch macroblocks by type (I,P,B) */
-       
-       for (i = first_macroblock; i < first_macroblock + num_macroblocks; ++i)
-               if (macroblocks->macro_blocks[i].macroblock_type & XVMC_MB_TYPE_INTRA)
-                       vlRenderIMacroBlock
-                       (
-                               PictureToVL(picture_structure),
-                               flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
-                               macroblocks->macro_blocks[i].x,
-                               macroblocks->macro_blocks[i].y,
-                               macroblocks->macro_blocks[i].coded_block_pattern,
-                               macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
-                               blocks->blocks + (macroblocks->macro_blocks[i].index * 64),
-                               target_vl_surface
-                       );
-               else if
-               (
-                       (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
-                       == XVMC_MB_TYPE_MOTION_FORWARD
-               )
-               {
-                       struct VL_MOTION_VECTOR motion_vector =
-                       {
-                               {
-                                       macroblocks->macro_blocks[i].PMV[0][0][0],
-                                       macroblocks->macro_blocks[i].PMV[0][0][1],
-                               },
-                               {
-                                       macroblocks->macro_blocks[i].PMV[1][0][0],
-                                       macroblocks->macro_blocks[i].PMV[1][0][1],
-                               }
-                       };
-                                               
-                       vlRenderPMacroBlock
-                       (
-                               PictureToVL(picture_structure),
-                               flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
-                               macroblocks->macro_blocks[i].x,
-                               macroblocks->macro_blocks[i].y,
-                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
-                               &motion_vector,
-                               macroblocks->macro_blocks[i].coded_block_pattern,
-                               macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
-                               blocks->blocks + (macroblocks->macro_blocks[i].index * 64),
-                               past_vl_surface,
-                               target_vl_surface
-                       );
-               }
-               else if
-               (
-                       (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
-                       == XVMC_MB_TYPE_MOTION_BACKWARD
-               )
-               {
-                       struct VL_MOTION_VECTOR motion_vector =
-                       {
-                               {
-                                       macroblocks->macro_blocks[i].PMV[0][1][0],
-                                       macroblocks->macro_blocks[i].PMV[0][1][1],
-                               },
-                               {
-                                       macroblocks->macro_blocks[i].PMV[1][1][0],
-                                       macroblocks->macro_blocks[i].PMV[1][1][1],
-                               }
-                       };
-                       
-                       vlRenderPMacroBlock
-                       (
-                               PictureToVL(picture_structure),
-                               flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
-                               macroblocks->macro_blocks[i].x,
-                               macroblocks->macro_blocks[i].y,
-                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
-                               &motion_vector,
-                               macroblocks->macro_blocks[i].coded_block_pattern,
-                               macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
-                               blocks->blocks + (macroblocks->macro_blocks[i].index * 64),
-                               future_vl_surface,
-                               target_vl_surface
-                       );
-               }
-               else if
-               (
-                       (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
-                       == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)
-               )
-               {
-                       struct VL_MOTION_VECTOR motion_vector[2] =
-                       {
-                               {
-                                       {
-                                               macroblocks->macro_blocks[i].PMV[0][0][0],
-                                               macroblocks->macro_blocks[i].PMV[0][0][1],
-                                       },
-                                       {
-                                               macroblocks->macro_blocks[i].PMV[1][0][0],
-                                               macroblocks->macro_blocks[i].PMV[1][0][1],
-                                       }
-                               },
-                               {
-                                       {
-                                               macroblocks->macro_blocks[i].PMV[0][1][0],
-                                               macroblocks->macro_blocks[i].PMV[0][1][1],
-                                       },
-                                       {
-                                               macroblocks->macro_blocks[i].PMV[1][1][0],
-                                               macroblocks->macro_blocks[i].PMV[1][1][1],
-                                       }
-                               }
-                       };
-                       
-                       vlRenderBMacroBlock
-                       (
-                               PictureToVL(picture_structure),
-                               flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
-                               macroblocks->macro_blocks[i].x,
-                               macroblocks->macro_blocks[i].y,
-                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
-                               motion_vector,
-                               macroblocks->macro_blocks[i].coded_block_pattern,
-                               macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
-                               blocks->blocks + (macroblocks->macro_blocks[i].index * 64),
-                               past_vl_surface,
-                               future_vl_surface,
-                               target_vl_surface
-                       );
-               }
-               else
-                       fprintf(stderr, "Unrecognized macroblock\n");
-       
+
+       batch.past_surface = past_vl_surface;
+       batch.future_surface = future_vl_surface;
+       batch.picture_type = PictureToVL(picture_structure);
+       batch.field_order = flags & XVMC_SECOND_FIELD ? vlFieldOrderSecond : vlFieldOrderFirst;
+       batch.num_macroblocks = num_macroblocks;
+       batch.macroblocks = vl_macroblocks;
+
+       for (i = 0; i < num_macroblocks; ++i)
+       {
+               unsigned int j = first_macroblock + i;
+
+               unsigned int k, l, m;
+
+               batch.macroblocks[i].mbx = macroblocks->macro_blocks[j].x;
+               batch.macroblocks[i].mby = macroblocks->macro_blocks[j].y;
+               batch.macroblocks[i].mb_type = TypeToVL(macroblocks->macro_blocks[j].macroblock_type);
+               if (batch.macroblocks[i].mb_type != vlMacroBlockTypeIntra)
+                       batch.macroblocks[i].mo_type = MotionToVL(macroblocks->macro_blocks[j].motion_type);
+               batch.macroblocks[i].dct_type = macroblocks->macro_blocks[j].dct_type & XVMC_DCT_TYPE_FIELD ? vlDCTTypeFieldCoded : vlDCTTypeFrameCoded;
+
+               for (k = 0; k < 2; ++k)
+                       for (l = 0; l < 2; ++l)
+                               for (m = 0; m < 2; ++m)
+                                       batch.macroblocks[i].PMV[k][l][m] = macroblocks->macro_blocks[j].PMV[k][l][m];
+
+               batch.macroblocks[i].cbp = macroblocks->macro_blocks[j].coded_block_pattern;
+               batch.macroblocks[i].blocks = blocks->blocks + (macroblocks->macro_blocks[j].index * 64);
+       }
+
+       vlRenderMacroBlocksMpeg2(&batch, target_vl_surface);
+
        return Success;
 }
 
 Status XvMCFlushSurface(Display *display, XvMCSurface *surface)
 {
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-       
-       /* TODO: Check display & surface match */
+
+       vl_sfc = surface->privData;
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc))));
+
+       /* TODO */
+
        return Success;
 }
 
 Status XvMCSyncSurface(Display *display, XvMCSurface *surface)
 {
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-       
+
+       vl_sfc = surface->privData;
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc))));
+
+       /* TODO */
+
        return Success;
 }
 
@@ -321,92 +245,85 @@ Status XvMCPutSurface
        unsigned int            width, height;
        unsigned int            border_width;
        unsigned int            depth;
-       struct VL_SURFACE       *vl_sfc;
-       
+       struct vlSurface        *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-               
+
        if (XGetGeometry(display, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable)
                return BadDrawable;
-       
+
        assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
-       
+
        /* TODO: Correct for negative srcx,srcy & destx,desty by clipping */
-       
+
        assert(srcx + srcw - 1 < surface->width);
        assert(srcy + srch - 1 < surface->height);
        assert(destx + destw - 1 < width);
        assert(desty + desth - 1 < height);
-       
+
        vl_sfc = surface->privData;
-       
-       vlPutSurface(vl_sfc, drawable, srcx, srcy, srcw, srch, destx, desty, destw, desth, PictureToVL(flags));
-       
+
+       vlPutPicture(vl_sfc, drawable, srcx, srcy, srcw, srch, destx, desty, destw, desth, PictureToVL(flags));
+
        return Success;
 }
 
 Status XvMCGetSurfaceStatus(Display *display, XvMCSurface *surface, int *status)
 {
-       struct VL_CONTEXT *vl_ctx;
-       struct VL_SURFACE *vl_sfc;
-       
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-               
+
        assert(status);
-       
+
        vl_sfc = surface->privData;
-       vl_ctx = vl_sfc->context;
-       
-       assert(display == vl_ctx->display);
-       
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc))));
+
        /* TODO */
        *status = 0;
-       
+
        return Success;
 }
 
 Status XvMCDestroySurface(Display *display, XvMCSurface *surface)
 {
-       struct VL_CONTEXT *vl_ctx;
-       struct VL_SURFACE *vl_sfc;
-       
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-       
+
        vl_sfc = surface->privData;
-       vl_ctx = vl_sfc->context;
-       
-       assert(display == vl_ctx->display);
-       
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc))));
+
        vlDestroySurface(vl_sfc);
-       
+
        return Success;
 }
 
 Status XvMCHideSurface(Display *display, XvMCSurface *surface)
 {
-       struct VL_CONTEXT *vl_ctx;
-       struct VL_SURFACE *vl_sfc;
-       
+       struct vlSurface *vl_sfc;
+
        assert(display);
-       
+
        if (!surface)
                return XvMCBadSurface;
-       
+
        vl_sfc = surface->privData;
-       vl_ctx = vl_sfc->context;
-       
-       assert(display == vl_ctx->display);
-       
+
+       assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc))));
+
        /* No op, only for overlaid rendering */
-       
+
        return Success;
 }
-