g3dvl: Support for field and frame based MC for progressive pictures.
author Younes Manton <younes.m@gmail.com>
Sun, 29 Jun 2008 00:16:01 +0000 (20:16 -0400)
committer Younes Manton <younes.m@gmail.com>
Mon, 30 Jun 2008 14:11:02 +0000 (10:11 -0400)
MC support for frame and field based motion prediction. Also various bug
fixes, clean up.

src/gallium/state_trackers/g3dvl/tests/.gitignore
src/gallium/state_trackers/g3dvl/tests/Makefile
src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c [new file with mode: 0644]
src/gallium/state_trackers/g3dvl/vl_context.c
src/gallium/state_trackers/g3dvl/vl_context.h
src/gallium/state_trackers/g3dvl/vl_data.c
src/gallium/state_trackers/g3dvl/vl_surface.c
src/gallium/state_trackers/g3dvl/vl_types.h
src/libXvMC/surface.c

index 939666da9ab3d88fe446d419e5f160f70af57050..9b1ec4e2122415eb5be600e6d349a2add847a290 100644 (file)
@@ -2,5 +2,5 @@ test_context
 test_surface
 test_i_rendering
 test_p_rendering
+test_pf_rendering
 test_b_rendering
-
index 8f983593c3a3a30838124d2a11fa7e29df686bb9..45cefa2e570403ac82e762d7d0a8c5bf297e276d 100644 (file)
@@ -20,7 +20,7 @@ LIBS  += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil
 
 .PHONY = all clean
 
-all: test_context test_surface test_i_rendering test_p_rendering test_b_rendering
+all: test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering
 
 test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
        $(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
@@ -34,9 +34,12 @@ test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
 test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
        $(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
 
+test_pf_rendering: test_pf_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+       $(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
 test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
        $(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
 
 clean:
-       rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_b_rendering
+       rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering
 
diff --git a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c
new file mode 100644 (file)
index 0000000..43586fc
--- /dev/null
@@ -0,0 +1,214 @@
+#include <stdio.h>
+#include <X11/Xlib.h>
+#include <vl_context.h>
+#include <vl_surface.h>
+#include <xsp_winsys.h>
+
+static const unsigned short ycbcr16x16_420[8*8*6] =    /* Six 8x8 blocks of 16-bit samples; main() passes this as the block data of every vlRenderIMacroBlock call */
+{      /* NOTE(review): presumably Y0..Y3, then Cb, then Cr of one 4:2:0 macroblock - TODO confirm block order */
+       0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072,
+       0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072,
+       0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,
+       0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,
+       0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5,
+       
+       0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,
+       0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F,
+       
+       0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,
+       0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E
+};
+
+static const signed short ycbcr16x16_420_2[8*8*6] =    /* Six all-zero 8x8 blocks; main() passes this as the block data of the vlRenderPMacroBlock call */
+{      /* NOTE(review): presumably a zero residual, so the output is just the motion-compensated prediction - TODO confirm */
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+       
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0,
+         0,  0,  0,  0,  0,  0,  0,  0
+};
+
+int main(int argc, char **argv)
+{      /* Manual test: renders one field-MC P macroblock from an intra-coded reference surface and shows it in an X11 window */
+       const unsigned int      video_width = 32, video_height = 32;
+       const unsigned int      window_width = video_width * 2, window_height = video_height * 2;       /* Window is twice the video size on each axis */
+       int                     quit = 0;       /* Set to 1 by the KeyPress handler to leave the event loop */
+       Display                 *display;
+       Window                  root, window;
+       Pixmap                  framebuffer;    /* Off-screen target for vlPutSurface; copied to the window on Expose */
+       XEvent                  event;
+       struct pipe_context     *pipe;
+       struct VL_CONTEXT       *ctx;
+       struct VL_SURFACE       *sfc, *ref_sfc; /* sfc receives the P macroblock; ref_sfc receives the I macroblocks */
+       struct VL_MOTION_VECTOR motion_vector =
+       {
+               {0, 0}, {32, 32}        /* NOTE(review): presumably the top-field and bottom-field vectors - confirm against vl_types.h */
+       };
+       
+       display = XOpenDisplay(NULL);   /* NULL selects the default display; the result is not checked for NULL */
+       root = XDefaultRootWindow(display);
+       window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0);
+       framebuffer = XCreatePixmap(display, root, window_width, window_height, 24);    /* Window-sized pixmap with a hard-coded depth of 24 */
+       
+       XSelectInput(display, window, ExposureMask | KeyPressMask);     /* Only Expose and KeyPress are handled below */
+       XMapWindow(display, window);
+       XSync(display, 0);
+       
+       pipe = create_pipe_context(display);    /* Return values of this and the three vlCreate* calls below are not checked */
+       vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx);
+       vlCreateSurface(ctx, &sfc);
+       vlCreateSurface(ctx, &ref_sfc);
+       /* Fill ref_sfc with the same intra data at (0,0), (1,0), (0,1), (1,1); presumably macroblock coordinates - TODO confirm. The (short*) casts drop const. */
+       vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+       vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+       vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+       vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+       vlRenderPMacroBlock     /* The single P macroblock this test exists to exercise */
+       (
+               VL_FRAME_PICTURE,
+               VL_FIELD_FIRST,
+               0,
+               0,
+               VL_FIELD_MC,    /* Field-based motion compensation is the case under test */
+               &motion_vector,
+               0x3F,           /* Same value as in the I macroblock calls; presumably the coded block pattern - TODO confirm */
+               VL_DCT_FRAME_CODED,
+               (short*)ycbcr16x16_420_2,       /* All-zero block data */
+               ref_sfc,        /* Surface filled by the vlRenderIMacroBlock calls above */
+               sfc             /* Surface later handed to vlPutSurface; presumably the render target - TODO confirm */
+       );
+       vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE);  /* Presumably scales the video rect to the window-sized pixmap - TODO confirm */
+       
+       puts("Press any key to continue...");
+       
+       while (!quit)   /* Event loop: blocks in XNextEvent until a KeyPress sets quit */
+       {
+               XNextEvent(display, &event);
+               switch (event.type)
+               {
+                       case Expose:    /* Repaint by copying the whole pixmap into the window */
+                       {
+                               XCopyArea
+                               (
+                                       display,
+                                       framebuffer,
+                                       window,
+                                       XDefaultGC(display, XDefaultScreen(display)),
+                                       0,
+                                       0,
+                                       window_width,
+                                       window_height,
+                                       0,
+                                       0
+                               );
+                               break;
+                       }
+                       case KeyPress:  /* Any key ends the test */
+                       {
+                               quit = 1;
+                               break;
+                       }
+               }
+       }
+       
+       vlDestroySurface(sfc);  /* Both surfaces are destroyed before the context they were created from */
+       vlDestroySurface(ref_sfc);
+       vlDestroyContext(ctx);
+       
+       XFreePixmap(display, framebuffer);
+       XDestroyWindow(display, window);
+       XCloseDisplay(display);
+       
+       return 0;
+}
+
index 3b9afabbb845a9b3da994d4dcc1d2981fd6e55f8..d2b1ad7948e90e28b76d4aa4067ca420d71e6ca1 100644 (file)
@@ -36,8 +36,8 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context)
        const unsigned int              semantic_names[3] =
                                        {
                                                TGSI_SEMANTIC_POSITION,
-                                               TGSI_SEMANTIC_GENERIC,
-                                               TGSI_SEMANTIC_GENERIC,
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Chroma texcoords */
                                        };
        const unsigned int              semantic_indexes[3] = {0, 1, 2};
        const unsigned int              proc_type = TGSI_PROCESSOR_VERTEX;
@@ -353,7 +353,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
        return 0;
 }
 
-static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
+static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context)
 {
        const unsigned int              max_tokens = 100;
        const unsigned int              num_input_attribs = 3;
@@ -361,15 +361,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
        const unsigned int              input_semantic_names[3] =
                                        {
                                                TGSI_SEMANTIC_POSITION,
-                                               TGSI_SEMANTIC_GENERIC,
-                                               TGSI_SEMANTIC_GENERIC
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC   /* Chroma texcoords */
                                        };
        const unsigned int              output_semantic_names[4] =
                                        {
                                                TGSI_SEMANTIC_POSITION,
-                                               TGSI_SEMANTIC_GENERIC,
-                                               TGSI_SEMANTIC_GENERIC,
-                                               TGSI_SEMANTIC_GENERIC
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Chroma texcoords */
+                                               TGSI_SEMANTIC_GENERIC   /* Ref surface texcoords */
                                        };
        const unsigned int              input_semantic_indexes[3] = {0, 1, 2};
        const unsigned int              output_semantic_indexes[4] = {0, 1, 2, 3};
@@ -430,14 +430,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
        /* Declare constant inputs */
        /* C[0] scales the normalized MB to cover 16x16 pixels,
           C[1] translates the macroblock into position on the surface
-          C[2] translates the ref surface texcoords to the ref macroblock */
+          C[2] unused
+          C[3] translates the ref surface texcoords to the ref macroblock */
        decl = tgsi_default_full_declaration();
        decl.Declaration.File = TGSI_FILE_CONSTANT;
        decl.Declaration.Semantic = 1;
        decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
        decl.Semantic.SemanticIndex = 0;
        decl.u.DeclarationRange.First = 0;
-       decl.u.DeclarationRange.Last = 2;
+       decl.u.DeclarationRange.Last = 3;
        ti += tgsi_build_full_declaration
        (
                &decl,
@@ -526,7 +527,7 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
                );
        }
 
-       /* add o3, t0, c2       ; Translate texcoords into position */
+       /* add o3, t0, c3       ; Translate texcoords into position */
        inst = tgsi_default_full_instruction();
        inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        inst.Instruction.NumDstRegs = 1;
@@ -536,6 +537,264 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
        inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullSrcRegisters[0].SrcRegister.Index = 0;
        inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 3;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* END */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_END;
+       inst.Instruction.NumDstRegs = 0;
+       inst.Instruction.NumSrcRegs = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       vs.tokens = tokens;
+       
+       context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs);
+       
+       free(tokens);
+       
+       return 0;
+}
+
+static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context)
+{
+       const unsigned int              max_tokens = 100;
+       const unsigned int              num_input_attribs = 3;
+       const unsigned int              num_output_attribs = 6;
+       const unsigned int              input_semantic_names[3] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC   /* Chroma texcoords */
+                                       };
+       const unsigned int              output_semantic_names[6] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Chroma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Top field surface texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Bottom field surface texcoords */
+                                               TGSI_SEMANTIC_POSITION  /* Pos */
+                                       };
+       const unsigned int              input_semantic_indexes[3] = {0, 1, 2};
+       const unsigned int              output_semantic_indexes[6] = {0, 1, 2, 3, 4, 5};
+       const unsigned int              proc_type = TGSI_PROCESSOR_VERTEX;
+       
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+       struct tgsi_processor           *processor;
+       
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+       
+       unsigned int                    ti;
+       unsigned int                    i;
+       
+       assert(context);
+       
+       pipe = context->pipe;
+       
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+
+       /* Processor */
+       processor = (struct tgsi_processor*)&tokens[2];
+       *processor = tgsi_build_processor(proc_type, header);
+
+       ti = 3;
+
+       /* Declare inputs (pos, texcoords) */
+       for (i = 0; i < num_input_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = input_semantic_names[i];
+               decl.Semantic.SemanticIndex = input_semantic_indexes[i];
+
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* Declare constant inputs */
+       /* C[0] scales the normalized MB to cover 16x16 pixels,
+          C[1] translates the macroblock into position on the surface
+          C[2] denormalizes pos components
+          C[3] translates the ref surface top field texcoords to the ref macroblock
+          C[4] translates the ref surface bottom field texcoords to the ref macroblock */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_CONSTANT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 4;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* Declare outputs (pos, texcoords) */
+       for (i = 0; i < num_output_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_OUTPUT;
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = output_semantic_names[i];
+               decl.Semantic.SemanticIndex = output_semantic_indexes[i];
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul t0, i0, c0       ; Scale normalized coords to window coords */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* add t1, t0, c1       ; Translate vertex into position */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mov o0, t1           ; Move vertex pos to output */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 1;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /*
+       mov o1, i1              ; Move luma & chroma texcoords to output
+       mov o2, i2
+       */
+       for (i = 1; i < num_output_attribs - 1; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i;
+               inst.Instruction.NumSrcRegs = 1;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+
+       /* add o3, t0, c3       ; Translate top field texcoords into position
+          add o4, t0, c4       ; Translate bottom field texcoords into position */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 3;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i + 3;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul o5, t1, c2       ; Denorm pos for fragment shader */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 5;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
        inst.FullSrcRegisters[1].SrcRegister.Index = 2;
        ti += tgsi_build_full_instruction
        (
@@ -560,14 +819,14 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context)
        
        vs.tokens = tokens;
        
-       context->states.mc.p_vs = pipe->create_vs_state(pipe, &vs);
+       context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs);
        
        free(tokens);
        
        return 0;
 }
 
-static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context)
+static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
 {
        const unsigned int              max_tokens = 100;
        const unsigned int              proc_type = TGSI_PROCESSOR_FRAGMENT;
@@ -837,42 +1096,928 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context)
 
        fs.tokens = tokens;
        
-       context->states.mc.p_fs = pipe->create_fs_state(pipe, &fs);
+       context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs);
        
        free(tokens);
        
        return 0;
 }
 
-static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
+static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
 {
-       const unsigned int              max_tokens = 100;
-       const unsigned int              num_input_attribs = 3;
-       const unsigned int              num_output_attribs = 5;
-       const unsigned int              input_semantic_names[3] =
-                                       {
-                                               TGSI_SEMANTIC_POSITION,
-                                               TGSI_SEMANTIC_GENERIC,
-                                               TGSI_SEMANTIC_GENERIC
-                                       };
-       const unsigned int              output_semantic_names[5] =
-                                       {
-                                               TGSI_SEMANTIC_POSITION,
+       const unsigned int              max_tokens = 200;
+       const unsigned int              proc_type = TGSI_PROCESSOR_FRAGMENT;
+       /* Builds the field-based PMC fragment shader token stream, creates the shader and stores it in context->states.mc.p_fs[1]. Always returns 0. */
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+       struct tgsi_processor           *processor;
+
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+       
+       unsigned int                    ti;
+       unsigned int                    i;
+       
+       assert(context);
+       
+       pipe = context->pipe;
+
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* NOTE(review): result is not checked for NULL before use */
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+       
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+
+       /* Processor */
+       processor = (struct tgsi_processor*)&tokens[2];
+       *processor = tgsi_build_processor(proc_type, header);
+
+       ti = 3;
+
+       /* Declare inputs (texcoords) 
+          I[0] Luma texcoords
+          I[1] Chroma texcoords
+          I[2] Ref top field surface texcoords
+          I[3] Ref bottom field surface texcoords
+          I[4] Denormalized texel pos */
+       for (i = 0; i < 5; ++i)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+               decl.Semantic.SemanticIndex = i + 1;
+               decl.Declaration.Interpolate = 1;
+               decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR;
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* Declare constant input */
+       /* C[0] is a multiplier to use when concatenating differential into a single channel
+          C[1] is a bias to get differential back to -1,1 range
+          C[2] is expected to hold 1/2 in .x and 2 in .y for the Y%2 field selector below */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_CONSTANT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 2;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* Declare output (o0, the final color) */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_OUTPUT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 0;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* Declare samplers (s0 luma, s1 chroma Cb, s2 chroma Cr, s3 ref surface) */
+       for (i = 0; i < 4; ++i)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_SAMPLER;
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /*
+       tex2d t0.xw, i0, s0     ; Read texel from luma texture into .x and .w channels
+       mov t1.x, t0.w          ; Move high part from .w channel to .x
+       tex2d t0.yw, i1, s1     ; Read texel from chroma Cb texture into .y and .w channels
+       mov t1.y, t0.w          ; Move high part from .w channel to .y
+       tex2d t0.zw, i1, s2     ; Read texel from chroma Cr texture into .z and .w channels
+       mov t1.z, t0.w          ; Move high part from .w channel to .z
+       */
+       for (i = 0; i < 3; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = 0;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; /* Cb and Cr both sample with the chroma texcoords (i1) */
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+               
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = 1;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               inst.Instruction.NumSrcRegs = 1;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul t1, t1, c0       ; Multiply high part by multiplier to get back its full value */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* add t0, t0, t1       ; Add luma and chroma low and high parts to get a single value */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* sub t0, t0, c1       ; Subtract bias to get back the signed value */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* tex2d t1, i2, s3     ; Read texel from ref macroblock top field
+          tex2d t2, i3, s3     ; Read texel from ref macroblock bottom field */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 1;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i + 2;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+               inst.FullSrcRegisters[1].SrcRegister.Index = 3;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* XXX: Pos values off by 0.5 for rounding? */
+       /* sub t4, i4.y, c2.x   ; Sub 0.5 from denormalized y pos (y broadcast to all channels) */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 4;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mul t3, t4, c2.x     ; Divide pos y coord by 2 (mul by 0.5) */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* floor t3, t3         ; Get rid of fractional part */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_FLOOR;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 1;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mul t3, t3, c2.y     ; Multiply by 2 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* sub t3, t4, t3       ; Subtract from y to get y % 2 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 3;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* lerp t1, t3, t1, t2  ; Blend ref top (t1) and bottom (t2) field texels using y % 2 (t3) as the weight */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_LERP;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 3;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[2].SrcRegister.Index = 2;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* add o0, t0, t1       ; Add ref and differential to form final output */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* END */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_END;
+       inst.Instruction.NumDstRegs = 0;
+       inst.Instruction.NumSrcRegs = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       fs.tokens = tokens;
+       
+       context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs);
+       
+       free(tokens);
+       
+       return 0;
+}
+
+static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context)
+{
+       const unsigned int              max_tokens = 100;
+       const unsigned int              num_input_attribs = 3;
+       const unsigned int              num_output_attribs = 5;
+       const unsigned int              input_semantic_names[3] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
+                                               TGSI_SEMANTIC_GENERIC,
+                                               TGSI_SEMANTIC_GENERIC
+                                       };
+       const unsigned int              output_semantic_names[5] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
                                                TGSI_SEMANTIC_GENERIC,
                                                TGSI_SEMANTIC_GENERIC,
                                                TGSI_SEMANTIC_GENERIC,
                                                TGSI_SEMANTIC_GENERIC
                                        };
        const unsigned int              input_semantic_indexes[3] = {0, 1, 2};
-       const unsigned int              output_semantic_indexes[5] = {0, 1, 2, 3, 4};
+       const unsigned int              output_semantic_indexes[5] = {0, 1, 2, 3, 4};
+       const unsigned int              proc_type = TGSI_PROCESSOR_VERTEX;
+       /* Builds the frame-based BMC vertex shader token stream, creates the shader and stores it in context->states.mc.b_vs[0]. Always returns 0. */
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+       struct tgsi_processor           *processor;
+       
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+       
+       unsigned int                    ti;
+       unsigned int                    i;
+       
+       assert(context);
+       
+       pipe = context->pipe;
+       
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* NOTE(review): result is not checked for NULL before use */
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+
+       /* Processor */
+       processor = (struct tgsi_processor*)&tokens[2];
+       *processor = tgsi_build_processor(proc_type, header);
+
+       ti = 3;
+
+       /* Declare inputs (i0 pos, i1 luma texcoords, i2 chroma texcoords) */
+       for (i = 0; i < num_input_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = input_semantic_names[i];
+               decl.Semantic.SemanticIndex = input_semantic_indexes[i];
+
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* Declare constant inputs */
+       /* C[0] scales the normalized MB to cover 16x16 pixels,
+          C[1] translates the macroblock into position on the surface
+          C[2] unused
+          C[3] translates the past surface texcoords to the ref macroblock
+          C[4] unused
+          C[5] translates the future surface texcoords to the ref macroblock */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_CONSTANT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 5;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* Declare outputs (o0 pos, o1 luma, o2 chroma, o3 past ref, o4 future ref texcoords) */
+       for (i = 0; i < num_output_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_OUTPUT;
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = output_semantic_names[i];
+               decl.Semantic.SemanticIndex = output_semantic_indexes[i];
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul t0, i0, c0       ; Scale normalized coords to window coords */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* add o0, t0, c1       ; Translate vertex into position */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /*
+       mov o1, i1              ; Move luma & chroma texcoords to output
+       mov o2, i2
+       */
+       for (i = 1; i < num_output_attribs - 2; ++i) /* i = 1, 2 */
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i;
+               inst.Instruction.NumSrcRegs = 1;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* add o3, t0, c3       ; Translate past surface texcoords into position
+          add o4, t0, c5       ; Repeat for future surface texcoords */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 3;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i * 2 + 3; /* c3 for past, c5 for future; c4 is skipped */
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+
+       /* END */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_END;
+       inst.Instruction.NumDstRegs = 0;
+       inst.Instruction.NumSrcRegs = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       vs.tokens = tokens;
+       
+       context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs);
+       
+       free(tokens);
+       
+       return 0;
+}
+
+static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context)
+{
+       const unsigned int              max_tokens = 100;
+       const unsigned int              num_input_attribs = 3;
+       const unsigned int              num_output_attribs = 8;
+       const unsigned int              input_semantic_names[3] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC   /* Chroma texcoords */
+                                       };
+       const unsigned int              output_semantic_names[8] =
+                                       {
+                                               TGSI_SEMANTIC_POSITION,
+                                               TGSI_SEMANTIC_GENERIC,  /* Luma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Chroma texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Top field past surface texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Bottom field past surface texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Top field future surface texcoords */
+                                               TGSI_SEMANTIC_GENERIC,  /* Bottom field future surface texcoords */
+                                               TGSI_SEMANTIC_POSITION  /* Pos */
+                                       };
+       const unsigned int              input_semantic_indexes[3] = {0, 1, 2};
+       const unsigned int              output_semantic_indexes[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        const unsigned int              proc_type = TGSI_PROCESSOR_VERTEX;
        
        struct pipe_context             *pipe;
-       struct pipe_shader_state        vs;
+       struct pipe_shader_state        vs;
+       struct tgsi_token               *tokens;
+       struct tgsi_header              *header;
+       struct tgsi_processor           *processor;
+       
+       struct tgsi_full_declaration    decl;
+       struct tgsi_full_instruction    inst;
+       
+       unsigned int                    ti;
+       unsigned int                    i;
+       
+       assert(context);
+       
+       pipe = context->pipe;
+       
+       tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+       /* Version */
+       *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+
+       /* Header */
+       header = (struct tgsi_header*)&tokens[1];
+       *header = tgsi_build_header();
+
+       /* Processor */
+       processor = (struct tgsi_processor*)&tokens[2];
+       *processor = tgsi_build_processor(proc_type, header);
+
+       ti = 3;
+
+       /* Declare inputs (pos, texcoords) */
+       for (i = 0; i < num_input_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_INPUT;
+
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = input_semantic_names[i];
+               decl.Semantic.SemanticIndex = input_semantic_indexes[i];
+
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* Declare constant inputs */
+       /* C[0] scales the normalized MB to cover 16x16 pixels,
+          C[1] translates the macroblock into position on the surface
+          C[2] denormalizes pos components
+          C[3] translates the past surface top field texcoords to the ref macroblock
+          C[4] translates the past surface bottom field texcoords to the ref macroblock
+          C[5] translates the future surface top field texcoords to the ref macroblock
+          C[6] translates the future surface bottom field texcoords to the ref macroblock */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_CONSTANT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 6;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* Declare outputs (pos, texcoords) */
+       for (i = 0; i < num_output_attribs; i++)
+       {
+               decl = tgsi_default_full_declaration();
+               decl.Declaration.File = TGSI_FILE_OUTPUT;
+               decl.Declaration.Semantic = 1;
+               decl.Semantic.SemanticName = output_semantic_names[i];
+               decl.Semantic.SemanticIndex = output_semantic_indexes[i];
+               decl.u.DeclarationRange.First = i;
+               decl.u.DeclarationRange.Last = i;
+               ti += tgsi_build_full_declaration
+               (
+                       &decl,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul t0, i0, c0       ; Scale normalized coords to window coords */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* add t1, t0, c1       ; Translate vertex into position */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mov o0, t1           ; Move vertex pos to output */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 1;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /*
+       mov o1, i1              ; Move luma & chroma texcoords to output
+       mov o2, i2
+       */
+       for (i = 1; i < num_output_attribs - 1; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i;
+               inst.Instruction.NumSrcRegs = 1;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+
+       /* add o3, t0, c3       ; Translate top field past texcoords into position
+          add o4, t0, c4       ; Translate bottom field past texcoords into position
+          add o5, t0, c5       ; Translate top field future texcoords into position
+          add o6, t0, c6       ; Translate bottom field future texcoords into position */
+       for (i = 0; i < 4; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 3;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i + 3;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul o7, t1, c2       ; Denorm pos for fragment shader */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 7;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+
+       /* END */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_END;
+       inst.Instruction.NumDstRegs = 0;
+       inst.Instruction.NumSrcRegs = 0;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       vs.tokens = tokens;
+       
+       context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs);
+       
+       free(tokens);
+       
+       return 0;
+}
+
+static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
+{
+       const unsigned int              max_tokens = 100;
+       const unsigned int              proc_type = TGSI_PROCESSOR_FRAGMENT;
+
+       struct pipe_context             *pipe;
+       struct pipe_shader_state        fs;
        struct tgsi_token               *tokens;
        struct tgsi_header              *header;
        struct tgsi_processor           *processor;
-       
+
        struct tgsi_full_declaration    decl;
        struct tgsi_full_instruction    inst;
        
@@ -882,12 +2027,12 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
        assert(context);
        
        pipe = context->pipe;
-       
+
        tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
 
        /* Version */
        *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-
+       
        /* Header */
        header = (struct tgsi_header*)&tokens[1];
        *header = tgsi_build_header();
@@ -898,16 +2043,16 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
 
        ti = 3;
 
-       /* Declare inputs (pos, texcoords) */
-       for (i = 0; i < num_input_attribs; i++)
+       /* Declare inputs (texcoords) */
+       for (i = 0; i < 4; ++i)
        {
                decl = tgsi_default_full_declaration();
                decl.Declaration.File = TGSI_FILE_INPUT;
-
                decl.Declaration.Semantic = 1;
-               decl.Semantic.SemanticName = input_semantic_names[i];
-               decl.Semantic.SemanticIndex = input_semantic_indexes[i];
-
+               decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+               decl.Semantic.SemanticIndex = i + 1;
+               decl.Declaration.Interpolate = 1;
+               decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR;
                decl.u.DeclarationRange.First = i;
                decl.u.DeclarationRange.Last = i;
                ti += tgsi_build_full_declaration
@@ -919,18 +2064,17 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                );
        }
        
-       /* Declare constant inputs */
-       /* C[0] scales the normalized MB to cover 16x16 pixels,
-          C[1] translates the macroblock into position on the surface
-          C[2] translates the past surface texcoords to the ref macroblock
-          C[3] translates the future surface texcoords to the ref macroblock */
+       /* Declare constant input */
+       /* C[0] is a multiplier to use when concatenating differential into a single channel
+          C[1] is a bias to get differential back to -1,1 range
+          C[2] contains 0.5 in channel X for use as a weight to blend past and future samples */
        decl = tgsi_default_full_declaration();
        decl.Declaration.File = TGSI_FILE_CONSTANT;
        decl.Declaration.Semantic = 1;
        decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
        decl.Semantic.SemanticIndex = 0;
        decl.u.DeclarationRange.First = 0;
-       decl.u.DeclarationRange.Last = 3;
+       decl.u.DeclarationRange.Last = 2;
        ti += tgsi_build_full_declaration
        (
                &decl,
@@ -939,14 +2083,27 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                max_tokens - ti
        );
 
-       /* Declare outputs (pos, texcoords) */
-       for (i = 0; i < num_output_attribs; i++)
+       /* Declare output */
+       decl = tgsi_default_full_declaration();
+       decl.Declaration.File = TGSI_FILE_OUTPUT;
+       decl.Declaration.Semantic = 1;
+       decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
+       decl.Semantic.SemanticIndex = 0;
+       decl.u.DeclarationRange.First = 0;
+       decl.u.DeclarationRange.Last = 0;
+       ti += tgsi_build_full_declaration
+       (
+               &decl,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* Declare samplers */
+       for (i = 0; i < 5; ++i)
        {
                decl = tgsi_default_full_declaration();
-               decl.Declaration.File = TGSI_FILE_OUTPUT;
-               decl.Declaration.Semantic = 1;
-               decl.Semantic.SemanticName = output_semantic_names[i];
-               decl.Semantic.SemanticIndex = output_semantic_indexes[i];
+               decl.Declaration.File = TGSI_FILE_SAMPLER;
                decl.u.DeclarationRange.First = i;
                decl.u.DeclarationRange.Last = i;
                ti += tgsi_build_full_declaration
@@ -958,15 +2115,67 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                );
        }
        
-       /* mul t0, i0, c0       ; Scale normalized coords to window coords */
+       /*
+       tex2d t0.xw, i0, s0     ; Read texel from luma texture into .x and .w channels
+       mov t1.x, t0.w          ; Move high part from .w channel to .x
+       tex2d t0.yw, i1, s1     ; Read texel from chroma Cb texture into .y and .w channels
+       mov t1.y, t0.w          ; Move high part from .w channel to .y
+       tex2d t0.zw, i1, s2     ; Read texel from chroma Cr texture into .z and .w channels
+       mov t1.z, t0.w          ; Move high part from .w channel to .z
+       */
+       for (i = 0; i < 3; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = 0;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+               
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = 1;
+               inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+               inst.Instruction.NumSrcRegs = 1;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* mul t1, t1, c0       ; Multiply high part by multiplier to get back its full value */
        inst = tgsi_default_full_instruction();
        inst.Instruction.Opcode = TGSI_OPCODE_MUL;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
        inst.Instruction.NumSrcRegs = 2;
-       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 1;
        inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
        inst.FullSrcRegisters[1].SrcRegister.Index = 0;
        ti += tgsi_build_full_instruction
@@ -976,17 +2185,17 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                header,
                max_tokens - ti
        );
-
-       /* add o0, t0, c1       ; Translate vertex into position */
+       
+       /* add t0, t0, t1       ; Add luma and chroma low and high parts to get a single value */
        inst = tgsi_default_full_instruction();
        inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        inst.Instruction.NumDstRegs = 1;
-       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullDstRegisters[0].DstRegister.Index = 0;
        inst.Instruction.NumSrcRegs = 2;
        inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullSrcRegisters[0].SrcRegister.Index = 0;
-       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullSrcRegisters[1].SrcRegister.Index = 1;
        ti += tgsi_build_full_instruction
        (
@@ -996,43 +2205,40 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                max_tokens - ti
        );
        
-       /*
-       mov o1, i1              ; Move luma & chroma texcoords to output
-       mov o2, i2
-       */
-       for (i = 1; i < num_output_attribs - 2; ++i)
-       {
-               inst = tgsi_default_full_instruction();
-               inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-               inst.Instruction.NumDstRegs = 1;
-               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-               inst.FullDstRegisters[0].DstRegister.Index = i;
-               inst.Instruction.NumSrcRegs = 1;
-               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-               inst.FullSrcRegisters[0].SrcRegister.Index = i;
-               ti += tgsi_build_full_instruction
-               (
-                       &inst,
-                       &tokens[ti],
-                       header,
-                       max_tokens - ti
-               );
-       }
+       /* sub t0, t0, c1       ; Subtract bias to get back the signed value */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
        
-       /* add o3, t0, c2       ; Translate past surface texcoords into position
-          add o4, t0, c3       ; Repeat for future surface texcoords */
+       /* tex2d t1, i2, s3     ; Read texel from past macroblock
+          tex2d t2, i3, s4     ; Read texel from future macroblock */
        for (i = 0; i < 2; ++i)
        {
                inst = tgsi_default_full_instruction();
-               inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+               inst.Instruction.Opcode = TGSI_OPCODE_TEX;
                inst.Instruction.NumDstRegs = 1;
-               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-               inst.FullDstRegisters[0].DstRegister.Index = i + 3;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 1;
                inst.Instruction.NumSrcRegs = 2;
-               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-               inst.FullSrcRegisters[0].SrcRegister.Index = 0;
-               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
-               inst.FullSrcRegisters[1].SrcRegister.Index = i + 2;
+               inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i + 2;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+               inst.FullSrcRegisters[1].SrcRegister.Index = i + 3;
                ti += tgsi_build_full_instruction
                (
                        &inst,
@@ -1041,6 +2247,50 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                        max_tokens - ti
                );
        }
+       
+       /* lerp t1, c2.x, t1, t2        ; Blend past and future texels */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_LERP;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 3;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[2].SrcRegister.Index = 2;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* add o0, t0, t1       ; Add ref and differential to form final output */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+       inst.FullDstRegisters[0].DstRegister.Index = 0;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
 
        /* END */
        inst = tgsi_default_full_instruction();
@@ -1054,19 +2304,19 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context)
                header,
                max_tokens - ti
        );
+
+       fs.tokens = tokens;
        
-       vs.tokens = tokens;
-       
-       context->states.mc.b_vs = pipe->create_vs_state(pipe, &vs);
+       context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs);
        
        free(tokens);
        
        return 0;
 }
 
-static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
+static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
 {
-       const unsigned int              max_tokens = 100;
+       const unsigned int              max_tokens = 200;
        const unsigned int              proc_type = TGSI_PROCESSOR_FRAGMENT;
 
        struct pipe_context             *pipe;
@@ -1100,8 +2350,15 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
 
        ti = 3;
 
-       /* Declare inputs (texcoords) */
-       for (i = 0; i < 4; ++i)
+       /* Declare inputs (texcoords) 
+          I[0] Luma texcoords
+          I[1] Chroma texcoords
+          I[2] Past top field surface texcoords
+          I[3] Past bottom field surface texcoords
+          I[4] Future top field surface texcoords
+          I[5] Future bottom field surface texcoords
+          I[6] Denormalized texel pos */
+       for (i = 0; i < 7; ++i)
        {
                decl = tgsi_default_full_declaration();
                decl.Declaration.File = TGSI_FILE_INPUT;
@@ -1123,14 +2380,15 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
        
        /* Declare constant input */
        /* C[0] is a multiplier to use when concatenating differential into a single channel
-          C[0] is a bias to get differential back to -1,1 range*/
+          C[1] is a bias to get differential back to -1,1 range
+          C[2] holds the constants 0.5 (.x) and 2 (.y) used by the Y%2 field selector */
        decl = tgsi_default_full_declaration();
        decl.Declaration.File = TGSI_FILE_CONSTANT;
        decl.Declaration.Semantic = 1;
        decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
        decl.Semantic.SemanticIndex = 0;
        decl.u.DeclarationRange.First = 0;
-       decl.u.DeclarationRange.Last = 1;
+       decl.u.DeclarationRange.Last = 2;
        ti += tgsi_build_full_declaration
        (
                &decl,
@@ -1280,8 +2538,118 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
                max_tokens - ti
        );
        
-       /* tex2d t1, i2, s3     ; Read texel from past macroblock
-          tex2d t2, i3, s4     ; Read texel from future macroblock */
+       /* XXX: Pos values off by 0.5 for rounding? */
+       /* sub t4, i6.y, c2.x   ; Sub 0.5 from position */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 4;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 6;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mul t3, t4, c2.x     ; Divide pos y coord by 2 (mul by 0.5) */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* floor t3, t3         ; Get rid of fractional part */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_FLOOR;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 1;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* mul t3, t3, c2.y     ; Multiply by 2 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+       inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* sub t3, t4, t3       ; Subtract from y to get y % 2 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 3;
+       inst.Instruction.NumSrcRegs = 2;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 3;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* tex2d t1, i2, s3     ; Read texel from past macroblock top field
+          tex2d t2, i3, s3     ; Read texel from past macroblock bottom field */
        for (i = 0; i < 2; ++i)
        {
                inst = tgsi_default_full_instruction();
@@ -1294,7 +2662,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
                inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
                inst.FullSrcRegisters[0].SrcRegister.Index = i + 2;
                inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
-               inst.FullSrcRegisters[1].SrcRegister.Index = i + 3;
+               inst.FullSrcRegisters[1].SrcRegister.Index = 3;
                ti += tgsi_build_full_instruction
                (
                        &inst,
@@ -1304,17 +2672,89 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
                );
        }
        
-       /* add t0, t0, t1       ; Add past and differential to form partial output */
+       /* lerp t1, t3, t1, t2 */
        inst = tgsi_default_full_instruction();
-       inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+       inst.Instruction.Opcode = TGSI_OPCODE_LERP;
        inst.Instruction.NumDstRegs = 1;
        inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-       inst.FullDstRegisters[0].DstRegister.Index = 0;
-       inst.Instruction.NumSrcRegs = 2;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 3;
        inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-       inst.FullSrcRegisters[0].SrcRegister.Index = 0;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[2].SrcRegister.Index = 2;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* tex2d t4, i4, s4     ; Read texel from future macroblock top field
+          tex2d t5, i5, s4     ; Read texel from future macroblock bottom field */
+       for (i = 0; i < 2; ++i)
+       {
+               inst = tgsi_default_full_instruction();
+               inst.Instruction.Opcode = TGSI_OPCODE_TEX;
+               inst.Instruction.NumDstRegs = 1;
+               inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+               inst.FullDstRegisters[0].DstRegister.Index = i + 4;
+               inst.Instruction.NumSrcRegs = 2;
+               inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+               inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+               inst.FullSrcRegisters[0].SrcRegister.Index = i + 4;
+               inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+               inst.FullSrcRegisters[1].SrcRegister.Index = 4;
+               ti += tgsi_build_full_instruction
+               (
+                       &inst,
+                       &tokens[ti],
+                       header,
+                       max_tokens - ti
+               );
+       }
+       
+       /* lerp t2, t3, t4, t5 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_LERP;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 2;
+       inst.Instruction.NumSrcRegs = 3;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 3;
+       inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 4;
+       inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[2].SrcRegister.Index = 5;
+       ti += tgsi_build_full_instruction
+       (
+               &inst,
+               &tokens[ti],
+               header,
+               max_tokens - ti
+       );
+       
+       /* lerp t1, c2.x, t1, t2 */
+       inst = tgsi_default_full_instruction();
+       inst.Instruction.Opcode = TGSI_OPCODE_LERP;
+       inst.Instruction.NumDstRegs = 1;
+       inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullDstRegisters[0].DstRegister.Index = 1;
+       inst.Instruction.NumSrcRegs = 3;
+       inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT;
+       inst.FullSrcRegisters[0].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+       inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullSrcRegisters[1].SrcRegister.Index = 1;
+       inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+       inst.FullSrcRegisters[2].SrcRegister.Index = 2;
        ti += tgsi_build_full_instruction
        (
                &inst,
@@ -1323,7 +2763,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
                max_tokens - ti
        );
        
-       /* add o0, t0, t2       ; Add future and differential to form final output */
+       /* add o0, t0, t1       ; Add blended past/future reference and differential to form final output */
        inst = tgsi_default_full_instruction();
        inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        inst.Instruction.NumDstRegs = 1;
@@ -1333,7 +2773,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
        inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
        inst.FullSrcRegisters[0].SrcRegister.Index = 0;
        inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-       inst.FullSrcRegisters[1].SrcRegister.Index = 2;
+       inst.FullSrcRegisters[1].SrcRegister.Index = 1;
        ti += tgsi_build_full_instruction
        (
                &inst,
@@ -1357,7 +2797,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context)
 
        fs.tokens = tokens;
        
-       context->states.mc.b_fs = pipe->create_fs_state(pipe, &fs);
+       context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs);
        
        free(tokens);
        
@@ -1491,7 +2931,6 @@ static int vlInitMC(struct VL_CONTEXT *context)
        context->states.mc.render_target.height = context->video_height;
        context->states.mc.render_target.num_cbufs = 1;
        /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */
-       /*context->states.mc.render_target.cbufs[0] = ;*/
        context->states.mc.render_target.zsbuf = NULL;
        
        filters[0] = PIPE_TEX_FILTER_NEAREST;
@@ -1530,6 +2969,7 @@ static int vlInitMC(struct VL_CONTEXT *context)
        template.depth[0] = 1;
        template.compressed = 0;
        template.cpp = 2;
+       
        context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template);
        
        if (context->video_format == VL_FORMAT_YCBCR_420)
@@ -1548,10 +2988,14 @@ static int vlInitMC(struct VL_CONTEXT *context)
        
        vlCreateVertexShaderIMC(context);
        vlCreateFragmentShaderIMC(context);
-       vlCreateVertexShaderPMC(context);
-       vlCreateFragmentShaderPMC(context);
-       vlCreateVertexShaderBMC(context);
-       vlCreateFragmentShaderBMC(context);
+       vlCreateVertexShaderFramePMC(context);
+       vlCreateVertexShaderFieldPMC(context);
+       vlCreateFragmentShaderFramePMC(context);
+       vlCreateFragmentShaderFieldPMC(context);
+       vlCreateVertexShaderFrameBMC(context);
+       vlCreateVertexShaderFieldBMC(context);
+       vlCreateFragmentShaderFrameBMC(context);
+       vlCreateFragmentShaderFieldBMC(context);
        vlCreateDataBufsMC(context);
        
        return 0;
@@ -1575,10 +3019,14 @@ static int vlDestroyMC(struct VL_CONTEXT *context)
        
        context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs);
        context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs);
-       context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs);
-       context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs);
-       context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs);
-       context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs);
+       
+       for (i = 0; i < 2; ++i)
+       {
+               context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]);
+               context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]);
+               context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]);
+               context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]);
+       }
        
        context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer);
        context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer);
@@ -1982,7 +3430,7 @@ static int vlCreateDataBufsCSC(struct VL_CONTEXT *context)
        
        /*
        TODO: Refactor this into a seperate function,
-       allow changing the csc matrix at runtime to switch between regular & full versions
+       allow changing the CSC matrix at runtime to switch between regular & full versions
        */
        memcpy
        (
@@ -2166,7 +3614,7 @@ static int vlDestroy(struct VL_CONTEXT *context)
 {
        assert(context);
        
-       /* Must unbind shaders before we can delete them for some reason */
+       /* XXX: Must unbind shaders before we can delete them for some reason */
        context->pipe->bind_vs_state(context->pipe, NULL);
        context->pipe->bind_fs_state(context->pipe, NULL);
        
index f26a4c5b6a7ba88826d2f512ff75885d4eec5542..8a1231807341c95481e0c58c3a8edb0520de3792 100644 (file)
@@ -34,8 +34,8 @@ struct VL_CONTEXT
                        struct pipe_framebuffer_state           render_target;
                        struct pipe_sampler_state               *samplers[5];
                        struct pipe_texture                     *textures[5];
-                       struct pipe_shader_state                *i_vs, *p_vs, *b_vs;
-                       struct pipe_shader_state                *i_fs, *p_fs, *b_fs;
+                       struct pipe_shader_state                *i_vs, *p_vs[2], *b_vs[2];
+                       struct pipe_shader_state                *i_fs, *p_fs[2], *b_fs[2];
                        struct pipe_vertex_buffer               vertex_bufs[3];
                        struct pipe_vertex_element              vertex_buf_elems[3];
                        struct pipe_constant_buffer             vs_const_buf, fs_const_buf;
index 27893aee95e71028ce61421b2979262d941bd884..7e6ee8ac12fadc9c7edb82bf48ee69087a3917eb 100644 (file)
@@ -87,7 +87,8 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)
 const struct VL_MC_FS_CONSTS vl_mc_fs_consts =
 {
        {256.0f, 256.0f, 256.0f, 0.0f},
-       {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}
+       {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f},
+       {0.5f, 2.0f, 0.0f, 0.0f}
 };
 
 /*
index 6451e54953d2209cf5f847b2bb3775c6ce6e1413..d2220d7abf520f275fa92e07977776e7b39555fd 100644 (file)
@@ -8,6 +8,85 @@
 #include "vl_context.h"
 #include "vl_defs.h"
 
+/* Copies VL_BLOCK_HEIGHT rows of VL_BLOCK_WIDTH * 2 bytes from the packed block
+   at src into dst, whose rows are dst_pitch shorts apart. Always returns 0. */
+static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+       {
+               short *in = src + row * VL_BLOCK_WIDTH;
+               short *out = dst + row * dst_pitch;
+               memcpy(out, in, VL_BLOCK_WIDTH * 2);
+       }
+       return 0;
+}
+
+/* Writes each sample of the packed block at src, plus 0x100, into dst (rows dst_pitch shorts apart). */
+static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int col, row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row, src += VL_BLOCK_WIDTH, dst += dst_pitch)
+               for (col = 0; col < VL_BLOCK_WIDTH; ++col)
+                       dst[col] = src[col] + 0x100;
+       return 0;
+}
+
+/* Copies a field coded block of full samples from the packed block at src.
+   Source row y is written to destination row 2 * y (rows are dst_pitch shorts
+   apart); rows in the second half of the block are offset by a further
+   VL_BLOCK_HEIGHT destination rows. VL_BLOCK_WIDTH * 2 bytes are copied per
+   row. Always returns 0. */
+static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+       {
+               /* Extra destination rows skipped once past the first half */
+               unsigned int skip = row < VL_BLOCK_HEIGHT / 2 ? 0 : VL_BLOCK_HEIGHT;
+
+               memcpy
+               (
+                       dst + skip * dst_pitch + row * dst_pitch * 2,
+                       src + row * VL_BLOCK_WIDTH,
+                       VL_BLOCK_WIDTH * 2
+               );
+       }
+
+       return 0;
+}
+
+/* Writes a field coded block of samples, each with 0x100 added, into dst.
+   Source row y goes to destination row 2 * y, and second-half rows are offset
+   by a further VL_BLOCK_HEIGHT destination rows. Always returns 0. */
+static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
+{
+       unsigned int col, row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+       {
+               short *out = dst + (row < VL_BLOCK_HEIGHT / 2 ? 0 : VL_BLOCK_HEIGHT) * dst_pitch;
+
+               for (col = 0; col < VL_BLOCK_WIDTH; ++col)
+                       out[row * dst_pitch * 2 + col] = src[row * VL_BLOCK_WIDTH + col] + 0x100;
+       }
+       return 0;
+}
+
+/* Fills VL_BLOCK_HEIGHT rows of VL_BLOCK_WIDTH shorts in dst (rows dst_pitch apart) with 0x100. */
+static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
+{
+       unsigned int col, row;
+
+       for (row = 0; row < VL_BLOCK_HEIGHT; ++row, dst += dst_pitch)
+               for (col = 0; col < VL_BLOCK_WIDTH; ++col)
+                       dst[col] = 0x100;
+       return 0;
+}
+
 static int vlGrabBlocks
 (
        struct VL_CONTEXT *context,
@@ -19,7 +98,7 @@ static int vlGrabBlocks
 {
        struct pipe_surface     *tex_surface;
        short                   *texels;
-       unsigned int            b, x, y, y2;
+       unsigned int            tb, sb = 0;
        
        assert(context);
        assert(blocks);
@@ -33,134 +112,81 @@ static int vlGrabBlocks
        
        texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
        
-       for (b = 0; b < 4; ++b)
+       for (tb = 0; tb < 4; ++tb)
        {
-               if ((coded_block_pattern >> b) & 1)
+               if ((coded_block_pattern >> (5 - tb)) & 1)
                {
                        if (dct_type == VL_DCT_FRAME_CODED)
-                       {
                                if (sample_type == VL_FULL_SAMPLE)
-                               {
-                                       for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y)
-                                               memcpy
-                                               (
-                                                       texels + y * tex_surface->pitch,
-                                                       blocks + y * VL_BLOCK_WIDTH,
-                                                       VL_BLOCK_WIDTH * 2
-                                               );
-                               }
+                                       vlGrabFrameCodedFullBlock
+                                       (
+                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                               texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT,
+                                               tex_surface->pitch
+                                       );
                                else
-                               {
-                                       for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y)
-                                               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                                       texels[y * tex_surface->pitch + x] =
-                                                       blocks[y * VL_BLOCK_WIDTH + x] + 0x100;
-                               }
-                       }
+                                       vlGrabFrameCodedDiffBlock
+                                       (
+                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                               texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT,
+                                               tex_surface->pitch
+                                       );
                        else
-                       {
                                if (sample_type == VL_FULL_SAMPLE)
-                               {
-                                       for
+                                       vlGrabFieldCodedFullBlock
                                        (
-                                               y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b;
-                                               y < VL_BLOCK_HEIGHT * ((b % 2) + 1);
-                                               y += 2, ++y2
-                                       )
-                                               memcpy
-                                               (
-                                                       texels + y * tex_surface->pitch,
-                                                       blocks + y2 * VL_BLOCK_WIDTH,
-                                                       VL_BLOCK_WIDTH * 2
-                                               );
-                                       for
-                                       (
-                                               y = VL_BLOCK_HEIGHT * ((b % 2) + 2);
-                                               y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1);
-                                               y += 2, ++y2
-                                       )
-                                               memcpy
-                                               (
-                                                       texels + y * tex_surface->pitch,
-                                                       blocks + y2 * VL_BLOCK_WIDTH,
-                                                       VL_BLOCK_WIDTH * 2
-                                               );
-                               }
+                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                               texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch,
+                                               tex_surface->pitch
+                                       );
                                else
-                               {
-                                       for
+                                       vlGrabFieldCodedDiffBlock
                                        (
-                                               y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b;
-                                               y < VL_BLOCK_HEIGHT * ((b % 2) + 1);
-                                               y += 2, ++y2
-                                       )
-                                               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                                       texels[y * tex_surface->pitch + x] =
-                                                       blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100;
-                                       for
-                                       (
-                                               y = VL_BLOCK_HEIGHT * ((b % 2) + 2);
-                                               y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1);
-                                               y += 2, ++y2
-                                       )
-                                               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                                       texels[y * tex_surface->pitch + x] =
-                                                       blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100;
-                               }
-                       }
+                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                               texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch,
+                                               tex_surface->pitch
+                                       );
+                       ++sb;
                }
                else
-               {
-                       for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y)
-                       {
-                               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                       texels[y * tex_surface->pitch + x] = 0x100;
-                       }
-               }
+                       vlGrabNoBlock(texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, tex_surface->pitch);
        }
        
        pipe_surface_unmap(tex_surface);
        
        /* TODO: Implement 422, 444 */
-       for (b = 0; b < 2; ++b)
+       for (tb = 0; tb < 2; ++tb)
        {
                tex_surface = context->pipe->screen->get_tex_surface
-               (
-                       context->pipe->screen,
-                       context->states.mc.textures[b + 1],
-                       0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
-               );
+                       (
+                               context->pipe->screen,
+                               context->states.mc.textures[tb + 1],
+                               0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
+                       );
        
                texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
                
-               if ((coded_block_pattern >> (b + 4)) & 1)
-               {
+               if ((coded_block_pattern >> (1 - tb)) & 1)
+               {                       
                        if (sample_type == VL_FULL_SAMPLE)
-                       {
-                               for (y = 0; y < tex_surface->height; ++y)
-                                       memcpy
-                                       (
-                                               texels + y * tex_surface->pitch,
-                                               blocks + VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH,
-                                               VL_BLOCK_WIDTH * 2
-                                       );
-                       }
+                               vlGrabFrameCodedFullBlock
+                               (
+                                       blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                       texels,
+                                       tex_surface->pitch
+                               );
                        else
-                       {
-                               for (y = 0; y < tex_surface->height; ++y)
-                                       for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                               texels[y * tex_surface->pitch + x] =
-                                               blocks[VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH + x] + 0x100;
-                       }
+                               vlGrabFrameCodedDiffBlock
+                               (
+                                       blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+                                       texels,
+                                       tex_surface->pitch
+                               );
+                       
+                       ++sb;
                }
                else
-               {
-                       for (y = 0; y < tex_surface->height; ++y)
-                       {
-                               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                                       texels[y * tex_surface->pitch + x] = 0x100;
-                       }
-               }
+                       vlGrabNoBlock(texels, tex_surface->pitch);
                
                pipe_surface_unmap(tex_surface);
        }
@@ -229,41 +255,35 @@ int vlRenderIMacroBlock
 )
 {
        struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vscbdata;
+       struct VL_MC_VS_CONSTS  *vs_consts;
        
        assert(blocks);
        assert(surface);
        
        /* TODO: Implement interlaced rendering */
-       /*assert(picture_type == VL_FRAME_PICTURE);*/
        if (picture_type != VL_FRAME_PICTURE)
-       {
-               /*fprintf(stderr, "field picture (I) unimplemented, ignoring\n");*/
                return 0;
-       }
        
        pipe = surface->context->pipe;
        
-       vscbdata = pipe->winsys->buffer_map
+       vs_consts = pipe->winsys->buffer_map
        (
                pipe->winsys,
                surface->context->states.mc.vs_const_buf.buffer,
                PIPE_BUFFER_USAGE_CPU_WRITE
        );
        
-       vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vscbdata->scale.z = 1.0f;
-       vscbdata->scale.w = 1.0f;
-       vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vscbdata->mb_pos_trans.z = 0.0f;
-       vscbdata->mb_pos_trans.w = 0.0f;
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
        
        pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
        
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
-       
        surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
        (
                pipe->screen,
@@ -276,6 +296,8 @@ int vlRenderIMacroBlock
        pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs);
        pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs);
        
+       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
+       
        pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
        
        return 0;
@@ -297,7 +319,7 @@ int vlRenderPMacroBlock
 )
 {
        struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vscbdata;
+       struct VL_MC_VS_CONSTS  *vs_consts;
        
        assert(motion_vectors);
        assert(blocks);
@@ -305,46 +327,55 @@ int vlRenderPMacroBlock
        assert(surface);
        
        /* TODO: Implement interlaced rendering */
-       /*assert(picture_type == VL_FRAME_PICTURE);*/
        if (picture_type != VL_FRAME_PICTURE)
-       {
-               /*fprintf(stderr, "field picture (P) unimplemented, ignoring\n");*/
                return 0;
-       }
-       /* TODO: Implement field based motion compensation */
-       /*assert(mc_type == VL_FRAME_MC);*/
-       if (mc_type != VL_FRAME_MC)
-       {
-               /*fprintf(stderr, "field MC (P) unimplemented, ignoring\n");*/
+       /* TODO: Implement other MC types */
+       if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
                return 0;
-       }
        
        pipe = surface->context->pipe;
        
-       vscbdata = pipe->winsys->buffer_map
+       vs_consts = pipe->winsys->buffer_map
        (
                pipe->winsys,
                surface->context->states.mc.vs_const_buf.buffer,
                PIPE_BUFFER_USAGE_CPU_WRITE
        );
        
-       vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vscbdata->scale.z = 1.0f;
-       vscbdata->scale.w = 1.0f;
-       vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vscbdata->mb_pos_trans.z = 0.0f;
-       vscbdata->mb_pos_trans.w = 0.0f;
-       vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width;
-       vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height;
-       vscbdata->mb_tc_trans[0].z = 0.0f;
-       vscbdata->mb_tc_trans[0].w = 0.0f;
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width;
+       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height;
+       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
+       
+       if (mc_type == VL_FIELD_MC)
+       {
+               vs_consts->denorm.x = (float)surface->width;
+               vs_consts->denorm.y = (float)surface->height;
+               
+               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width;
+               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height;
+               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
+               
+               pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]);
+               pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]);
+       }
+       else
+       {
+               pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]);
+               pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]);
+       }
        
        pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
        
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-       
        surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
        (
                pipe->screen,
@@ -356,8 +387,8 @@ int vlRenderPMacroBlock
        surface->context->states.mc.textures[3] = ref_surface->texture;
        pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures);
        pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers);
-       pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs);
-       pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs);
+       
+       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
        
        pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
        
@@ -381,7 +412,7 @@ int vlRenderBMacroBlock
 )
 {
        struct pipe_context     *pipe;
-       struct VL_MC_VS_CONSTS  *vscbdata;
+       struct VL_MC_VS_CONSTS  *vs_consts;
        
        assert(motion_vectors);
        assert(blocks);
@@ -389,50 +420,63 @@ int vlRenderBMacroBlock
        assert(surface);
        
        /* TODO: Implement interlaced rendering */
-       /*assert(picture_type == VL_FRAME_PICTURE);*/
        if (picture_type != VL_FRAME_PICTURE)
-       {
-               /*fprintf(stderr, "field picture (B) unimplemented, ignoring\n");*/
                return 0;
-       }
-       /* TODO: Implement field based motion compensation */
-       /*assert(mc_type == VL_FRAME_MC);*/
-       if (mc_type != VL_FRAME_MC)
-       {
-               /*fprintf(stderr, "field MC (B) unimplemented, ignoring\n");*/
+       /* TODO: Implement other MC types */
+       if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
                return 0;
-       }
        
        pipe = surface->context->pipe;
        
-       vscbdata = pipe->winsys->buffer_map
+       vs_consts = pipe->winsys->buffer_map
        (
                pipe->winsys,
                surface->context->states.mc.vs_const_buf.buffer,
                PIPE_BUFFER_USAGE_CPU_WRITE
        );
        
-       vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
-       vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
-       vscbdata->scale.z = 1.0f;
-       vscbdata->scale.w = 1.0f;
-       vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
-       vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
-       vscbdata->mb_pos_trans.z = 0.0f;
-       vscbdata->mb_pos_trans.w = 0.0f;
-       vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width;
-       vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height;
-       vscbdata->mb_tc_trans[0].z = 0.0f;
-       vscbdata->mb_tc_trans[0].w = 0.0f;
-       vscbdata->mb_tc_trans[1].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width;
-       vscbdata->mb_tc_trans[1].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height;
-       vscbdata->mb_tc_trans[1].z = 0.0f;
-       vscbdata->mb_tc_trans[1].w = 0.0f;
+       vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width;
+       vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height;
+       vs_consts->scale.z = 1.0f;
+       vs_consts->scale.w = 1.0f;
+       vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width;
+       vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height;
+       vs_consts->mb_pos_trans.z = 0.0f;
+       vs_consts->mb_pos_trans.w = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width;
+       vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height;
+       vs_consts->mb_tc_trans[0].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[0].top_field.w = 0.0f;
+       vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width;
+       vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height;
+       vs_consts->mb_tc_trans[1].top_field.z = 0.0f;
+       vs_consts->mb_tc_trans[1].top_field.w = 0.0f;
+       
+       if (mc_type == VL_FIELD_MC)
+       {
+               vs_consts->denorm.x = (float)surface->width;
+               vs_consts->denorm.y = (float)surface->height;
+               
+               vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width;
+               vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height;
+               vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f;
+               vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width;
+               vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height;
+               vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f;
+               vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f;
+               
+               pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]);
+               pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]);
+       }
+       else
+       {
+               pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]);
+               pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]);
+       }
        
        pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer);
        
-       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-       
        surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface
        (
                pipe->screen,
@@ -445,8 +489,8 @@ int vlRenderBMacroBlock
        surface->context->states.mc.textures[4] = future_surface->texture;
        pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures);
        pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers);
-       pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs);
-       pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs);
+       
+       vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
        
        pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
        
@@ -513,7 +557,7 @@ int vlPutSurface
                        destw,
                        desth,
                        PIPE_FORMAT_A8R8G8B8_UNORM,
-                       /*XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */
+                       /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */
                        PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE,
                        0
                );
index 7040b74503624c21186c6dc4bd1b4f4500806487..97753699db6a1217e6e54b530ea743993932c715 100644 (file)
@@ -36,7 +36,9 @@ enum VL_SAMPLE_TYPE
 enum VL_MC_TYPE
 {
        VL_FIELD_MC,
-       VL_FRAME_MC
+       VL_FRAME_MC,
+       VL_DUAL_PRIME_MC,       /* vlRenderBMacroBlock currently returns 0 for this type */
+       VL_16x8_MC = VL_FRAME_MC        /* alias: shares VL_FRAME_MC's value */
 };
 
 struct VL_VERTEX4F
@@ -58,13 +60,19 @@ struct VL_MC_VS_CONSTS
 {
        struct VL_VERTEX4F      scale;
        struct VL_VERTEX4F      mb_pos_trans;
-       struct VL_VERTEX4F      mb_tc_trans[2];
+       struct VL_VERTEX4F      denorm;         /* x/y = surface width/height; written only when field MC is selected */
+       struct
+       {
+               struct VL_VERTEX4F      top_field;      /* macroblock offset + half the top_field motion vector, divided by surface size */
+               struct VL_VERTEX4F      bottom_field;   /* same, from the bottom_field motion vector; written only for field MC */
+       } mb_tc_trans[2];       /* one entry per motion vector; B macroblocks fill both */
 };
 
 struct VL_MC_FS_CONSTS
 {
        struct VL_VERTEX4F      multiplier;
        struct VL_VERTEX4F      bias;
+       struct VL_VERTEX4F      y_divider;      /* NOTE(review): semantics undocumented - confirm what the fragment shader expects */
 };
 
 struct VL_CSC_FS_CONSTS
index e4602d820425abe73bce97cde45b140eebaaf997..5656895650eb37d7fa1db17c2df2b6063f0b206e 100644 (file)
@@ -32,6 +32,34 @@ static enum VL_PICTURE PictureToVL(int xvmc_pic)
        return vl_pic;
 }
 
+/*
+ * Translates an XvMC motion prediction type into the matching VL_MC_TYPE.
+ * Unrecognised values trip the assert; when asserts are compiled out they
+ * fall back to VL_FRAME_MC instead of returning an uninitialised value.
+ */
+static enum VL_MC_TYPE MotionToVL(int xvmc_motion_type)
+{
+       enum VL_MC_TYPE vl_mc_type = VL_FRAME_MC;
+       
+       switch (xvmc_motion_type)
+       {
+               case XVMC_PREDICTION_FRAME:
+                       vl_mc_type = VL_FRAME_MC;
+                       break;
+               case XVMC_PREDICTION_FIELD:
+                       vl_mc_type = VL_FIELD_MC;
+                       break;
+               case XVMC_PREDICTION_DUAL_PRIME:
+                       vl_mc_type = VL_DUAL_PRIME_MC;
+                       break;
+               default:
+                       assert(0);
+                       break;
+       }
+       
+       return vl_mc_type;
+}
+
 Status XvMCCreateSurface(Display *display, XvMCContext *context, XvMCSurface *surface)
 {
        struct VL_CONTEXT *vl_ctx;
@@ -155,7 +183,7 @@ Status XvMCRenderSurface
                                flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
                                macroblocks->macro_blocks[i].x,
                                macroblocks->macro_blocks[i].y,
-                               macroblocks->macro_blocks[i].motion_type == XVMC_PREDICTION_FRAME ? VL_FIELD_MC : VL_FRAME_MC,
+                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
                                &motion_vector,
                                macroblocks->macro_blocks[i].coded_block_pattern,
                                macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
@@ -188,7 +216,7 @@ Status XvMCRenderSurface
                                flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
                                macroblocks->macro_blocks[i].x,
                                macroblocks->macro_blocks[i].y,
-                               macroblocks->macro_blocks[i].motion_type == XVMC_PREDICTION_FRAME ? VL_FIELD_MC : VL_FRAME_MC,
+                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
                                &motion_vector,
                                macroblocks->macro_blocks[i].coded_block_pattern,
                                macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,
@@ -233,7 +261,7 @@ Status XvMCRenderSurface
                                flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST,
                                macroblocks->macro_blocks[i].x,
                                macroblocks->macro_blocks[i].y,
-                               macroblocks->macro_blocks[i].motion_type == XVMC_PREDICTION_FRAME ? VL_FIELD_MC : VL_FRAME_MC,
+                               MotionToVL(macroblocks->macro_blocks[i].motion_type),
                                motion_vector,
                                macroblocks->macro_blocks[i].coded_block_pattern,
                                macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED,