vl: use pipe_context::bind_sampler_states() if non-null
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor.c
index 0640b1a4565c2487717bf494479c74113df147ee..a2a113999adfb7a4f1ba79c3145357ba6bd7f92a 100644 (file)
  *
  **************************************************************************/
 
-#include "vl_compositor.h"
 #include <assert.h>
-#include <pipe/p_context.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <util/u_keymap.h>
-#include <util/u_sampler.h>
-#include <tgsi/tgsi_ureg.h>
-#include "vl_csc.h"
 
-struct vertex_shader_consts
-{
-   struct vertex4f dst_scale;
-   struct vertex4f dst_trans;
-   struct vertex4f src_scale;
-   struct vertex4f src_trans;
-};
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
 
-struct fragment_shader_consts
-{
-   float matrix[16];
-};
+#include "util/u_memory.h"
+#include "util/u_draw.h"
+#include "util/u_surface.h"
 
-static bool
-u_video_rects_equal(struct pipe_video_rect *a, struct pipe_video_rect *b)
-{
-   assert(a && b);
+#include "tgsi/tgsi_ureg.h"
 
-   if (a->x != b->x)
-      return false;
-   if (a->y != b->y)
-      return false;
-   if (a->w != b->w)
-      return false;
-   if (a->h != b->h)
-      return false;
+#include "vl_csc.h"
+#include "vl_types.h"
+#include "vl_compositor.h"
 
-   return true;
-}
+#define MIN_DIRTY (0)
+#define MAX_DIRTY (1 << 15)
 
-static bool
+enum VS_OUTPUT
+{  /* Semantic indices passed to ureg_DECL_output()/ureg_DECL_fs_input() by the shader builders in this file. */
+   VS_O_VPOS = 0,    /* index used with TGSI_SEMANTIC_POSITION */
+   VS_O_COLOR = 0,   /* index used with TGSI_SEMANTIC_COLOR */
+   VS_O_VTEX = 0,    /* first TGSI_SEMANTIC_GENERIC index; the three zeros are each
+                      * paired with a different semantic name, so they do not clash */
+   VS_O_VTOP,        /* TGSI_SEMANTIC_GENERIC index 1: coordinates read by the weave shader as i_tc[0] */
+   VS_O_VBOTTOM,     /* TGSI_SEMANTIC_GENERIC index 2: coordinates read by the weave shader as i_tc[1] */
+};
+
+static void *
 create_vert_shader(struct vl_compositor *c)
 {
    struct ureg_program *shader;
-   struct ureg_src vpos, vtex;
-   struct ureg_dst o_vpos, o_vtex;
+   struct ureg_src vpos, vtex, color;
+   struct ureg_dst tmp;
+   struct ureg_dst o_vpos, o_vtex, o_color;
+   struct ureg_dst o_vtop, o_vbottom;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -78,32 +67,70 @@ create_vert_shader(struct vl_compositor *c)
 
    vpos = ureg_DECL_vs_input(shader, 0);
    vtex = ureg_DECL_vs_input(shader, 1);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);
+   color = ureg_DECL_vs_input(shader, 2);
+   tmp = ureg_DECL_temporary(shader);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
+   o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
 
    /*
     * o_vpos = vpos
     * o_vtex = vtex
+    * o_color = color
     */
    ureg_MOV(shader, o_vpos, vpos);
    ureg_MOV(shader, o_vtex, vtex);
+   ureg_MOV(shader, o_color, color);
 
-   ureg_END(shader);
+   /*
+    * tmp.x = vtex.w / 2
+    * tmp.y = vtex.w / 4
+    *
+    * o_vtop.x = vtex.x
+    * o_vtop.y = vtex.y * tmp.x + 0.25f
+    * o_vtop.z = vtex.y * tmp.y + 0.25f
+    * o_vtop.w = 1 / tmp.x
+    *
+    * o_vbottom.x = vtex.x
+    * o_vbottom.y = vtex.y * tmp.x - 0.25f
+    * o_vbottom.z = vtex.y * tmp.y - 0.25f
+    * o_vbottom.w = 1 / tmp.y
+    */
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X),
+            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f));
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+            ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.25f));
+
+   ureg_MOV(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_X), vtex);
+   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.25f));
+   ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.25f));
+   ureg_RCP(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_W),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+
+   ureg_MOV(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_X), vtex);
+   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, -0.25f));
+   ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, -0.25f));
+   ureg_RCP(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_W),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
 
-   c->vertex_shader = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->vertex_shader)
-      return false;
+   ureg_END(shader);
 
-   return true;
+   return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
-static bool
-create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
+static void *
+create_frag_shader_video_buffer(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
-   struct ureg_src csc[4];
-   struct ureg_src sampler;
+   struct ureg_src csc[3];
+   struct ureg_src sampler[3];
    struct ureg_dst texel;
    struct ureg_dst fragment;
    unsigned i;
@@ -112,85 +139,344 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
    if (!shader)
       return false;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
-   for (i = 0; i < 4; ++i)
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 3; ++i) {
       csc[i] = ureg_DECL_constant(shader, i);
-   sampler = ureg_DECL_sampler(shader, 0);
+      sampler[i] = ureg_DECL_sampler(shader, i);
+   }
    texel = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel = tex(tc, sampler)
+    * texel.xyz = tex(tc, sampler[i])
     * fragment = csc * texel
     */
-   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
-   for (i = 0; i < 4; ++i)
+   for (i = 0; i < 3; ++i)
+      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D_ARRAY, tc, sampler[i]);
+
+   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+   for (i = 0; i < 3; ++i)
       ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
 
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
-   c->fragment_shader.ycbcr_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader.ycbcr_2_rgb)
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+static void *
+create_frag_shader_weave(struct vl_compositor *c)
+{  /*
+    * Builds the "weave" fragment shader and returns the handle produced by
+    * ureg_create_shader_and_destroy(). The program samples three 2D-array
+    * samplers at two sets of coordinates (VS_O_VTOP and VS_O_VBOTTOM),
+    * blends the two results with LRP, and applies the three CSC constant
+    * rows with DP4. Output alpha is forced to 1.0.
+    */
+   struct ureg_program *shader;
+   struct ureg_src i_tc[2];
+   struct ureg_src csc[3];
+   struct ureg_src sampler[3];
+   struct ureg_dst t_tc[2];
+   struct ureg_dst t_texel[2];
+   struct ureg_dst o_fragment;
+   unsigned i, j;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader) /* NOTE(review): the line below returns `false` from a `void *` function; NULL would be the idiomatic spelling */
       return false;
 
-   return true;
+   i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
+
+   for (i = 0; i < 3; ++i) { /* one constant row and one sampler per output channel */
+      csc[i] = ureg_DECL_constant(shader, i);
+      sampler[i] = ureg_DECL_sampler(shader, i);
+   }
+
+   for (i = 0; i < 2; ++i) { /* one coordinate temp and one texel temp per input coordinate set */
+      t_tc[i] = ureg_DECL_temporary(shader);
+      t_texel[i] = ureg_DECL_temporary(shader);
+   }
+   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /* calculate the texture offsets
+    * t_tc.x = i_tc.x
+    * t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2
+    */
+   for (i = 0; i < 2; ++i) {
+      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]);
+      ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
+               i_tc[i], ureg_imm1f(shader, 0.5f));
+      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]));
+      ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
+               ureg_imm1f(shader, i ? 1.0f : 0.0f)); /* .w is 0 for i_tc[0] and 1 for i_tc[1]; presumably the array layer, see the swizzle in the fetch loop */
+      ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
+               ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
+      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
+               ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W)); /* .y is scaled by the .w of the VS_O_VTOP input */
+      ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z),
+               ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W)); /* .z is scaled by the .w of the VS_O_VBOTTOM input */
+   }
+
+   /* fetch the texels
+    * texel[0..1].x = tex(t_tc[0..1][0])
+    * texel[0..1].y = tex(t_tc[0..1][1])
+    * texel[0..1].z = tex(t_tc[0..1][2])
+    */
+   for (i = 0; i < 2; ++i)
+      for (j = 0; j < 3; ++j) { /* sampler 0 uses t_tc.y as its second coordinate, samplers 1 and 2 use t_tc.z; t_tc.w supplies the third */
+         struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]),
+            TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+         ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
+                  TGSI_TEXTURE_2D_ARRAY, src, sampler[j]);
+      }
+
+   /* calculate linear interpolation factor
+    * factor = |round(i_tc.y) - i_tc.y| * 2
+    */
+   ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]); /* t_tc[0] is reused as scratch now that both fetches are issued */
+   ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
+            ureg_src(t_tc[0]), ureg_negate(i_tc[0]));
+   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
+            ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f));
+   ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]),
+            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z),
+            ureg_src(t_texel[0]), ureg_src(t_texel[1])); /* blended result overwrites t_texel[0] */
+
+   /* and finally do colour space transformation
+    * fragment = csc * texel
+    */
+   ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); /* w = 1 so each DP4 also adds the 4th component of its csc row */
+   for (i = 0; i < 3; ++i)
+      ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0]));
+
+   ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+   for (i = 0; i < 2; ++i) {
+      ureg_release_temporary(shader, t_texel[i]);
+      ureg_release_temporary(shader, t_tc[i]);
+   }
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
-static bool
-create_frag_shader_rgb_2_rgb(struct vl_compositor *c)
+static void *
+create_frag_shader_palette(struct vl_compositor *c, bool include_cc) /*
+ * Builds a fragment shader that samples a 2D texture (sampler 0) and uses the
+ * result as the coordinate for a 1D lookup in sampler 1 (the palette).
+ * With include_cc the looked-up colour is additionally run through the three
+ * CSC constant rows; without it the lookup is written to fragment.xyz directly.
+ * Returns the handle from ureg_create_shader_and_destroy().
+ * NOTE(review): the allocation-failure path returns `false` from a `void *`
+ * function; NULL would be the idiomatic spelling.
+ */
 {
    struct ureg_program *shader;
+   struct ureg_src csc[3];
    struct ureg_src tc;
    struct ureg_src sampler;
+   struct ureg_src palette;
+   struct ureg_dst texel;
    struct ureg_dst fragment;
+   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; include_cc && i < 3; ++i) /* constants are declared only when colour conversion is requested */
+      csc[i] = ureg_DECL_constant(shader, i);
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
    sampler = ureg_DECL_sampler(shader, 0);
+   palette = ureg_DECL_sampler(shader, 1);
+
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * texel = tex(tc, sampler)
+    * fragment.xyz = tex(texel, palette) * csc
+    * fragment.a = texel.a
+    */
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel)); /* alpha is copied before texel is overwritten by the palette lookup */
+
+   if (include_cc) {
+      ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette);
+      for (i = 0; i < 3; ++i)
+         ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+   } else {
+      ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+               TGSI_TEXTURE_1D, ureg_src(texel), palette);
+   }
+
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+/*
+ * Build the fragment shader used for RGBA layers.
+ *
+ * The shader samples a 2D texture (sampler 0) at the VS_O_VTEX coordinates and
+ * multiplies the texel by the interpolated per-vertex colour (VS_O_COLOR).
+ *
+ * Returns the shader handle from ureg_create_shader_and_destroy(), or NULL if
+ * the ureg program could not be allocated.
+ */
+static void *
+create_frag_shader_rgba(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc, color, sampler;
+   struct ureg_dst texel, fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
+   color = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   texel = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * fragment = tex(tc, sampler)
+    * texel = tex(tc, sampler)
+    * fragment = texel * color
     */
-   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_MUL(shader, fragment, ureg_src(texel), color);
+
+   /* texel is no longer needed; release it like the other shader builders do */
+   ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
-   c->fragment_shader.rgb_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader.rgb_2_rgb)
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+static bool
+init_shaders(struct vl_compositor *c)
+{  /*
+    * Creates the vertex shader and the five fragment shaders and stores their
+    * handles in *c. Returns false (after a debug message) as soon as one of
+    * them cannot be created, true when all six exist.
+    * NOTE(review): on a failure, shaders created earlier in this function
+    * are not deleted here - confirm that the caller cleans them up.
+    */
+   assert(c);
+
+   c->vs = create_vert_shader(c);
+   if (!c->vs) {
+      debug_printf("Unable to create vertex shader.\n");
+      return false;
+   }
+
+   c->fs_video_buffer = create_frag_shader_video_buffer(c);
+   if (!c->fs_video_buffer) {
+      debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
       return false;
+   }
+
+   c->fs_weave = create_frag_shader_weave(c);
+   if (!c->fs_weave) {
+      debug_printf("Unable to create YCbCr-to-RGB weave fragment shader.\n");
+      return false;
+   }
+
+   c->fs_palette.yuv = create_frag_shader_palette(c, true); /* palette lookup followed by colour conversion */
+   if (!c->fs_palette.yuv) {
+      debug_printf("Unable to create YUV-Palette-to-RGB fragment shader.\n");
+      return false;
+   }
+
+   c->fs_palette.rgb = create_frag_shader_palette(c, false); /* palette lookup only */
+   if (!c->fs_palette.rgb) {
+      debug_printf("Unable to create RGB-Palette-to-RGB fragment shader.\n");
+      return false;
+   }
+
+   c->fs_rgba = create_frag_shader_rgba(c);
+   if (!c->fs_rgba) {
+      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
+      return false;
+   }
 
    return true;
 }
 
+/*
+ * Delete the vertex shader and every fragment shader stored in the compositor.
+ * The fragment shaders are deleted in the order video_buffer, weave,
+ * palette.yuv, palette.rgb, rgba.
+ */
+static void cleanup_shaders(struct vl_compositor *c)
+{
+   void *frag_shaders[5];
+   unsigned n;
+
+   assert(c);
+
+   c->pipe->delete_vs_state(c->pipe, c->vs);
+
+   frag_shaders[0] = c->fs_video_buffer;
+   frag_shaders[1] = c->fs_weave;
+   frag_shaders[2] = c->fs_palette.yuv;
+   frag_shaders[3] = c->fs_palette.rgb;
+   frag_shaders[4] = c->fs_rgba;
+
+   for (n = 0; n < 5; ++n)
+      c->pipe->delete_fs_state(c->pipe, frag_shaders[n]);
+}
+
 static bool
 init_pipe_state(struct vl_compositor *c)
 {
+   struct pipe_rasterizer_state rast;
    struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   unsigned i;
 
    assert(c);
 
    c->fb_state.nr_cbufs = 1;
    c->fb_state.zsbuf = NULL;
 
+   memset(&sampler, 0, sizeof(sampler)); /* zero first so every field not assigned below has a defined value */
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
    sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
    sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
    sampler.compare_func = PIPE_FUNC_ALWAYS;
    sampler.normalized_coords = 1;
-   /*sampler.lod_bias = ;*/
-   /*sampler.min_lod = ;*/
-   /*sampler.max_lod = ;*/
-   /*sampler.border_color[i] = ;*/
-   /*sampler.max_anisotropy = ;*/
-   c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+   c->sampler_linear = c->pipe->create_sampler_state(c->pipe, &sampler); /* NOTE(review): none of the create_*_state() results in this function are checked; it always returns true */
+
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; /* same sampler settings, only the min/mag filters change */
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   c->sampler_nearest = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+   memset(&blend, 0, sizeof blend);
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0; /* blend_clear: blending disabled */
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   c->blend_clear = c->pipe->create_blend_state(c->pipe, &blend);
+
+   blend.rt[0].blend_enable = 1; /* blend_add: same state with SRC_ALPHA / INV_SRC_ALPHA blending switched on */
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   c->blend_add = c->pipe->create_blend_state(c->pipe, &blend);
+
+   memset(&rast, 0, sizeof rast);
+   rast.flatshade = 0;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.scissor = 1;
+   rast.line_width = 1;
+   rast.point_size_per_vertex = 1;
+   rast.offset_units = 1;
+   rast.offset_scale = 1;
+   rast.half_pixel_center = 1;
+   rast.bottom_edge_rule = 1;
+   rast.depth_clip = 1;
+
+   c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
+
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0; /* depth, stencil and alpha tests are all left disabled */
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) { /* both stencil[] entries get the same disabled configuration */
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa);
+   c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa); /* the only state object that is bound right here instead of at draw time */
 
    return true;
 }
@@ -199,60 +485,48 @@ static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
 
-   c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   c->pipe->bind_vs_state(c->pipe, NULL);
+   c->pipe->bind_fs_state(c->pipe, NULL);
+
+   c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
+   c->pipe->delete_blend_state(c->pipe, c->blend_clear);
+   c->pipe->delete_blend_state(c->pipe, c->blend_add);
+   c->pipe->delete_rasterizer_state(c->pipe, c->rast);
 }
 
 static bool
-init_shaders(struct vl_compositor *c)
+create_vertex_buffer(struct vl_compositor *c) /*
+ * (Re)allocates c->vertex_buf.buffer: drops the current reference, then
+ * creates a new stream-usage vertex buffer. Returns true when the new buffer
+ * was created. c->vertex_buf.stride must already be set, it is used to
+ * compute the size.
+ */
 {
    assert(c);
 
-   if (!create_vert_shader(c)) {
-      debug_printf("Unable to create vertex shader.\n");
-      return false;
-   }
-   if (!create_frag_shader_ycbcr_2_rgb(c)) {
-      debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
-      return false;
-   }
-   if (!create_frag_shader_rgb_2_rgb(c)) {
-      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
-      return false;
-   }
-
-   return true;
-}
-
-static void cleanup_shaders(struct vl_compositor *c)
-{
-   assert(c);
+   pipe_resource_reference(&c->vertex_buf.buffer, NULL); /* release whatever buffer is currently referenced */
+   c->vertex_buf.buffer = pipe_buffer_create
+   (
+      c->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STREAM,
+      c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4 /* room for four vertices per layer */
+   );
 
-   c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.ycbcr_2_rgb);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.rgb_2_rgb);
+   return c->vertex_buf.buffer != NULL;
 }
 
 static bool
 init_buffers(struct vl_compositor *c)
 {
-   struct fragment_shader_consts fsc;
-   struct pipe_vertex_element vertex_elems[2];
+   struct pipe_vertex_element vertex_elems[3];
 
    assert(c);
 
    /*
     * Create our vertex buffer and vertex buffer elements
     */
-   c->vertex_buf.stride = sizeof(struct vertex4f);
-   c->vertex_buf.max_index = (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 - 1;
+   c->vertex_buf.stride = sizeof(struct vertex2f) + sizeof(struct vertex4f) * 2;
    c->vertex_buf.buffer_offset = 0;
-   /* XXX: Create with DYNAMIC or STREAM */
-   c->vertex_buf.buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6
-   );
+   create_vertex_buffer(c);
 
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
@@ -261,24 +535,12 @@ init_buffers(struct vl_compositor *c)
    vertex_elems[1].src_offset = sizeof(struct vertex2f);
    vertex_elems[1].instance_divisor = 0;
    vertex_elems[1].vertex_buffer_index = 0;
-   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
-
-   /*
-    * Create our fragment shader's constant buffer
-    * Const buffer contains the color conversion matrix and bias vectors
-    */
-   /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
-   c->fs_const_buf = pipe_buffer_create
-   (
-      c->pipe->screen,
-      PIPE_BIND_CONSTANT_BUFFER,
-      sizeof(struct fragment_shader_consts)
-   );
-
-   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix);
-
-   vl_compositor_set_csc_matrix(c, fsc.matrix);
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) + sizeof(struct vertex4f);
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 3, vertex_elems);
 
    return true;
 }
@@ -290,347 +552,550 @@ cleanup_buffers(struct vl_compositor *c)
 
    c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
    pipe_resource_reference(&c->vertex_buf.buffer, NULL);
-   pipe_resource_reference(&c->fs_const_buf, NULL);
 }
 
-static void
-texview_map_delete(const struct keymap *map,
-                   const void *key, void *data,
-                   void *user)
+static INLINE struct u_rect
+default_rect(struct vl_compositor_layer *layer) /* Returns a rectangle covering the whole texture behind the layer's first sampler view. */
 {
-   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
+   struct pipe_resource *res = layer->sampler_views[0]->texture; /* sampler_views[0] must be set; it is dereferenced unchecked */
+   struct u_rect rect = { 0, res->width0, 0, res->height0 * res->array_size }; /* height spans all array layers; presumably the field order is x0, x1, y0, y1 - confirm against struct u_rect */
+   return rect;
+}
 
-   assert(map);
-   assert(key);
-   assert(data);
-   assert(user);
+/*
+ * Normalise the (x0, y0) corner of rect by dividing it component-wise by size.
+ */
+static INLINE struct vertex2f
+calc_topleft(struct vertex2f size, struct u_rect rect)
+{
+   struct vertex2f corner;
+
+   corner.x = rect.x0 / size.x;
+   corner.y = rect.y0 / size.y;
+
+   return corner;
+}
 
-   pipe_sampler_view_reference(&sv, NULL);
+/*
+ * Normalise the (x1, y1) corner of rect by dividing it component-wise by size.
+ */
+static INLINE struct vertex2f
+calc_bottomright(struct vertex2f size, struct u_rect rect)
+{
+   struct vertex2f corner;
+
+   corner.x = rect.x1 / size.x;
+   corner.y = rect.y1 / size.y;
+
+   return corner;
 }
 
-bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
+/*
+ * Store the source and destination rectangles of a layer, both normalised by
+ * the given width and height, and record the height in layer->zw.y
+ * (layer->zw.x is set to zero).
+ */
+static INLINE void
+calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
+                 struct u_rect src, struct u_rect dst)
 {
-   unsigned i;
+   struct vertex2f extent;
+
+   extent.x = width;
+   extent.y = height;
+
+   /* source rectangle */
+   layer->src.tl = calc_topleft(extent, src);
+   layer->src.br = calc_bottomright(extent, src);
+
+   /* destination rectangle */
+   layer->dst.tl = calc_topleft(extent, dst);
+   layer->dst.br = calc_bottomright(extent, dst);
+
+   layer->zw.x = 0.0f;
+   layer->zw.y = extent.y;
+}
 
-   assert(compositor);
+/*
+ * Write the four vertices of a layer's quad into vb.
+ *
+ * Every vertex occupies five consecutive vertex2f slots:
+ *   [0] destination position, [1] source coordinate, [2] layer->zw,
+ *   [3] colour.xy, [4] colour.zw
+ * The corners are emitted in the order top-left, top-right, bottom-right,
+ * bottom-left, and corner k takes its colour from layer->colors[k].
+ */
+static void
+gen_rect_verts(struct vertex2f *vb, struct vl_compositor_layer *layer)
+{
+   unsigned corner;
+
+   assert(vb && layer);
+
+   for (corner = 0; corner < 4; ++corner) {
+      struct vertex2f *v = vb + corner * 5;
+      /* corners 1 and 2 lie on the right edge, corners 2 and 3 on the bottom edge */
+      const bool right = corner == 1 || corner == 2;
+      const bool bottom = corner >= 2;
+
+      v[0].x = right ? layer->dst.br.x : layer->dst.tl.x;
+      v[0].y = bottom ? layer->dst.br.y : layer->dst.tl.y;
+
+      v[1].x = right ? layer->src.br.x : layer->src.tl.x;
+      v[1].y = bottom ? layer->src.br.y : layer->src.tl.y;
+
+      v[2] = layer->zw;
+
+      v[3].x = layer->colors[corner].x;
+      v[3].y = layer->colors[corner].y;
+      v[4].x = layer->colors[corner].z;
+      v[4].y = layer->colors[corner].w;
+   }
+}
 
-   memset(compositor, 0, sizeof(struct vl_compositor));
+/*
+ * Compute the rectangle a layer covers on the render target: the layer's
+ * destination rectangle run through the layer viewport (scale + translate)
+ * and then clamped to the scissor rectangle stored in the state.
+ */
+static INLINE struct u_rect
+calc_drawn_area(struct vl_compositor_state *s, struct vl_compositor_layer *layer)
+{
+   struct u_rect area;
+
+   /* left edge */
+   area.x0 = layer->dst.tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
+   area.x0 = MAX2(area.x0, s->scissor.minx);
+
+   /* top edge */
+   area.y0 = layer->dst.tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
+   area.y0 = MAX2(area.y0, s->scissor.miny);
+
+   /* right edge */
+   area.x1 = layer->dst.br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
+   area.x1 = MIN2(area.x1, s->scissor.maxx);
+
+   /* bottom edge */
+   area.y1 = layer->dst.br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
+   area.y1 = MIN2(area.y1, s->scissor.maxy);
+
+   return area;
+}
 
-   compositor->pipe = pipe;
+static void
+gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
+{  /*
+    * Maps the vertex buffer and writes one quad for every layer whose bit is
+    * set in s->used_layers. Layers without a valid viewport get a default
+    * one covering the framebuffer. When dirty is non-NULL and a clearing
+    * layer completely covers the dirty rectangle, the dirty rectangle is
+    * emptied.
+    */
+   struct vertex2f *vb;
+   struct pipe_transfer *buf_transfer;
+   unsigned i;
 
-   compositor->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
-                                             texview_map_delete);
-   if (!compositor->texview_map)
-      return false;
+   assert(c);
 
-   if (!init_pipe_state(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
-      return false;
+   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DONTBLOCK,
+                        &buf_transfer); /* first attempt must not block */
+
+   if (!vb) {
+      // If buffer is still locked from last draw create a new one
+      create_vertex_buffer(c); /* NOTE(review): the bool result is ignored */
+      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+                           &buf_transfer); /* NOTE(review): vb is not re-checked after this second map and is used unconditionally below */
    }
-   if (!init_shaders(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
-      cleanup_pipe_state(compositor);
-      return false;
+
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
+      if (s->used_layers & (1 << i)) {
+         struct vl_compositor_layer *layer = &s->layers[i];
+         gen_rect_verts(vb, layer);
+         vb += 20; /* gen_rect_verts() fills vb[0..19]: four vertices of five vertex2f slots each */
+
+         if (!layer->viewport_valid) { /* default viewport: scale by the framebuffer size, no translation */
+            layer->viewport.scale[0] = c->fb_state.width;
+            layer->viewport.scale[1] = c->fb_state.height;
+            layer->viewport.translate[0] = 0;
+            layer->viewport.translate[1] = 0;
+         }
+
+         if (dirty && layer->clearing) {
+            struct u_rect drawn = calc_drawn_area(s, layer);
+            if (
+             dirty->x0 >= drawn.x0 &&
+             dirty->y0 >= drawn.y0 &&
+             dirty->x1 <= drawn.x1 &&
+             dirty->y1 <= drawn.y1) {
+
+               // We clear the dirty area anyway, no need for clear_render_target
+               dirty->x0 = dirty->y0 = MAX_DIRTY; /* min > max encodes an empty dirty rectangle */
+               dirty->x1 = dirty->y1 = MIN_DIRTY;
+            }
+         }
+      }
    }
-   if (!init_buffers(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
-      cleanup_shaders(compositor);
-      cleanup_pipe_state(compositor);
-      return false;
+
+   pipe_buffer_unmap(c->pipe, buf_transfer);
+}
+
+/*
+ * Issue one quad draw per used layer, binding the layer's blend state,
+ * viewport, fragment shader, sampler states and sampler views first.
+ * When dirty is non-NULL it is grown to include the area of every layer drawn.
+ */
+static void
+draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
+{
+   unsigned slot, quad = 0;
+
+   assert(c);
+
+   for (slot = 0; slot < VL_COMPOSITOR_MAX_LAYERS; ++slot) {
+      struct vl_compositor_layer *layer;
+      struct pipe_sampler_view **views;
+      struct pipe_context *pipe;
+      unsigned num_views;
+      void *blend;
+
+      if (!(s->used_layers & (1 << slot)))
+         continue;
+
+      pipe = c->pipe;
+      layer = &s->layers[slot];
+      views = &layer->sampler_views[0];
+
+      /* count the leading sampler views; the first one is always counted */
+      num_views = 1;
+      if (views[1])
+         num_views = views[2] ? 3 : 2;
+
+      /* a layer-specific blend state wins; otherwise slot 0 uses
+       * blend_clear and every other slot uses blend_add */
+      if (layer->blend)
+         blend = layer->blend;
+      else if (slot)
+         blend = c->blend_add;
+      else
+         blend = c->blend_clear;
+
+      pipe->bind_blend_state(pipe, blend);
+      pipe->set_viewport_states(pipe, 0, 1, &layer->viewport);
+      pipe->bind_fs_state(pipe, layer->fs);
+
+      /* use the generic entry point when the driver provides one */
+      if (pipe->bind_sampler_states) {
+         pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0,
+                                   num_views, layer->samplers);
+      } else {
+         pipe->bind_fragment_sampler_states(pipe, num_views, layer->samplers);
+      }
+
+      pipe->set_fragment_sampler_views(pipe, num_views, views);
+      util_draw_arrays(pipe, PIPE_PRIM_QUADS, quad * 4, 4);
+      ++quad;
+
+      if (dirty) {
+         /* grow the dirty rectangle so the next draw knows what was touched */
+         struct u_rect drawn = calc_drawn_area(s, layer);
+
+         dirty->x0 = MIN2(drawn.x0, dirty->x0);
+         dirty->y0 = MIN2(drawn.y0, dirty->y0);
+         dirty->x1 = MAX2(drawn.x1, dirty->x1);
+         dirty->y1 = MAX2(drawn.y1, dirty->y1);
+      }
    }
+}
 
-   compositor->fb_state.width = 0;
-   compositor->fb_state.height = 0;
-   compositor->bg = NULL;
-   compositor->dirty_bg = false;
-   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
-      compositor->layers[i] = NULL;
-   compositor->dirty_layers = 0;
+void
+vl_compositor_reset_dirty_area(struct u_rect *dirty)
+{  /* Sets *dirty to the largest rectangle this file tracks: (MIN_DIRTY, MIN_DIRTY) - (MAX_DIRTY, MAX_DIRTY). */
+   assert(dirty);
 
-   return true;
+   dirty->x0 = dirty->y0 = MIN_DIRTY;
+   dirty->x1 = dirty->y1 = MAX_DIRTY;
 }
 
-void vl_compositor_cleanup(struct vl_compositor *compositor)
+void
+vl_compositor_set_clear_color(struct vl_compositor_state *s, union pipe_color_union *color) /* Copies *color into s->clear_color. */
 {
-   assert(compositor);
+   assert(s);
+   assert(color);
 
-   util_delete_keymap(compositor->texview_map, compositor->pipe);
-   cleanup_buffers(compositor);
-   cleanup_shaders(compositor);
-   cleanup_pipe_state(compositor);
+   s->clear_color = *color; /* stored by value; the caller keeps ownership of *color */
 }
 
-void vl_compositor_set_background(struct vl_compositor *compositor,
-                                 struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect)
+void
+vl_compositor_get_clear_color(struct vl_compositor_state *s, union pipe_color_union *color) /* Copies s->clear_color into *color. */
 {
-   assert(compositor);
-   assert((bg && bg_src_rect) || (!bg && !bg_src_rect));
+   assert(s);
+   assert(color);
+
+   *color = s->clear_color; /* out-parameter receives a copy of the stored colour */
+}
 
-   if (compositor->bg != bg ||
-       !u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect)) {
-      pipe_surface_reference(&compositor->bg, bg);
-      /*if (!u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect))*/
-         compositor->bg_src_rect = *bg_src_rect;
-      compositor->dirty_bg = true;
+/*
+ * Reset every layer slot of the state to its defaults: no layer is marked as
+ * used, only slot 0 is a clearing layer, custom blend state and fragment
+ * shader are dropped, the sampler view references are released and all four
+ * vertex colours become opaque white.
+ */
+void
+vl_compositor_clear_layers(struct vl_compositor_state *s)
+{
+   const struct vertex4f white = { 1.0f, 1.0f, 1.0f, 1.0f };
+   unsigned slot, k;
+
+   assert(s);
+
+   s->used_layers = 0;
+   for (slot = 0; slot < VL_COMPOSITOR_MAX_LAYERS; ++slot) {
+      struct vl_compositor_layer *layer = &s->layers[slot];
+
+      layer->clearing = (slot == 0);
+      layer->blend = NULL;
+      layer->fs = NULL;
+
+      /* only the last two viewport components are reset here */
+      layer->viewport.scale[2] = 1;
+      layer->viewport.scale[3] = 1;
+      layer->viewport.translate[2] = 0;
+      layer->viewport.translate[3] = 0;
+
+      for (k = 0; k < 3; ++k)
+         pipe_sampler_view_reference(&layer->sampler_views[k], NULL);
+
+      for (k = 0; k < 4; ++k)
+         layer->colors[k] = white;
    }
 }
 
-void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_surface *layers[],
-                              struct pipe_video_rect *src_rects[],
-                              struct pipe_video_rect *dst_rects[],
-                              unsigned num_layers)
+/**
+ * Tear down a compositor.
+ *
+ * Calls cleanup_buffers(), cleanup_shaders() and cleanup_pipe_state() in
+ * that order, i.e. the reverse of the order in which vl_compositor_init()
+ * runs the matching init_* steps.
+ *
+ * c: compositor to clean up; must not be NULL (asserted).
+ */
+void
+vl_compositor_cleanup(struct vl_compositor *c)
 {
-   unsigned i;
+   assert(c);
 
-   assert(compositor);
-   assert(num_layers <= VL_COMPOSITOR_MAX_LAYERS);
-
-   for (i = 0; i < num_layers; ++i)
-   {
-      assert((layers[i] && src_rects[i] && dst_rects[i]) ||
-             (!layers[i] && !src_rects[i] && !dst_rects[i]));
-
-      if (compositor->layers[i] != layers[i] ||
-          !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
-          !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
-      {
-         pipe_surface_reference(&compositor->layers[i], layers[i]);
-         /*if (!u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]))*/
-            compositor->layer_src_rects[i] = *src_rects[i];
-         /*if (!u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))*/
-            compositor->layer_dst_rects[i] = *dst_rects[i];
-         compositor->dirty_layers |= 1 << i;
-      }
+   cleanup_buffers(c);
+   cleanup_shaders(c);
+   cleanup_pipe_state(c);
+}
+
+/**
+ * Upload a color space conversion matrix into the state's constant buffer.
+ *
+ * Maps s->csc_matrix for writing, copies sizeof(vl_csc_matrix) bytes from
+ * matrix into it and unmaps it again. If the buffer cannot be mapped the
+ * call is a no-op and the previous buffer contents are left in place.
+ *
+ * s:      compositor state owning the constant buffer; must not be NULL
+ *         (asserted).
+ * matrix: matrix to upload; must not be NULL (asserted).
+ */
+void
+vl_compositor_set_csc_matrix(struct vl_compositor_state *s, vl_csc_matrix const *matrix)
+{
+   struct pipe_transfer *buf_transfer;
+   void *dst;
+
+   assert(s);
+   assert(matrix);
 
-      if (layers[i])
-         compositor->dirty_layers |= 1 << i;
+   dst = pipe_buffer_map(s->pipe, s->csc_matrix,
+                         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+                         &buf_transfer);
+
+   /* mapping can fail; never hand a NULL destination to memcpy */
+   if (!dst)
+      return;
+
+   memcpy(dst, matrix, sizeof(vl_csc_matrix));
+
+   pipe_buffer_unmap(s->pipe, buf_transfer);
+}
+
+/**
+ * Set or remove the destination clip (scissor) rectangle.
+ *
+ * s:        compositor state to update; must not be NULL (asserted).
+ * dst_clip: clip rectangle copied into s->scissor, or NULL to mark the
+ *           scissor as not valid. vl_compositor_render() substitutes the
+ *           full destination surface when the scissor is not valid.
+ */
+void
+vl_compositor_set_dst_clip(struct vl_compositor_state *s, struct u_rect *dst_clip)
+{
+   assert(s);
+
+   s->scissor_valid = dst_clip != NULL;
+   if (dst_clip) {
+      s->scissor.minx = dst_clip->x0;
+      s->scissor.miny = dst_clip->y0;
+      s->scissor.maxx = dst_clip->x1;
+      s->scissor.maxy = dst_clip->y1;
    }
+}
 
-   for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
-      pipe_surface_reference(&compositor->layers[i], NULL);
-}
-
-static void gen_rect_verts(unsigned pos,
-                           struct pipe_video_rect *src_rect,
-                           struct vertex2f *src_inv_size,
-                           struct pipe_video_rect *dst_rect,
-                           struct vertex2f *dst_inv_size,
-                           struct vertex4f *vb)
-{
-   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 2);
-   assert(src_rect);
-   assert(src_inv_size);
-   assert((dst_rect && dst_inv_size) /*|| (!dst_rect && !dst_inv_size)*/);
-   assert(vb);
-
-   vb[pos * 6 + 0].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 0].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 0].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 0].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 1].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 1].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 1].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 1].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-
-   vb[pos * 6 + 2].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 2].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 2].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 2].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 3].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 3].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 3].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 3].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 4].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 4].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 4].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 4].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-
-   vb[pos * 6 + 5].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 5].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 5].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 5].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-}
-
-static unsigned gen_data(struct vl_compositor *c,
-                         struct pipe_surface *src_surface,
-                         struct pipe_video_rect *src_rect,
-                         struct pipe_video_rect *dst_rect,
-                         struct pipe_surface **textures,
-                         void **frag_shaders)
-{
-   void *vb;
-   struct pipe_transfer *buf_transfer;
-   unsigned num_rects = 0;
-   unsigned i;
+/**
+ * Assign a blend state to a layer and set its clearing flag.
+ *
+ * s:           compositor state; must not be NULL (asserted).
+ * layer:       layer index; must be below VL_COMPOSITOR_MAX_LAYERS
+ *              (asserted).
+ * blend:       opaque blend state pointer stored in the layer; must not be
+ *              NULL (asserted). The pointer is stored as-is.
+ * is_clearing: value written to the layer's clearing flag.
+ */
+void
+vl_compositor_set_layer_blend(struct vl_compositor_state *s,
+                              unsigned layer, void *blend,
+                              bool is_clearing)
+{
+   assert(s && blend);
 
-   assert(c);
-   assert(src_surface);
-   assert(src_rect);
-   assert(dst_rect);
-   assert(textures);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
-   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
-                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-                        &buf_transfer);
+   s->layers[layer].clearing = is_clearing;
+   s->layers[layer].blend = blend;
+}
+
+/**
+ * Set or remove the destination area (viewport) of a layer.
+ *
+ * s:        compositor state; must not be NULL (asserted).
+ * layer:    layer index; must be below VL_COMPOSITOR_MAX_LAYERS (asserted).
+ * dst_area: destination rectangle, or NULL to mark the layer's viewport as
+ *           not valid. When given, the x/y viewport scale is the
+ *           rectangle's width/height and the x/y translate is its origin.
+ */
+void
+vl_compositor_set_layer_dst_area(struct vl_compositor_state *s,
+                                 unsigned layer, struct u_rect *dst_area)
+{
+   assert(s);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
-   if (!vb)
-      return 0;
-
-   if (c->dirty_bg) {
-      struct vertex2f bg_inv_size = {1.0f / c->bg->width, 1.0f / c->bg->height};
-      gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
-      textures[num_rects] = c->bg;
-      /* XXX: Hack */
-      frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
-      ++num_rects;
-      c->dirty_bg = false;
+   s->layers[layer].viewport_valid = dst_area != NULL;
+   if (dst_area) {
+      s->layers[layer].viewport.scale[0] = dst_area->x1 - dst_area->x0;
+      s->layers[layer].viewport.scale[1] = dst_area->y1 - dst_area->y0;
+      s->layers[layer].viewport.translate[0] = dst_area->x0;
+      s->layers[layer].viewport.translate[1] = dst_area->y0;
    }
+}
+
+/**
+ * Configure a layer to show a video buffer.
+ *
+ * Marks the layer as used, binds c->sampler_linear plus the buffer's
+ * sampler view components to the layer's three sampler slots and computes
+ * the source/destination coordinates via calc_src_and_dst(). The fragment
+ * shader is then chosen: c->fs_video_buffer for progressive buffers and
+ * for both bob modes, c->fs_weave for weave deinterlacing.
+ *
+ * s:           compositor state; must not be NULL (asserted).
+ * c:           compositor providing samplers and shaders; must not be NULL
+ *              (asserted).
+ * layer:       layer index; must be below VL_COMPOSITOR_MAX_LAYERS
+ *              (asserted).
+ * buffer:      video buffer to display; must not be NULL (asserted).
+ * src_rect:    source rectangle, or NULL to use default_rect().
+ * dst_rect:    destination rectangle, or NULL to use default_rect().
+ * deinterlace: deinterlacing mode; only consulted when buffer->interlaced
+ *              is set.
+ *
+ * NOTE(review): the array returned by get_sampler_view_components() is
+ * indexed without a NULL check -- confirm it cannot fail.
+ * NOTE(review): the switch has no default case, so for an interlaced
+ * buffer and a deinterlace value other than the three handled ones the
+ * layer's fs is left unchanged -- confirm that is intended.
+ */
+void
+vl_compositor_set_buffer_layer(struct vl_compositor_state *s,
+                               struct vl_compositor *c,
+                               unsigned layer,
+                               struct pipe_video_buffer *buffer,
+                               struct u_rect *src_rect,
+                               struct u_rect *dst_rect,
+                               enum vl_compositor_deinterlace deinterlace)
+{
+   struct pipe_sampler_view **sampler_views;
+   unsigned i;
+
+   assert(s && c && buffer);
 
-   {
-      struct vertex2f src_inv_size = { 1.0f / src_surface->width, 1.0f / src_surface->height};
-      gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
-      textures[num_rects] = src_surface;
-      /* XXX: Hack, sort of */
-      frag_shaders[num_rects] = c->fragment_shader.ycbcr_2_rgb;
-      ++num_rects;
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   s->used_layers |= 1 << layer;
+   sampler_views = buffer->get_sampler_view_components(buffer);
+   for (i = 0; i < 3; ++i) {
+      s->layers[layer].samplers[i] = c->sampler_linear;
+      pipe_sampler_view_reference(&s->layers[layer].sampler_views[i], sampler_views[i]);
    }
 
-   for (i = 0; c->dirty_layers > 0; i++) {
-      assert(i < VL_COMPOSITOR_MAX_LAYERS);
-
-      if (c->dirty_layers & (1 << i)) {
-         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width, 1.0f / c->layers[i]->height};
-         gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
-                        &c->layer_dst_rects[i], &c->fb_inv_size, vb);
-         textures[num_rects] = c->layers[i];
-         /* XXX: Hack */
-         frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
-         ++num_rects;
-         c->dirty_layers &= ~(1 << i);
+   calc_src_and_dst(&s->layers[layer], buffer->width, buffer->height,
+                    src_rect ? *src_rect : default_rect(&s->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+
+   if (buffer->interlaced) {
+      /* presumably zw.y was set by calc_src_and_dst() above -- TODO confirm */
+      float half_a_line = 0.5f / s->layers[layer].zw.y;
+      switch(deinterlace) {
+      case VL_COMPOSITOR_WEAVE:
+         s->layers[layer].fs = c->fs_weave;
+         break;
+
+      case VL_COMPOSITOR_BOB_TOP:
+         /* zw.x = 0 and source rows shifted by +half_a_line */
+         s->layers[layer].zw.x = 0.0f;
+         s->layers[layer].src.tl.y += half_a_line;
+         s->layers[layer].src.br.y += half_a_line;
+         s->layers[layer].fs = c->fs_video_buffer;
+         break;
+
+      case VL_COMPOSITOR_BOB_BOTTOM:
+         /* zw.x = 1 and source rows shifted by -half_a_line */
+         s->layers[layer].zw.x = 1.0f;
+         s->layers[layer].src.tl.y -= half_a_line;
+         s->layers[layer].src.br.y -= half_a_line;
+         s->layers[layer].fs = c->fs_video_buffer;
+         break;
       }
-   }
 
-   pipe_buffer_unmap(c->pipe, c->vertex_buf.buffer, buf_transfer);
+   } else
+      s->layers[layer].fs = c->fs_video_buffer;
+}
+
+/**
+ * Configure a layer to show a paletted image.
+ *
+ * Marks the layer as used and selects c->fs_palette.yuv when
+ * include_color_conversion is set, c->fs_palette.rgb otherwise. Slot 0
+ * samples the index view with c->sampler_linear, slot 1 samples the
+ * palette view with c->sampler_nearest, slot 2 is cleared. Source and
+ * destination coordinates are computed by calc_src_and_dst() from the
+ * index texture's width0/height0.
+ *
+ * s:        compositor state; must not be NULL (asserted).
+ * c:        compositor providing samplers and shaders; must not be NULL
+ *           (asserted).
+ * layer:    layer index; must be below VL_COMPOSITOR_MAX_LAYERS (asserted).
+ * indexes:  sampler view holding the palette indices; must not be NULL
+ *           (asserted).
+ * palette:  sampler view holding the palette; must not be NULL (asserted).
+ * src_rect: source rectangle, or NULL to use default_rect().
+ * dst_rect: destination rectangle, or NULL to use default_rect().
+ * include_color_conversion: selects the yuv variant of the palette shader.
+ */
+void
+vl_compositor_set_palette_layer(struct vl_compositor_state *s,
+                                struct vl_compositor *c,
+                                unsigned layer,
+                                struct pipe_sampler_view *indexes,
+                                struct pipe_sampler_view *palette,
+                                struct u_rect *src_rect,
+                                struct u_rect *dst_rect,
+                                bool include_color_conversion)
+{
+   assert(s && c && indexes && palette);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   s->used_layers |= 1 << layer;
 
-   return num_rects;
+   s->layers[layer].fs = include_color_conversion ?
+      c->fs_palette.yuv : c->fs_palette.rgb;
+
+   s->layers[layer].samplers[0] = c->sampler_linear;
+   s->layers[layer].samplers[1] = c->sampler_nearest;
+   s->layers[layer].samplers[2] = NULL;
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[0], indexes);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[1], palette);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[2], NULL);
+   calc_src_and_dst(&s->layers[layer], indexes->texture->width0, indexes->texture->height0,
+                    src_rect ? *src_rect : default_rect(&s->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
 }
 
-static void draw_layers(struct vl_compositor *c,
-                        struct pipe_surface *src_surface,
-                        struct pipe_video_rect *src_rect,
-                        struct pipe_video_rect *dst_rect)
+/**
+ * Configure a layer to show an RGBA image.
+ *
+ * Marks the layer as used, selects c->fs_rgba, binds the rgba view with
+ * c->sampler_linear in slot 0 and clears slots 1 and 2. Source and
+ * destination coordinates are computed by calc_src_and_dst() from the
+ * texture's width0/height0.
+ *
+ * s:        compositor state; must not be NULL (asserted).
+ * c:        compositor providing samplers and shaders; must not be NULL
+ *           (asserted).
+ * layer:    layer index; must be below VL_COMPOSITOR_MAX_LAYERS (asserted).
+ * rgba:     sampler view to display; must not be NULL (asserted).
+ * src_rect: source rectangle, or NULL to use default_rect().
+ * dst_rect: destination rectangle, or NULL to use default_rect().
+ * colors:   optional array of four colors copied into the layer; when NULL
+ *           the layer's current colors are kept.
+ */
+void
+vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
+                             struct vl_compositor *c,
+                             unsigned layer,
+                             struct pipe_sampler_view *rgba,
+                             struct u_rect *src_rect,
+                             struct u_rect *dst_rect,
+                             struct vertex4f *colors)
 {
-   unsigned num_rects;
-   struct pipe_surface *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
-   void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 2];
    unsigned i;
 
+   assert(s && c && rgba);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   s->used_layers |= 1 << layer;
+   s->layers[layer].fs = c->fs_rgba;
+   s->layers[layer].samplers[0] = c->sampler_linear;
+   s->layers[layer].samplers[1] = NULL;
+   s->layers[layer].samplers[2] = NULL;
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[0], rgba);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[1], NULL);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[2], NULL);
+   calc_src_and_dst(&s->layers[layer], rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(&s->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+
+   if (colors)
+      for (i = 0; i < 4; ++i)
+         s->layers[layer].colors[i] = colors[i];
+}
+
+/**
+ * Render the configured layers of a compositor state into a surface.
+ *
+ * Points the compositor's framebuffer state at dst_surface, falls back to
+ * a scissor covering the whole surface when no destination clip is set,
+ * generates the vertex data, optionally clears the surface, binds the
+ * scissor, framebuffer, vertex shader, vertex buffer, vertex elements,
+ * CSC constant buffer and rasterizer state, and finally draws the layers.
+ *
+ * s:           compositor state to render. NOTE(review): dereferenced but
+ *              not asserted, unlike c and dst_surface.
+ * c:           compositor to render with; must not be NULL (asserted).
+ * dst_surface: render target; must not be NULL (asserted).
+ * dirty_area:  optional dirty rectangle, passed to gen_vertex_data() and
+ *              draw_layers(); may be NULL.
+ * clear_dirty: when set and dirty_area is non-NULL with x0 < x1 or
+ *              y0 < y1, the whole destination surface is cleared to the
+ *              state's clear color and dirty_area is set to
+ *              x0 = y0 = MAX_DIRTY, x1 = y1 = MIN_DIRTY.
+ */
+void
+vl_compositor_render(struct vl_compositor_state *s,
+                     struct vl_compositor       *c,
+                     struct pipe_surface        *dst_surface,
+                     struct u_rect              *dirty_area,
+                     bool                        clear_dirty)
+{
    assert(c);
-   assert(src_surface);
-   assert(src_rect);
-   assert(dst_rect);
-
-   num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces, frag_shaders);
-
-   for (i = 0; i < num_rects; ++i) {
-      boolean delete_view = FALSE;
-      struct pipe_sampler_view *surface_view = (struct pipe_sampler_view*)util_keymap_lookup(c->texview_map,
-                                                                                             &src_surfaces[i]);
-      if (!surface_view) {
-         struct pipe_sampler_view templat;
-         u_sampler_view_default_template(&templat, src_surfaces[i]->texture,
-                                         src_surfaces[i]->texture->format);
-         surface_view = c->pipe->create_sampler_view(c->pipe, src_surfaces[i]->texture,
-                                                     &templat);
-         if (!surface_view)
-            return;
-
-         delete_view = !util_keymap_insert(c->texview_map, &src_surfaces[i],
-                                           surface_view, c->pipe);
-      }
+   assert(dst_surface);
 
-      c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
-      c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
-      c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
+   c->fb_state.width = dst_surface->width;
+   c->fb_state.height = dst_surface->height;
+   c->fb_state.cbufs[0] = dst_surface;
+   
+   /* no destination clip set: scissor to the whole surface */
+   if (!s->scissor_valid) {
+      s->scissor.minx = 0;
+      s->scissor.miny = 0;
+      s->scissor.maxx = dst_surface->width;
+      s->scissor.maxy = dst_surface->height;
+   }
 
-      if (delete_view) {
-         pipe_sampler_view_reference(&surface_view, NULL);
-      }
+   gen_vertex_data(c, s, dirty_area);
+
+   if (clear_dirty && dirty_area &&
+       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
+
+      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
+                                   0, 0, dst_surface->width, dst_surface->height);
+      /* inverted bounds: neither x0 < x1 nor y0 < y1 holds afterwards */
+      dirty_area->x0 = dirty_area->y0 = MAX_DIRTY;
+      dirty_area->x1 = dirty_area->y1 = MIN_DIRTY;
    }
+
+   c->pipe->set_scissor_states(c->pipe, 0, 1, &s->scissor);
+   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
+   c->pipe->bind_vs_state(c->pipe, c->vs);
+   c->pipe->set_vertex_buffers(c->pipe, 0, 1, &c->vertex_buf);
+   c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
+   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, s->csc_matrix);
+   c->pipe->bind_rasterizer_state(c->pipe, c->rast);
+
+   draw_layers(c, s, dirty_area);
 }
 
-void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_surface           *src_surface,
-                          enum pipe_mpeg12_picture_type picture_type,
-                          /*unsigned                    num_past_surfaces,
-                          struct pipe_surface           *past_surfaces,
-                          unsigned                      num_future_surfaces,
-                          struct pipe_surface           *future_surfaces,*/
-                          struct pipe_video_rect        *src_area,
-                          struct pipe_surface           *dst_surface,
-                          struct pipe_video_rect        *dst_area,
-                          struct pipe_fence_handle      **fence)
-{
-   assert(compositor);
-   assert(src_surface);
-   assert(src_area);
-   assert(dst_surface);
-   assert(dst_area);
-   assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
+/**
+ * Initialise a compositor for the given pipe context.
+ *
+ * Zeroes *c, stores the context and then runs init_pipe_state(),
+ * init_shaders() and init_buffers() in that order. When a later step
+ * fails, the steps that already succeeded are undone with the matching
+ * cleanup_* call before returning.
+ *
+ * c:    compositor to initialise; must not be NULL (asserted).
+ * pipe: context stored in c->pipe.
+ *
+ * Returns true on success, false if any initialisation step failed.
+ */
+bool
+vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
+{
+   assert(c);
 
-   if (compositor->fb_state.width != dst_surface->width) {
-      compositor->fb_inv_size.x = 1.0f / dst_surface->width;
-      compositor->fb_state.width = dst_surface->width;
-   }
-   if (compositor->fb_state.height != dst_surface->height) {
-      compositor->fb_inv_size.y = 1.0f / dst_surface->height;
-      compositor->fb_state.height = dst_surface->height;
-   }
+   memset(c, 0, sizeof(*c));
 
-   compositor->fb_state.cbufs[0] = dst_surface;
+   c->pipe = pipe;
 
-   compositor->viewport.scale[0] = compositor->fb_state.width;
-   compositor->viewport.scale[1] = compositor->fb_state.height;
-   compositor->viewport.scale[2] = 1;
-   compositor->viewport.scale[3] = 1;
-   compositor->viewport.translate[0] = 0;
-   compositor->viewport.translate[1] = 0;
-   compositor->viewport.translate[2] = 0;
-   compositor->viewport.translate[3] = 0;
+   if (!init_pipe_state(c))
+      return false;
 
-   compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
-   compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
-   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
-   compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
-   compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
-   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
-   compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
+   if (!init_shaders(c)) {
+      cleanup_pipe_state(c);
+      return false;
+   }
 
-   draw_layers(compositor, src_surface, src_area, dst_area);
+   if (!init_buffers(c)) {
+      cleanup_shaders(c);
+      cleanup_pipe_state(c);
+      return false;
+   }
 
-   assert(!compositor->dirty_bg && !compositor->dirty_layers);
-   compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
+   return true;
 }
 
-void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat)
+/**
+ * Initialise a compositor state for the given pipe context.
+ *
+ * Zeroes *s, stores the context, sets the clear color to (0, 0, 0, 0),
+ * creates the constant buffer that holds the color conversion matrix,
+ * resets all layers and uploads the identity CSC matrix.
+ *
+ * s:    compositor state to initialise; must not be NULL (asserted).
+ * pipe: context stored in s->pipe; its screen is used to create the
+ *       constant buffer.
+ *
+ * Returns true on success, false if the constant buffer could not be
+ * created.
+ */
+bool
+vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pipe)
 {
-   struct pipe_transfer *buf_transfer;
+   vl_csc_matrix csc_matrix;
 
-   assert(compositor);
+   assert(s);
 
-   memcpy
+   memset(s, 0, sizeof(*s));
+
+   s->pipe = pipe;
+
+   s->clear_color.f[0] = s->clear_color.f[1] = 0.0f;
+   s->clear_color.f[2] = s->clear_color.f[3] = 0.0f;
+
+   /*
+    * Create our fragment shader's constant buffer
+    * Const buffer contains the color conversion matrix and bias vectors
+    */
+   /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
+   s->csc_matrix = pipe_buffer_create
    (
-      pipe_buffer_map(compositor->pipe, compositor->fs_const_buf,
-                      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-                      &buf_transfer),
-      mat,
-      sizeof(struct fragment_shader_consts)
+      pipe->screen,
+      PIPE_BIND_CONSTANT_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(csc_matrix)
    );
 
-   pipe_buffer_unmap(compositor->pipe, compositor->fs_const_buf,
-                     buf_transfer);
+   /* buffer creation can fail; do not map a NULL buffer or report success */
+   if (!s->csc_matrix)
+      return false;
+
+   vl_compositor_clear_layers(s);
+
+   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, &csc_matrix);
+   vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix);
+
+   return true;
+}
+
+/**
+ * Release the resources held by a compositor state.
+ *
+ * Resets all layers via vl_compositor_clear_layers(), which drops their
+ * sampler view references, and then drops the reference to the CSC
+ * constant buffer.
+ *
+ * s: compositor state to clean up; must not be NULL (asserted).
+ */
+void
+vl_compositor_cleanup_state(struct vl_compositor_state *s)
+{
+   assert(s);
+
+   vl_compositor_clear_layers(s);
+   pipe_resource_reference(&s->csc_matrix, NULL);
 }