Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor.c
index ba23435f698240257bf29fe798937e66dab6dbed..bafe232877e20eeeba1a3b6d0f0d8b2188dc4329 100644 (file)
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include "vl_compositor.h"
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <util/u_inlines.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
 #include <util/u_memory.h>
+#include <tgsi/tgsi_ureg.h>
 #include "vl_csc.h"
-#include "vl_shader_build.h"
-
-struct vertex2f
-{
-   float x, y;
-};
-
-struct vertex4f
-{
-   float x, y, z, w;
-};
 
 struct vertex_shader_consts
 {
@@ -58,172 +46,93 @@ struct fragment_shader_consts
    float matrix[16];
 };
 
-/*
- * Represents 2 triangles in a strip in normalized coords.
- * Used to render the surface onto the frame buffer.
- */
-static const struct vertex2f surface_verts[4] =
+static bool
+u_video_rects_equal(struct pipe_video_rect *a, struct pipe_video_rect *b)
 {
-   {0.0f, 0.0f},
-   {0.0f, 1.0f},
-   {1.0f, 0.0f},
-   {1.0f, 1.0f}
-};
+   assert(a && b);
+
+   if (a->x != b->x)
+      return false;
+   if (a->y != b->y)
+      return false;
+   if (a->w != b->w)
+      return false;
+   if (a->h != b->h)
+      return false;
 
-/*
- * Represents texcoords for the above. We can use the position values directly.
- * TODO: Duplicate these in the shader, no need to create a buffer.
- */
-static const struct vertex2f *surface_texcoords = surface_verts;
+   return true;
+}
 
-static void
+static bool
 create_vert_shader(struct vl_compositor *c)
 {
-   const unsigned max_tokens = 50;
-
-   struct pipe_shader_state vs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex;
+   struct ureg_dst o_vpos, o_vtex;
 
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
-   unsigned i;
-
-   assert(c);
-
-   tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
-   header = (struct tgsi_header*)&tokens[0];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-   ti = 2;
-
-   /*
-    * decl i0             ; Vertex pos
-    * decl i1             ; Vertex texcoords
-    */
-   for (i = 0; i < 2; i++) {
-      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl c0             ; Scaling vector to scale vertex pos rect to destination size
-    * decl c1             ; Translation vector to move vertex pos rect into position
-    * decl c2             ; Scaling vector to scale texcoord rect to source size
-    * decl c3             ; Translation vector to move texcoord rect into position
-    */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * decl o0             ; Vertex pos
-    * decl o1             ; Vertex texcoords
-    */
-   for (i = 0; i < 2; i++) {
-      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
 
-   /* decl t0, t1 */
-   decl = vl_decl_temps(0, 1);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   vpos = ureg_DECL_vs_input(shader, 0);
+   vtex = ureg_DECL_vs_input(shader, 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);
 
    /*
-    * mad o0, i0, c0, c1  ; Scale and translate unit output rect to destination size and pos
-    * mad o1, i1, c2, c3  ; Scale and translate unit texcoord rect to source size and pos
+    * o_vpos = vpos
+    * o_vtex = vtex
     */
-   for (i = 0; i < 2; ++i) {
-      inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_MOV(shader, o_vpos, vpos);
+   ureg_MOV(shader, o_vtex, vtex);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   c->vertex_shader = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->vertex_shader)
+      return false;
 
-   vs.tokens = tokens;
-   c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs);
-   FREE(tokens);
+   return true;
 }
 
-static void
+static bool
 create_frag_shader(struct vl_compositor *c)
 {
-   const unsigned max_tokens = 50;
-
-   struct pipe_shader_state fs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src csc[4];
+   struct ureg_src sampler;
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
    unsigned i;
 
-   assert(c);
-
-   tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
-   header = (struct tgsi_header*)&tokens[0];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-   ti = 2;
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
 
-   /* decl i0             ; Texcoords for s0 */
-   decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 4; ++i)
+      csc[i] = ureg_DECL_constant(shader, i);
+   sampler = ureg_DECL_sampler(shader, 0);
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * decl c0-c3          ; CSC matrix c0-c3
+    * texel = tex(tc, sampler)
+    * fragment = csc * texel
     */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl o0             ; Fragment color */
-   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl t0 */
-   decl = vl_decl_temps(0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl s0             ; Sampler for tex containing picture to display */
-   decl = vl_decl_samplers(0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* tex2d t0, i0, s0    ; Read src pixel */
-   inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   for (i = 0; i < 4; ++i)
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
 
-   /*
-    * dp4 o0.x, t0, c0    ; Multiply pixel by the color conversion matrix
-    * dp4 o0.y, t0, c1
-    * dp4 o0.z, t0, c2
-    * dp4 o0.w, t0, c3
-    */
-   for (i = 0; i < 4; ++i) {
-      inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i);
-      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-   assert(ti <= max_tokens);
+   c->fragment_shader = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->fragment_shader)
+      return false;
 
-   fs.tokens = tokens;
-   c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs);
-   FREE(tokens);
+   return true;
 }
 
 static bool
@@ -251,14 +160,14 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-       
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-       
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
 }
 
@@ -276,7 +185,7 @@ init_shaders(struct vl_compositor *c)
 static void cleanup_shaders(struct vl_compositor *c)
 {
    assert(c);
-       
+
    c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
    c->pipe->delete_fs_state(c->pipe, c->fragment_shader);
 }
@@ -287,81 +196,32 @@ init_buffers(struct vl_compositor *c)
    struct fragment_shader_consts fsc;
 
    assert(c);
-       
+
    /*
-    * Create our vertex buffer and vertex buffer element
-    * VB contains 4 vertices that render a quad covering the entire window
-    * to display a rendered surface
-    * Quad is rendered as a tri strip
+    * Create our vertex buffer and vertex buffer elements
     */
-   c->vertex_bufs[0].stride = sizeof(struct vertex2f);
-   c->vertex_bufs[0].max_index = 3;
-   c->vertex_bufs[0].buffer_offset = 0;
-   c->vertex_bufs[0].buffer = pipe_buffer_create
+   c->vertex_buf.stride = sizeof(struct vertex4f);
+   c->vertex_buf.max_index = (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 - 1;
+   c->vertex_buf.buffer_offset = 0;
+   c->vertex_buf.buffer = pipe_buffer_create
    (
       c->pipe->screen,
       1,
       PIPE_BUFFER_USAGE_VERTEX,
-      sizeof(struct vertex2f) * 4
+      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6
    );
 
-   memcpy
-   (
-      pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-      surface_verts,
-      sizeof(struct vertex2f) * 4
-   );
-
-   pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
-
    c->vertex_elems[0].src_offset = 0;
    c->vertex_elems[0].instance_divisor = 0;
    c->vertex_elems[0].vertex_buffer_index = 0;
    c->vertex_elems[0].nr_components = 2;
    c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /*
-    * Create our texcoord buffer and texcoord buffer element
-    * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
-    */
-   c->vertex_bufs[1].stride = sizeof(struct vertex2f);
-   c->vertex_bufs[1].max_index = 3;
-   c->vertex_bufs[1].buffer_offset = 0;
-   c->vertex_bufs[1].buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      1,
-      PIPE_BUFFER_USAGE_VERTEX,
-      sizeof(struct vertex2f) * 4
-   );
-
-   memcpy
-   (
-      pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-      surface_texcoords,
-      sizeof(struct vertex2f) * 4
-   );
-
-   pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
-
-   c->vertex_elems[1].src_offset = 0;
+   c->vertex_elems[1].src_offset = sizeof(struct vertex2f);
    c->vertex_elems[1].instance_divisor = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
+   c->vertex_elems[1].vertex_buffer_index = 0;
    c->vertex_elems[1].nr_components = 2;
    c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /*
-    * Create our vertex shader's constant buffer
-    * Const buffer contains scaling and translation vectors
-    */
-   c->vs_const_buf = pipe_buffer_create
-   (
-      c->pipe->screen,
-      1,
-      PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
-      sizeof(struct vertex_shader_consts)
-   );
-
    /*
     * Create our fragment shader's constant buffer
     * Const buffer contains the color conversion matrix and bias vectors
@@ -384,19 +244,16 @@ init_buffers(struct vl_compositor *c)
 static void
 cleanup_buffers(struct vl_compositor *c)
 {
-   unsigned i;
-
    assert(c);
-       
-   for (i = 0; i < 2; ++i)
-      pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL);
 
-   pipe_buffer_reference(&c->vs_const_buf, NULL);
+   pipe_buffer_reference(&c->vertex_buf.buffer, NULL);
    pipe_buffer_reference(&c->fs_const_buf, NULL);
 }
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
 {
+   unsigned i;
+
    assert(compositor);
 
    memset(compositor, 0, sizeof(struct vl_compositor));
@@ -415,21 +272,196 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
       return false;
    }
 
+   compositor->fb_state.width = 0;
+   compositor->fb_state.height = 0;
+   compositor->bg = NULL;
+   compositor->dirty_bg = false;
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
+      compositor->layers[i] = NULL;
+   compositor->dirty_layers = 0;
+
    return true;
 }
 
 void vl_compositor_cleanup(struct vl_compositor *compositor)
 {
    assert(compositor);
-       
+
    cleanup_buffers(compositor);
    cleanup_shaders(compositor);
    cleanup_pipe_state(compositor);
 }
 
+/**
+ * Set or clear the background texture of the compositor.
+ *
+ * \param compositor   compositor to update
+ * \param bg           new background texture, or NULL to remove the background
+ * \param bg_src_rect  region of \p bg to sample from; must be NULL exactly
+ *                     when \p bg is NULL
+ *
+ * The compositor holds its own reference on the texture (taken through
+ * pipe_texture_reference()). The background is flagged dirty only when the
+ * texture or its source rectangle actually changed.
+ */
+void vl_compositor_set_background(struct vl_compositor *compositor,
+                                 struct pipe_texture *bg, struct pipe_video_rect *bg_src_rect)
+{
+   assert(compositor);
+   assert((bg && bg_src_rect) || (!bg && !bg_src_rect));
+
+   if (!bg) {
+      /*
+       * Removing the background: there is no source rect to compare or copy,
+       * so never touch bg_src_rect here. Nothing is left to regenerate, so
+       * the dirty flag is dropped together with the texture reference.
+       */
+      pipe_texture_reference(&compositor->bg, NULL);
+      compositor->dirty_bg = false;
+      return;
+   }
+
+   if (compositor->bg != bg ||
+       !u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect)) {
+      pipe_texture_reference(&compositor->bg, bg);
+      compositor->bg_src_rect = *bg_src_rect;
+      compositor->dirty_bg = true;
+   }
+}
+
+/**
+ * Replace the set of overlay layers of the compositor.
+ *
+ * \param compositor  compositor to update
+ * \param layers      array of \p num_layers textures; an entry may be NULL to
+ *                    leave that slot empty
+ * \param src_rects   per-layer source rectangles; entry i must be NULL exactly
+ *                    when layers[i] is NULL
+ * \param dst_rects   per-layer destination rectangles; same NULL rule
+ * \param num_layers  number of entries in the arrays, at most
+ *                    VL_COMPOSITOR_MAX_LAYERS
+ *
+ * Slots at or beyond \p num_layers are emptied. A slot is flagged dirty only
+ * when it holds a texture and its texture or rectangles changed; empty slots
+ * never stay dirty, because there is nothing to draw for them.
+ */
+void vl_compositor_set_layers(struct vl_compositor *compositor,
+                              struct pipe_texture *layers[],
+                              struct pipe_video_rect *src_rects[],
+                              struct pipe_video_rect *dst_rects[],
+                              unsigned num_layers)
+{
+   unsigned i;
+
+   assert(compositor);
+   assert(num_layers <= VL_COMPOSITOR_MAX_LAYERS);
+
+   for (i = 0; i < num_layers; ++i)
+   {
+      assert((layers[i] && src_rects[i] && dst_rects[i]) ||
+             (!layers[i] && !src_rects[i] && !dst_rects[i]));
+
+      if (!layers[i]) {
+         /* Empty slot: its rects are NULL too, so do not compare or copy them */
+         pipe_texture_reference(&compositor->layers[i], NULL);
+         compositor->dirty_layers &= ~(1u << i);
+         continue;
+      }
+
+      if (compositor->layers[i] != layers[i] ||
+          !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
+          !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
+      {
+         pipe_texture_reference(&compositor->layers[i], layers[i]);
+         compositor->layer_src_rects[i] = *src_rects[i];
+         compositor->layer_dst_rects[i] = *dst_rects[i];
+         compositor->dirty_layers |= 1u << i;
+      }
+   }
+
+   /* Release the remaining slots and make sure none of them is left dirty */
+   for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
+      pipe_texture_reference(&compositor->layers[i], NULL);
+      compositor->dirty_layers &= ~(1u << i);
+   }
+}
+
+/*
+ * Write the six vertices (two triangles) of rectangle number 'pos' into 'vb'.
+ *
+ * Each vertex stores the destination position in .x/.y and the source
+ * texture coordinate in .z/.w. Both are obtained by multiplying the rect
+ * coordinates with the matching inverse size.
+ */
+static void gen_rect_verts(unsigned pos,
+                           struct pipe_video_rect *src_rect,
+                           struct vertex2f *src_inv_size,
+                           struct pipe_video_rect *dst_rect,
+                           struct vertex2f *dst_inv_size,
+                           struct vertex4f *vb)
+{
+   struct vertex4f *quad;
+   float dst_x0, dst_y0, dst_x1, dst_y1;
+   float src_x0, src_y0, src_x1, src_y1;
+
+   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 2);
+   assert(src_rect);
+   assert(src_inv_size);
+   assert(dst_rect && dst_inv_size);
+   assert(vb);
+
+   /* Scaled corner coordinates of the destination rect */
+   dst_x0 = dst_rect->x * dst_inv_size->x;
+   dst_y0 = dst_rect->y * dst_inv_size->y;
+   dst_x1 = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
+   dst_y1 = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
+
+   /* Scaled corner coordinates of the source rect */
+   src_x0 = src_rect->x * src_inv_size->x;
+   src_y0 = src_rect->y * src_inv_size->y;
+   src_x1 = (src_rect->x + src_rect->w) * src_inv_size->x;
+   src_y1 = (src_rect->y + src_rect->h) * src_inv_size->y;
+
+   quad = &vb[pos * 6];
+
+   /* First triangle: (x0,y0) (x0,y1) (x1,y0) */
+   quad[0].x = dst_x0;
+   quad[0].y = dst_y0;
+   quad[0].z = src_x0;
+   quad[0].w = src_y0;
+
+   quad[1].x = dst_x0;
+   quad[1].y = dst_y1;
+   quad[1].z = src_x0;
+   quad[1].w = src_y1;
+
+   quad[2].x = dst_x1;
+   quad[2].y = dst_y0;
+   quad[2].z = src_x1;
+   quad[2].w = src_y0;
+
+   /* Second triangle: (x1,y0) (x0,y1) (x1,y1) */
+   quad[3].x = dst_x1;
+   quad[3].y = dst_y0;
+   quad[3].z = src_x1;
+   quad[3].w = src_y0;
+
+   quad[4].x = dst_x0;
+   quad[4].y = dst_y1;
+   quad[4].z = src_x0;
+   quad[4].w = src_y1;
+
+   quad[5].x = dst_x1;
+   quad[5].y = dst_y1;
+   quad[5].z = src_x1;
+   quad[5].w = src_y1;
+}
+
+/*
+ * Fill the compositor's vertex buffer with one rectangle (six vertices) per
+ * thing to draw and record the texture belonging to each rectangle.
+ *
+ * Rectangles are emitted in this order: the background (only if it is dirty
+ * and set), the video surface, then every dirty layer that holds a texture.
+ * The dirty flags that were handled are cleared.
+ *
+ * 'textures' must have room for VL_COMPOSITOR_MAX_LAYERS + 2 entries.
+ *
+ * Returns the number of rectangles written, or 0 if the vertex buffer could
+ * not be mapped (in which case no dirty flag is touched).
+ */
+static unsigned gen_data(struct vl_compositor *c,
+                         struct pipe_texture *src_surface,
+                         struct pipe_video_rect *src_rect,
+                         struct pipe_video_rect *dst_rect,
+                         struct pipe_texture **textures)
+{
+   struct vertex4f *vb;
+   unsigned num_rects = 0;
+   unsigned i;
+
+   assert(c);
+   assert(src_surface);
+   assert(src_rect);
+   assert(dst_rect);
+   assert(textures);
+
+   vb = pipe_buffer_map(c->pipe->screen, c->vertex_buf.buffer,
+                        PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD);
+
+   if (!vb)
+      return 0;
+
+   if (c->dirty_bg) {
+      /* The background may have been removed since it was flagged dirty */
+      if (c->bg) {
+         struct vertex2f bg_inv_size = {1.0f / c->bg->width0, 1.0f / c->bg->height0};
+         struct pipe_video_rect bg_dst_rect;
+
+         /*
+          * gen_rect_verts() dereferences its destination arguments, so NULL
+          * must not be passed. Presumably the background is meant to fill
+          * the whole framebuffer, so give it a rect that covers it.
+          */
+         bg_dst_rect.x = 0;
+         bg_dst_rect.y = 0;
+         bg_dst_rect.w = c->fb_state.width;
+         bg_dst_rect.h = c->fb_state.height;
+
+         gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size,
+                        &bg_dst_rect, &c->fb_inv_size, vb);
+         textures[num_rects] = c->bg;
+         ++num_rects;
+      }
+      c->dirty_bg = false;
+   }
+
+   {
+      struct vertex2f src_inv_size = { 1.0f / src_surface->width0, 1.0f / src_surface->height0};
+      gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
+      textures[num_rects] = src_surface;
+      ++num_rects;
+   }
+
+   /* Bound the loop by the array size so a stray bit can never index past it */
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS && c->dirty_layers; i++) {
+      if (!(c->dirty_layers & (1u << i)))
+         continue;
+
+      /* A dirty slot without a texture has nothing to draw */
+      if (c->layers[i]) {
+         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width0, 1.0f / c->layers[i]->height0};
+         gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
+                        &c->layer_dst_rects[i], &c->fb_inv_size, vb);
+         textures[num_rects] = c->layers[i];
+         ++num_rects;
+      }
+      c->dirty_layers &= ~(1u << i);
+   }
+
+   pipe_buffer_unmap(c->pipe->screen, c->vertex_buf.buffer);
+
+   return num_rects;
+}
+
+/*
+ * Generate the vertex data for everything that has to be drawn and issue one
+ * draw call per rectangle, binding the rectangle's texture before each draw.
+ */
+static void draw_layers(struct vl_compositor *c,
+                        struct pipe_texture *src_surface,
+                        struct pipe_video_rect *src_rect,
+                        struct pipe_video_rect *dst_rect)
+{
+   struct pipe_texture *textures[VL_COMPOSITOR_MAX_LAYERS + 2];
+   unsigned rects_left;
+   unsigned rect = 0;
+
+   assert(c);
+   assert(src_surface);
+   assert(src_rect);
+   assert(dst_rect);
+
+   rects_left = gen_data(c, src_surface, src_rect, dst_rect, textures);
+
+   /* Each rectangle occupies six consecutive vertices in the vertex buffer */
+   while (rects_left--) {
+      c->pipe->set_fragment_sampler_textures(c->pipe, 1, &textures[rect]);
+      c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, rect * 6, 6);
+      ++rect;
+   }
+}
+
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          /*struct pipe_texture         *backround,
-                          struct pipe_video_rect        *backround_area,*/
                           struct pipe_texture           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
@@ -439,14 +471,8 @@ void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_video_rect        *src_area,
                           struct pipe_texture           *dst_surface,
                           struct pipe_video_rect        *dst_area,
-                          /*unsigned                      num_layers,
-                          struct pipe_texture           *layers,
-                          struct pipe_video_rect        *layer_src_areas,
-                          struct pipe_video_rect        *layer_dst_areas*/
                           struct pipe_fence_handle      **fence)
 {
-   struct vertex_shader_consts *vs_consts;
-
    assert(compositor);
    assert(src_surface);
    assert(src_area);
@@ -454,13 +480,20 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    assert(dst_area);
    assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
-   compositor->fb_state.width = dst_surface->width0;
-   compositor->fb_state.height = dst_surface->height0;
+   if (compositor->fb_state.width != dst_surface->width0) {
+      compositor->fb_inv_size.x = 1.0f / dst_surface->width0;
+      compositor->fb_state.width = dst_surface->width0;
+   }
+   if (compositor->fb_state.height != dst_surface->height0) {
+      compositor->fb_inv_size.y = 1.0f / dst_surface->height0;
+      compositor->fb_state.height = dst_surface->height0;
+   }
+
    compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
    (
       compositor->pipe->screen,
       dst_surface,
-      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ_WRITE
    );
 
    compositor->viewport.scale[0] = compositor->fb_state.width;
@@ -472,49 +505,18 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->viewport.translate[2] = 0;
    compositor->viewport.translate[3] = 0;
 
-   compositor->scissor.maxx = compositor->fb_state.width;
-   compositor->scissor.maxy = compositor->fb_state.height;
-
    compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
    compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
-   compositor->pipe->set_scissor_state(compositor->pipe, &compositor->scissor);
    compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
-   compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
-   compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
+   compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
-   compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
-   vs_consts = pipe_buffer_map
-   (
-      compositor->pipe->screen,
-      compositor->vs_const_buf,
-      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
-   );
+   draw_layers(compositor, src_surface, src_area, dst_area);
 
-   vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width;
-   vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height;
-   vs_consts->dst_scale.z = 1;
-   vs_consts->dst_scale.w = 1;
-   vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width;
-   vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height;
-   vs_consts->dst_trans.z = 0;
-   vs_consts->dst_trans.w = 0;
-
-   vs_consts->src_scale.x = src_area->w / (float)src_surface->width0;
-   vs_consts->src_scale.y = src_area->h / (float)src_surface->height0;
-   vs_consts->src_scale.z = 1;
-   vs_consts->src_scale.w = 1;
-   vs_consts->src_trans.x = src_area->x / (float)src_surface->width0;
-   vs_consts->src_trans.y = src_area->y / (float)src_surface->height0;
-   vs_consts->src_trans.z = 0;
-   vs_consts->src_trans.w = 0;
-
-   pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf);
-
-   compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+   assert(!compositor->dirty_bg && !compositor->dirty_layers);
    compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
 
    pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
@@ -526,7 +528,8 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float
 
    memcpy
    (
-      pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE),
+      pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf,
+                      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD),
       mat,
       sizeof(struct fragment_shader_consts)
    );