u_upload_mgr: remove alignment parameter from u_upload_create
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor.c
index 9666ad1e0478e2a81e56099c3fa099717cedfad7..10ac1712f19202616db93ea845e96cb8fd42ca54 100644 (file)
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -33,6 +33,7 @@
 #include "util/u_memory.h"
 #include "util/u_draw.h"
 #include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
 
 #include "tgsi/tgsi_ureg.h"
 
 
 enum VS_OUTPUT
 {
-   VS_O_VPOS,
-   VS_O_VTEX,
+   /*
+    * The first three values are all 0 on purpose: each is used as a semantic
+    * index together with a different semantic name (VS_O_VPOS with
+    * TGSI_SEMANTIC_POSITION, VS_O_COLOR with TGSI_SEMANTIC_COLOR, VS_O_VTEX
+    * with TGSI_SEMANTIC_GENERIC), so each one is index 0 of its own semantic.
+    * VS_O_VTOP and VS_O_VBOTTOM continue the GENERIC numbering with 1 and 2.
+    */
+   VS_O_VPOS = 0,
+   VS_O_COLOR = 0,
+   VS_O_VTEX = 0,
    VS_O_VTOP,
    VS_O_VBOTTOM,
 };
 
-typedef float csc_matrix[16];
-
 static void *
 create_vert_shader(struct vl_compositor *c)
 {
    struct ureg_program *shader;
-   struct ureg_src vpos, vtex;
+   struct ureg_src vpos, vtex, color;
    struct ureg_dst tmp;
-   struct ureg_dst o_vpos, o_vtex;
+   struct ureg_dst o_vpos, o_vtex, o_color;
    struct ureg_dst o_vtop, o_vbottom;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -68,8 +68,10 @@ create_vert_shader(struct vl_compositor *c)
 
    vpos = ureg_DECL_vs_input(shader, 0);
    vtex = ureg_DECL_vs_input(shader, 1);
+   color = ureg_DECL_vs_input(shader, 2);
    tmp = ureg_DECL_temporary(shader);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR);
    o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
    o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
    o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
@@ -77,10 +79,26 @@ create_vert_shader(struct vl_compositor *c)
    /*
     * o_vpos = vpos
     * o_vtex = vtex
+    * o_color = color
     */
    ureg_MOV(shader, o_vpos, vpos);
    ureg_MOV(shader, o_vtex, vtex);
+   ureg_MOV(shader, o_color, color);
 
+   /*
+    * tmp.x = vtex.w / 2
+    * tmp.y = vtex.w / 4
+    *
+    * o_vtop.x = vtex.x
+    * o_vtop.y = vtex.y * tmp.x + 0.25f
+    * o_vtop.z = vtex.y * tmp.y + 0.25f
+    * o_vtop.w = 1 / tmp.x
+    *
+    * o_vbottom.x = vtex.x
+    * o_vbottom.y = vtex.y * tmp.x - 0.25f
+    * o_vbottom.z = vtex.y * tmp.y - 0.25f
+    * o_vbottom.w = 1 / tmp.y
+    */
    ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X),
             ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f));
    ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
@@ -122,7 +140,7 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
    if (!shader)
       return false;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 3; ++i) {
       csc[i] = ureg_DECL_constant(shader, i);
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -135,7 +153,7 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
     * fragment = csc * texel
     */
    for (i = 0; i < 3; ++i)
-      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc, sampler[i]);
+      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D_ARRAY, tc, sampler[i]);
 
    ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
@@ -182,13 +200,15 @@ create_frag_shader_weave(struct vl_compositor *c)
 
    /* calculate the texture offsets
     * t_tc.x = i_tc.x
-    * t_tc.y = (round(i_tc.y) + 0.5) / height * 2
+    * t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2
     */
    for (i = 0; i < 2; ++i) {
       ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]);
-      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), i_tc[i]);
+      ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
+               i_tc[i], ureg_imm1f(shader, 0.5f));
+      ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]));
       ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
-               ureg_imm1f(shader, i ? 0.75f : 0.25f));
+               ureg_imm1f(shader, i ? 1.0f : 0.0f));
       ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
                ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
       ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
@@ -208,7 +228,7 @@ create_frag_shader_weave(struct vl_compositor *c)
             TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
 
          ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
-                  TGSI_TEXTURE_3D, src, sampler[j]);
+                  TGSI_TEXTURE_2D_ARRAY, src, sampler[j]);
       }
 
    /* calculate linear interpolation factor
@@ -217,11 +237,11 @@ create_frag_shader_weave(struct vl_compositor *c)
    ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]);
    ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
             ureg_src(t_tc[0]), ureg_negate(i_tc[0]));
-   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_XY),
+   ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ),
             ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f));
    ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]),
             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z),
-            ureg_src(t_texel[1]), ureg_src(t_texel[0]));
+            ureg_src(t_texel[0]), ureg_src(t_texel[1]));
 
    /* and finally do colour space transformation
     * fragment = csc * texel
@@ -295,22 +315,24 @@ static void *
 create_frag_shader_rgba(struct vl_compositor *c)
 {
    struct ureg_program *shader;
-   struct ureg_src tc;
-   struct ureg_src sampler;
-   struct ureg_dst fragment;
+   struct ureg_src tc, color, sampler;
+   struct ureg_dst texel, fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
 
    tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
+   color = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR, TGSI_INTERPOLATE_LINEAR);
    sampler = ureg_DECL_sampler(shader, 0);
+   texel = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * fragment = tex(tc, sampler)
+    * texel = tex(tc, sampler)
+    * fragment = texel * color
     */
-   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_MUL(shader, fragment, ureg_src(texel), color);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, c->pipe);
@@ -422,7 +444,7 @@ init_pipe_state(struct vl_compositor *c)
    c->blend_add = c->pipe->create_blend_state(c->pipe, &blend);
 
    memset(&rast, 0, sizeof rast);
-   rast.flatshade = 1;
+   rast.flatshade = 0;
    rast.front_ccw = 1;
    rast.cull_face = PIPE_FACE_NONE;
    rast.fill_back = PIPE_POLYGON_MODE_FILL;
@@ -432,7 +454,8 @@ init_pipe_state(struct vl_compositor *c)
    rast.point_size_per_vertex = 1;
    rast.offset_units = 1;
    rast.offset_scale = 1;
-   rast.gl_rasterization_rules = 1;
+   rast.half_pixel_center = 1;
+   rast.bottom_edge_rule = 1;
    rast.depth_clip = 1;
 
    c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
@@ -475,36 +498,19 @@ static void cleanup_pipe_state(struct vl_compositor *c)
    c->pipe->delete_rasterizer_state(c->pipe, c->rast);
 }
 
-static bool
-create_vertex_buffer(struct vl_compositor *c)
-{
-   assert(c);
-
-   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
-   c->vertex_buf.buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STREAM,
-      c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4
-   );
-
-   return c->vertex_buf.buffer != NULL;
-}
-
 static bool
 init_buffers(struct vl_compositor *c)
 {
-   struct pipe_vertex_element vertex_elems[2];
+   struct pipe_vertex_element vertex_elems[3];
 
    assert(c);
 
    /*
     * Create our vertex buffer and vertex buffer elements
     */
-   c->vertex_buf.stride = sizeof(struct vertex2f) + sizeof(struct vertex4f);
+   c->vertex_buf.stride = sizeof(struct vertex2f) + sizeof(struct vertex4f) * 2;
    c->vertex_buf.buffer_offset = 0;
-   create_vertex_buffer(c);
+   c->vertex_buf.buffer = NULL;
 
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
@@ -514,7 +520,11 @@ init_buffers(struct vl_compositor *c)
    vertex_elems[1].instance_divisor = 0;
    vertex_elems[1].vertex_buffer_index = 0;
    vertex_elems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) + sizeof(struct vertex4f);
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 3, vertex_elems);
 
    return true;
 }
@@ -528,31 +538,31 @@ cleanup_buffers(struct vl_compositor *c)
    pipe_resource_reference(&c->vertex_buf.buffer, NULL);
 }
 
-static INLINE struct pipe_video_rect
+/**
+ * Build the default rectangle for a layer from the texture behind its first
+ * sampler view: the extent is width0 by (height0 * array_size), starting at 0.
+ */
+static inline struct u_rect
 default_rect(struct vl_compositor_layer *layer)
 {
    struct pipe_resource *res = layer->sampler_views[0]->texture;
-   struct pipe_video_rect rect = { 0, 0, res->width0, res->height0 * res->depth0 };
+   /* NOTE(review): initializer assumes struct u_rect fields are ordered x0, x1, y0, y1 -- confirm against its declaration */
+   struct u_rect rect = { 0, res->width0, 0, res->height0 * res->array_size };
    return rect;
 }
 
-static INLINE struct vertex2f
-calc_topleft(struct vertex2f size, struct pipe_video_rect rect)
+/**
+ * Return the (x0, y0) corner of rect with each coordinate divided by the
+ * matching component of size.
+ */
+static inline struct vertex2f
+calc_topleft(struct vertex2f size, struct u_rect rect)
 {
-   struct vertex2f res = { rect.x / size.x, rect.y / size.y };
+   struct vertex2f res = { rect.x0 / size.x, rect.y0 / size.y };
    return res;
 }
 
-static INLINE struct vertex2f
-calc_bottomright(struct vertex2f size, struct pipe_video_rect rect)
+/**
+ * Return the (x1, y1) corner of rect with each coordinate divided by the
+ * matching component of size.
+ */
+static inline struct vertex2f
+calc_bottomright(struct vertex2f size, struct u_rect rect)
 {
-   struct vertex2f res = { (rect.x + rect.w) / size.x, (rect.y + rect.h) / size.y };
+   struct vertex2f res = { rect.x1 / size.x, rect.y1 / size.y };
    return res;
 }
 
-static INLINE void
+static inline void
 calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
-                 struct pipe_video_rect src, struct pipe_video_rect dst)
+                 struct u_rect src, struct u_rect dst)
 {
    struct vertex2f size =  { width, height };
 
@@ -567,43 +577,125 @@ calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned hei
+/**
+ * Write the four vertices of a layer's quad into vb.
+ *
+ * Each vertex occupies five consecutive vertex2f slots: position, texture
+ * coordinate, layer->zw, colour x/y and colour z/w. Slots vb[0] to vb[19]
+ * are written.
+ *
+ * \param vb     destination for the 20 vertex2f slots
+ * \param layer  layer supplying dst/src rectangles, zw, colours and rotation
+ */
 static void
 gen_rect_verts(struct vertex2f *vb, struct vl_compositor_layer *layer)
 {
+   /* destination corners assigned to the 1st, 2nd, 3rd and 4th vertex */
+   struct vertex2f tl, tr, br, bl;
+
    assert(vb && layer);
 
-   vb[ 0].x = layer->dst.tl.x;
-   vb[ 0].y = layer->dst.tl.y;
+   /*
+    * Rotation only permutes which destination corner each vertex gets; the
+    * source (texture) coordinates written below are the same for every
+    * rotation. Unknown rotation values are handled like ROTATE_0.
+    */
+   switch (layer->rotate) {
+   default:
+   case VL_COMPOSITOR_ROTATE_0:
+      tl = layer->dst.tl;
+      tr.x = layer->dst.br.x;
+      tr.y = layer->dst.tl.y;
+      br = layer->dst.br;
+      bl.x = layer->dst.tl.x;
+      bl.y = layer->dst.br.y;
+      break;
+   case VL_COMPOSITOR_ROTATE_90:
+      tl.x = layer->dst.br.x;
+      tl.y = layer->dst.tl.y;
+      tr = layer->dst.br;
+      br.x = layer->dst.tl.x;
+      br.y = layer->dst.br.y;
+      bl = layer->dst.tl;
+      break;
+   case VL_COMPOSITOR_ROTATE_180:
+      tl = layer->dst.br;
+      tr.x = layer->dst.tl.x;
+      tr.y = layer->dst.br.y;
+      br = layer->dst.tl;
+      bl.x = layer->dst.br.x;
+      bl.y = layer->dst.tl.y;
+      break;
+   case VL_COMPOSITOR_ROTATE_270:
+      tl.x = layer->dst.tl.x;
+      tl.y = layer->dst.br.y;
+      tr = layer->dst.tl;
+      br.x = layer->dst.br.x;
+      br.y = layer->dst.tl.y;
+      bl = layer->dst.br;
+      break;
+   }
+
+   /* vertex 0: source top-left, colours[0] */
+   vb[ 0].x = tl.x;
+   vb[ 0].y = tl.y;
    vb[ 1].x = layer->src.tl.x;
    vb[ 1].y = layer->src.tl.y;
    vb[ 2] = layer->zw;
-
-   vb[ 3].x = layer->dst.br.x;
-   vb[ 3].y = layer->dst.tl.y;
-   vb[ 4].x = layer->src.br.x;
-   vb[ 4].y = layer->src.tl.y;
-   vb[ 5] = layer->zw;
-
-   vb[ 6].x = layer->dst.br.x;
-   vb[ 6].y = layer->dst.br.y;
-   vb[ 7].x = layer->src.br.x;
-   vb[ 7].y = layer->src.br.y;
-   vb[ 8] = layer->zw;
-
-   vb[ 9].x = layer->dst.tl.x;
-   vb[ 9].y = layer->dst.br.y;
-   vb[10].x = layer->src.tl.x;
-   vb[10].y = layer->src.br.y;
-   vb[11] = layer->zw;
+   vb[ 3].x = layer->colors[0].x;
+   vb[ 3].y = layer->colors[0].y;
+   vb[ 4].x = layer->colors[0].z;
+   vb[ 4].y = layer->colors[0].w;
+
+   /* vertex 1: source top-right, colours[1] */
+   vb[ 5].x = tr.x;
+   vb[ 5].y = tr.y;
+   vb[ 6].x = layer->src.br.x;
+   vb[ 6].y = layer->src.tl.y;
+   vb[ 7] = layer->zw;
+   vb[ 8].x = layer->colors[1].x;
+   vb[ 8].y = layer->colors[1].y;
+   vb[ 9].x = layer->colors[1].z;
+   vb[ 9].y = layer->colors[1].w;
+
+   /* vertex 2: source bottom-right, colours[2] */
+   vb[10].x = br.x;
+   vb[10].y = br.y;
+   vb[11].x = layer->src.br.x;
+   vb[11].y = layer->src.br.y;
+   vb[12] = layer->zw;
+   vb[13].x = layer->colors[2].x;
+   vb[13].y = layer->colors[2].y;
+   vb[14].x = layer->colors[2].z;
+   vb[14].y = layer->colors[2].w;
+
+   /* vertex 3: source bottom-left, colours[3] */
+   vb[15].x = bl.x;
+   vb[15].y = bl.y;
+   vb[16].x = layer->src.tl.x;
+   vb[16].y = layer->src.br.y;
+   vb[17] = layer->zw;
+   vb[18].x = layer->colors[3].x;
+   vb[18].y = layer->colors[3].y;
+   vb[19].x = layer->colors[3].z;
+   vb[19].y = layer->colors[3].w;
 }
 
-static INLINE struct u_rect
+static inline struct u_rect
 calc_drawn_area(struct vl_compositor_state *s, struct vl_compositor_layer *layer)
 {
+   struct vertex2f tl, br;
    struct u_rect result;
 
+   assert(s && layer);
+
+   // rotate
+   switch (layer->rotate) {
+   default:
+   case VL_COMPOSITOR_ROTATE_0:
+      tl = layer->dst.tl;
+      br = layer->dst.br;
+      break;
+   case VL_COMPOSITOR_ROTATE_90:
+      tl.x = layer->dst.br.x;
+      tl.y = layer->dst.tl.y;
+      br.x = layer->dst.tl.x;
+      br.y = layer->dst.br.y;
+      break;
+   case VL_COMPOSITOR_ROTATE_180:
+      tl = layer->dst.br;
+      br = layer->dst.tl;
+      break;
+   case VL_COMPOSITOR_ROTATE_270:
+      tl.x = layer->dst.tl.x;
+      tl.y = layer->dst.br.y;
+      br.x = layer->dst.br.x;
+      br.y = layer->dst.tl.y;
+      break;
+   }
+
    // scale
-   result.x0 = layer->dst.tl.x * s->viewport.scale[0] + s->viewport.translate[0];
-   result.y0 = layer->dst.tl.y * s->viewport.scale[1] + s->viewport.translate[1];
-   result.x1 = layer->dst.br.x * s->viewport.scale[0] + s->viewport.translate[0];
-   result.y1 = layer->dst.br.y * s->viewport.scale[1] + s->viewport.translate[1];
+   result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
+   result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
+   result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
+   result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
 
    // and clip
    result.x0 = MAX2(result.x0, s->scissor.minx);
@@ -617,28 +709,29 @@ static void
 gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
 {
    struct vertex2f *vb;
-   struct pipe_transfer *buf_transfer;
    unsigned i;
 
    assert(c);
 
-   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
-                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DONTBLOCK,
-                        &buf_transfer);
-
-   if (!vb) {
-      // If buffer is still locked from last draw create a new one
-      create_vertex_buffer(c);
-      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
-                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
-                           &buf_transfer);
-   }
+   /* Allocate new memory for vertices. */
+   u_upload_alloc(c->upload, 0,
+                  c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4, /* size */
+                  4, /* alignment */
+                  &c->vertex_buf.buffer_offset, &c->vertex_buf.buffer,
+                  (void**)&vb);
 
    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
       if (s->used_layers & (1 << i)) {
          struct vl_compositor_layer *layer = &s->layers[i];
          gen_rect_verts(vb, layer);
-         vb += 12;
+         vb += 20;
+
+         if (!layer->viewport_valid) {
+            layer->viewport.scale[0] = c->fb_state.width;
+            layer->viewport.scale[1] = c->fb_state.height;
+            layer->viewport.translate[0] = 0;
+            layer->viewport.translate[1] = 0;
+         }
 
          if (dirty && layer->clearing) {
             struct u_rect drawn = calc_drawn_area(s, layer);
@@ -656,7 +749,7 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
       }
    }
 
-   pipe_buffer_unmap(c->pipe, buf_transfer);
+   u_upload_unmap(c->upload);
 }
 
 static void
@@ -674,9 +767,13 @@ draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rec
          void *blend = layer->blend ? layer->blend : i ? c->blend_add : c->blend_clear;
 
          c->pipe->bind_blend_state(c->pipe, blend);
+         c->pipe->set_viewport_states(c->pipe, 0, 1, &layer->viewport);
          c->pipe->bind_fs_state(c->pipe, layer->fs);
-         c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, layer->samplers);
-         c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers);
+         c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_FRAGMENT, 0,
+                                      num_sampler_views, layer->samplers);
+         c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0,
+                                    num_sampler_views, samplers);
+
          util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4);
          vb_index++;
 
@@ -728,11 +825,18 @@ vl_compositor_clear_layers(struct vl_compositor_state *s)
 
    s->used_layers = 0;
    for ( i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
+      struct vertex4f v_one = { 1.0f, 1.0f, 1.0f, 1.0f };
       s->layers[i].clearing = i ? false : true;
       s->layers[i].blend = NULL;
       s->layers[i].fs = NULL;
+      s->layers[i].viewport.scale[2] = 1;
+      s->layers[i].viewport.translate[2] = 0;
+      s->layers[i].rotate = VL_COMPOSITOR_ROTATE_0;
+
       for ( j = 0; j < 3; j++)
          pipe_sampler_view_reference(&s->layers[i].sampler_views[j], NULL);
+      for ( j = 0; j < 4; ++j)
+         s->layers[i].colors[j] = v_one;
    }
 }
 
@@ -741,13 +845,14 @@ vl_compositor_cleanup(struct vl_compositor *c)
 {
    assert(c);
 
+   u_upload_destroy(c->upload);
    cleanup_buffers(c);
    cleanup_shaders(c);
    cleanup_pipe_state(c);
 }
 
 void
-vl_compositor_set_csc_matrix(struct vl_compositor_state *s, const float matrix[16])
+vl_compositor_set_csc_matrix(struct vl_compositor_state *s, vl_csc_matrix const *matrix)
 {
    struct pipe_transfer *buf_transfer;
 
@@ -759,37 +864,23 @@ vl_compositor_set_csc_matrix(struct vl_compositor_state *s, const float matrix[1
                       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
                       &buf_transfer),
       matrix,
-      sizeof(csc_matrix)
+      sizeof(vl_csc_matrix)
    );
 
    pipe_buffer_unmap(s->pipe, buf_transfer);
 }
 
+/**
+ * Set or clear the destination clip rectangle (scissor) of a compositor state.
+ *
+ * \param s         compositor state to modify
+ * \param dst_clip  clip rectangle copied into s->scissor, or NULL to mark the
+ *                  scissor as not valid; vl_compositor_render() then resets
+ *                  it to the full destination surface
+ */
 void
-vl_compositor_set_dst_area(struct vl_compositor_state *s, struct pipe_video_rect *dst_area)
-{
-   assert(s);
-
-   s->viewport_valid = dst_area != NULL;
-   if (dst_area) {
-      s->viewport.scale[0] = dst_area->w;
-      s->viewport.scale[1] = dst_area->h;
-      s->viewport.translate[0] = dst_area->x;
-      s->viewport.translate[1] = dst_area->y;
-   }
-}
-
-void
-vl_compositor_set_dst_clip(struct vl_compositor_state *s, struct pipe_video_rect *dst_clip)
+vl_compositor_set_dst_clip(struct vl_compositor_state *s, struct u_rect *dst_clip)
 {
    assert(s);
 
    s->scissor_valid = dst_clip != NULL;
    if (dst_clip) {
-      s->scissor.minx = dst_clip->x;
-      s->scissor.miny = dst_clip->y;
-      s->scissor.maxx = dst_clip->x + dst_clip->w;
-      s->scissor.maxy = dst_clip->y + dst_clip->h;
+      /* u_rect already stores both corners, so no width/height arithmetic is needed */
+      s->scissor.minx = dst_clip->x0;
+      s->scissor.miny = dst_clip->y0;
+      s->scissor.maxx = dst_clip->x1;
+      s->scissor.maxy = dst_clip->y1;
    }
 }
 
@@ -806,13 +897,30 @@ vl_compositor_set_layer_blend(struct vl_compositor_state *s,
    s->layers[layer].blend = blend;
 }
 
+/**
+ * Set or clear the destination area (viewport) of a single layer.
+ *
+ * \param s         compositor state owning the layer
+ * \param layer     layer index, must be below VL_COMPOSITOR_MAX_LAYERS
+ * \param dst_area  area to draw the layer into, or NULL to mark the layer's
+ *                  viewport as not valid; gen_vertex_data() then fills in a
+ *                  viewport spanning the whole framebuffer
+ */
+void
+vl_compositor_set_layer_dst_area(struct vl_compositor_state *s,
+                                 unsigned layer, struct u_rect *dst_area)
+{
+   assert(s);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   s->layers[layer].viewport_valid = dst_area != NULL;
+   if (dst_area) {
+      /* scale = size of the rectangle, translate = its (x0, y0) corner */
+      s->layers[layer].viewport.scale[0] = dst_area->x1 - dst_area->x0;
+      s->layers[layer].viewport.scale[1] = dst_area->y1 - dst_area->y0;
+      s->layers[layer].viewport.translate[0] = dst_area->x0;
+      s->layers[layer].viewport.translate[1] = dst_area->y0;
+   }
+}
+
 void
 vl_compositor_set_buffer_layer(struct vl_compositor_state *s,
                                struct vl_compositor *c,
                                unsigned layer,
                                struct pipe_video_buffer *buffer,
-                               struct pipe_video_rect *src_rect,
-                               struct pipe_video_rect *dst_rect,
+                               struct u_rect *src_rect,
+                               struct u_rect *dst_rect,
                                enum vl_compositor_deinterlace deinterlace)
 {
    struct pipe_sampler_view **sampler_views;
@@ -841,14 +949,14 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s,
          break;
 
       case VL_COMPOSITOR_BOB_TOP:
-         s->layers[layer].zw.x = 0.25f;
+         s->layers[layer].zw.x = 0.0f;
          s->layers[layer].src.tl.y += half_a_line;
          s->layers[layer].src.br.y += half_a_line;
          s->layers[layer].fs = c->fs_video_buffer;
          break;
 
       case VL_COMPOSITOR_BOB_BOTTOM:
-         s->layers[layer].zw.x = 0.75f;
+         s->layers[layer].zw.x = 1.0f;
          s->layers[layer].src.tl.y -= half_a_line;
          s->layers[layer].src.br.y -= half_a_line;
          s->layers[layer].fs = c->fs_video_buffer;
@@ -865,8 +973,8 @@ vl_compositor_set_palette_layer(struct vl_compositor_state *s,
                                 unsigned layer,
                                 struct pipe_sampler_view *indexes,
                                 struct pipe_sampler_view *palette,
-                                struct pipe_video_rect *src_rect,
-                                struct pipe_video_rect *dst_rect,
+                                struct u_rect *src_rect,
+                                struct u_rect *dst_rect,
                                 bool include_color_conversion)
 {
    assert(s && c && indexes && palette);
@@ -894,9 +1002,12 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
                              struct vl_compositor *c,
                              unsigned layer,
                              struct pipe_sampler_view *rgba,
-                             struct pipe_video_rect *src_rect,
-                             struct pipe_video_rect *dst_rect)
+                             struct u_rect *src_rect,
+                             struct u_rect *dst_rect,
+                             struct vertex4f *colors)
 {
+   unsigned i;
+
    assert(s && c && rgba);
 
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
@@ -912,13 +1023,28 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
    calc_src_and_dst(&s->layers[layer], rgba->texture->width0, rgba->texture->height0,
                     src_rect ? *src_rect : default_rect(&s->layers[layer]),
                     dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+
+   if (colors)
+      for (i = 0; i < 4; ++i)
+         s->layers[layer].colors[i] = colors[i];
+}
+
+/**
+ * Store the rotation for a single layer.
+ *
+ * The value is read by gen_rect_verts() and calc_drawn_area() to choose which
+ * destination corners are used.
+ *
+ * \param s       compositor state owning the layer
+ * \param layer   layer index, must be below VL_COMPOSITOR_MAX_LAYERS
+ * \param rotate  rotation to store in the layer
+ */
+void
+vl_compositor_set_layer_rotation(struct vl_compositor_state *s,
+                                 unsigned layer,
+                                 enum vl_compositor_rotation rotate)
+{
+   assert(s);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   s->layers[layer].rotate = rotate;
 }
 
 void
 vl_compositor_render(struct vl_compositor_state *s,
                      struct vl_compositor       *c,
                      struct pipe_surface        *dst_surface,
-                     struct u_rect              *dirty_area)
+                     struct u_rect              *dirty_area,
+                     bool                        clear_dirty)
 {
    assert(c);
    assert(dst_surface);
@@ -927,24 +1053,18 @@ vl_compositor_render(struct vl_compositor_state *s,
    c->fb_state.height = dst_surface->height;
    c->fb_state.cbufs[0] = dst_surface;
    
-   if (!s->viewport_valid) {
-      s->viewport.scale[0] = dst_surface->width;
-      s->viewport.scale[1] = dst_surface->height;
-      s->viewport.translate[0] = 0;
-      s->viewport.translate[1] = 0;
-   }
-
    if (!s->scissor_valid) {
       s->scissor.minx = 0;
       s->scissor.miny = 0;
       s->scissor.maxx = dst_surface->width;
       s->scissor.maxy = dst_surface->height;
    }
+   c->pipe->set_scissor_states(c->pipe, 0, 1, &s->scissor);
 
    gen_vertex_data(c, s, dirty_area);
 
-   if (dirty_area && (dirty_area->x0 < dirty_area->x1 ||
-                      dirty_area->y0 < dirty_area->y1)) {
+   if (clear_dirty && dirty_area &&
+       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
 
       c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
                                    0, 0, dst_surface->width, dst_surface->height);
@@ -952,13 +1072,11 @@ vl_compositor_render(struct vl_compositor_state *s,
       dirty_area->x1 = dirty_area->y1 = MIN_DIRTY;
    }
 
-   c->pipe->set_scissor_state(c->pipe, &s->scissor);
    c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
-   c->pipe->set_viewport_state(c->pipe, &s->viewport);
    c->pipe->bind_vs_state(c->pipe, c->vs);
-   c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
+   c->pipe->set_vertex_buffers(c->pipe, 0, 1, &c->vertex_buf);
    c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
-   c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, s->csc_matrix);
+   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, s->csc_matrix);
    c->pipe->bind_rasterizer_state(c->pipe, c->rast);
 
    draw_layers(c, s, dirty_area);
@@ -973,15 +1091,24 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
 
    c->pipe = pipe;
 
-   if (!init_pipe_state(c))
+   c->upload = u_upload_create(pipe, 128 * 1024, PIPE_BIND_VERTEX_BUFFER);
+
+   if (!c->upload)
       return false;
 
+   if (!init_pipe_state(c)) {
+      u_upload_destroy(c->upload);
+      return false;
+   }
+
    if (!init_shaders(c)) {
+      u_upload_destroy(c->upload);
       cleanup_pipe_state(c);
       return false;
    }
 
    if (!init_buffers(c)) {
+      u_upload_destroy(c->upload);
       cleanup_shaders(c);
       cleanup_pipe_state(c);
       return false;
@@ -993,7 +1120,7 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
 bool
 vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pipe)
 {
-   csc_matrix csc_matrix;
+   vl_csc_matrix csc_matrix;
 
    assert(s);
 
@@ -1001,11 +1128,6 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
 
    s->pipe = pipe;
 
-   s->viewport.scale[2] = 1;
-   s->viewport.scale[3] = 1;
-   s->viewport.translate[2] = 0;
-   s->viewport.translate[3] = 0;
-
    s->clear_color.f[0] = s->clear_color.f[1] = 0.0f;
    s->clear_color.f[2] = s->clear_color.f[3] = 0.0f;
 
@@ -1018,14 +1140,14 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
    (
       pipe->screen,
       PIPE_BIND_CONSTANT_BUFFER,
-      PIPE_USAGE_STATIC,
+      PIPE_USAGE_DEFAULT,
       sizeof(csc_matrix)
    );
 
    vl_compositor_clear_layers(s);
 
-   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
-   vl_compositor_set_csc_matrix(s, csc_matrix);
+   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, &csc_matrix);
+   vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix);
 
    return true;
 }