Merge branch 'master' into r300-compiler
[mesa.git] / src / gallium / auxiliary / util / u_blit.c
index 9adf72944e6d2938eb835fc8089baabbe2e9531b..c516317d701113638fa362f64cfa040daf6c70f1 100644 (file)
 
 
 #include "pipe/p_context.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
 
 #include "util/u_blit.h"
 #include "util/u_draw_quad.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
+#include "util/u_surface.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -60,12 +61,12 @@ struct blit_state
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
 
-   struct pipe_shader_state vert_shader;
-   struct pipe_shader_state frag_shader;
    void *vs;
-   void *fs;
+   void *fs[TGSI_WRITEMASK_XYZW + 1];
 
    struct pipe_buffer *vbuf;  /**< quad vertices */
+   unsigned vbuf_slot;
+
    float vertices[4][2][4];   /**< vertex/texcoords for quad */
 };
 
@@ -89,10 +90,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
 
    /* disabled blending/masking */
    memset(&ctx->blend, 0, sizeof(ctx->blend));
-   ctx->blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-   ctx->blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-   ctx->blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
-   ctx->blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
    ctx->blend.colormask = PIPE_MASK_RGBA;
 
    /* no-op depth/stencil/alpha */
@@ -102,8 +99,8 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
    ctx->rasterizer.front_winding = PIPE_WINDING_CW;
    ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
-   ctx->rasterizer.bypass_clipping = 1;
-   /*ctx->rasterizer.bypass_vs = 1;*/
+   ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
+   ctx->rasterizer.gl_rasterization_rules = 1;
 
    /* samplers */
    memset(&ctx->sampler, 0, sizeof(ctx->sampler));
@@ -115,39 +112,21 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.mag_img_filter = 0; /* set later */
    ctx->sampler.normalized_coords = 1;
 
-   /* viewport (identity, we setup vertices in wincoords) */
-   ctx->viewport.scale[0] = 1.0;
-   ctx->viewport.scale[1] = 1.0;
-   ctx->viewport.scale[2] = 1.0;
-   ctx->viewport.scale[3] = 1.0;
-   ctx->viewport.translate[0] = 0.0;
-   ctx->viewport.translate[1] = 0.0;
-   ctx->viewport.translate[2] = 0.0;
-   ctx->viewport.translate[3] = 0.0;
-
-   /* vertex shader */
+
+   /* vertex shader - still required to provide the linkage between
+    * fragment shader input semantics and vertex_element/buffers.
+    */
    {
       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
                                       TGSI_SEMANTIC_GENERIC };
       const uint semantic_indexes[] = { 0, 0 };
       ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
-                                                    semantic_indexes,
-                                                    &ctx->vert_shader);
+                                                    semantic_indexes);
    }
 
    /* fragment shader */
-   ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader);
-
-   ctx->vbuf = pipe_buffer_create(pipe->screen,
-                                  32,
-                                  PIPE_BUFFER_USAGE_VERTEX,
-                                  sizeof(ctx->vertices));
-   if (!ctx->vbuf) {
-      FREE(ctx);
-      ctx->pipe->delete_fs_state(ctx->pipe, ctx->fs);
-      ctx->pipe->delete_vs_state(ctx->pipe, ctx->vs);
-      return NULL;
-   }
+   ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe);
+   ctx->vbuf = NULL;
 
    /* init vertex data that doesn't change */
    for (i = 0; i < 4; i++) {
@@ -167,28 +146,51 @@ void
 util_destroy_blit(struct blit_state *ctx)
 {
    struct pipe_context *pipe = ctx->pipe;
+   unsigned i;
 
    pipe->delete_vs_state(pipe, ctx->vs);
-   pipe->delete_fs_state(pipe, ctx->fs);
 
-   FREE((void*) ctx->vert_shader.tokens);
-   FREE((void*) ctx->frag_shader.tokens);
+   for (i = 0; i < Elements(ctx->fs); i++)
+      if (ctx->fs[i])
+         pipe->delete_fs_state(pipe, ctx->fs[i]);
 
-   pipe_buffer_reference(pipe->screen, &ctx->vbuf, NULL);
+   pipe_buffer_reference(&ctx->vbuf, NULL);
 
    FREE(ctx);
 }
 
 
+/**
+ * Get offset of next free slot in vertex buffer for quad vertices.
+ *
+ * The vertex buffer is treated as an array of slots, each
+ * sizeof(ctx->vertices) bytes (one quad).  When every slot has been
+ * handed out, util_blit_flush() is called, which drops the reference to
+ * the current buffer and resets the slot counter; a buffer is then
+ * created here on demand.
+ *
+ * \param ctx  the blit state; ctx->vbuf and ctx->vbuf_slot are updated
+ * \return byte offset of the slot within ctx->vbuf
+ *
+ * NOTE(review): the result of pipe_buffer_create() is not checked, so
+ * ctx->vbuf may still be NULL on return; the callers pass it straight
+ * to pipe_buffer_write().
+ */
+static unsigned
+get_next_slot( struct blit_state *ctx )
+{
+   /* number of quad-sized slots that fit in 4096 bytes */
+   const unsigned max_slots = 4096 / sizeof ctx->vertices;
+
+   /* all slots used: release the buffer and start over at slot 0 */
+   if (ctx->vbuf_slot >= max_slots) 
+      util_blit_flush( ctx );
+
+   /* lazily (re)create the buffer; the literal 32 is presumably the
+    * requested alignment -- TODO confirm against pipe_buffer_create().
+    */
+   if (!ctx->vbuf) {
+      ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+                                     32,
+                                     PIPE_BUFFER_USAGE_VERTEX,
+                                     max_slots * sizeof ctx->vertices);
+   }
+   
+   /* hand out the current slot (as a byte offset), then advance */
+   return ctx->vbuf_slot++ * sizeof ctx->vertices;
+}
+                               
+
 /**
  * Setup vertex data for the textured quad we'll draw.
  * Note: y=0=top
  */
-static void
+static unsigned
 setup_vertex_data(struct blit_state *ctx,
                   float x0, float y0, float x1, float y1, float z)
 {
-   void *buf;
+   unsigned offset;
 
    ctx->vertices[0][0][0] = x0;
    ctx->vertices[0][0][1] = y0;
@@ -214,12 +216,12 @@ setup_vertex_data(struct blit_state *ctx,
    ctx->vertices[3][1][0] = 0.0f;
    ctx->vertices[3][1][1] = 1.0f;
 
-   buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf,
-                         PIPE_BUFFER_USAGE_CPU_WRITE);
+   offset = get_next_slot( ctx );
 
-   memcpy(buf, ctx->vertices, sizeof(ctx->vertices));
+   pipe_buffer_write(ctx->pipe->screen, ctx->vbuf,
+                     offset, sizeof(ctx->vertices), ctx->vertices);
 
-   pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf);
+   return offset;
 }
 
 
@@ -227,13 +229,13 @@ setup_vertex_data(struct blit_state *ctx,
  * Setup vertex data for the textured quad we'll draw.
  * Note: y=0=top
  */
-static void
+static unsigned
 setup_vertex_data_tex(struct blit_state *ctx,
                       float x0, float y0, float x1, float y1,
                       float s0, float t0, float s1, float t1,
                       float z)
 {
-   void *buf;
+   unsigned offset;
 
    ctx->vertices[0][0][0] = x0;
    ctx->vertices[0][0][1] = y0;
@@ -259,27 +261,57 @@ setup_vertex_data_tex(struct blit_state *ctx,
    ctx->vertices[3][1][0] = s0;
    ctx->vertices[3][1][1] = t1;
 
-   buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf,
-                         PIPE_BUFFER_USAGE_CPU_WRITE);
+   offset = get_next_slot( ctx );
 
-   memcpy(buf, ctx->vertices, sizeof(ctx->vertices));
+   pipe_buffer_write(ctx->pipe->screen, ctx->vbuf,
+                     offset, sizeof(ctx->vertices), ctx->vertices);
 
-   pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf);
+   return offset;
 }
+
+
+/**
+ * Test whether two axis-aligned rectangles overlap.
+ *
+ * Each rectangle is given by two corner points; the corners may be
+ * passed in either order (e.g. for flipped blits) because every
+ * comparison is done on the MIN2/MAX2 of the two coordinates.
+ *
+ * The comparisons are strict, so rectangles that merely share an edge
+ * coordinate (e.g. max src X == min dst X) are reported as overlapping.
+ *
+ * \param srcX0,srcY0,srcX1,srcY1  corners of the first (source) region
+ * \param dstX0,dstY0,dstX1,dstY1  corners of the second (dest) region
+ * \return TRUE if two regions overlap, FALSE otherwise
+ */
+static boolean
+regions_overlap(int srcX0, int srcY0,
+                int srcX1, int srcY1,
+                int dstX0, int dstY0,
+                int dstX1, int dstY1)
+{
+   if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
+      return FALSE; /* src completely left of dst */
+
+   if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
+      return FALSE; /* dst completely left of src */
+
+   if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
+      return FALSE; /* src completely above dst */
+
+   if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
+      return FALSE; /* dst completely above src */
+
+   /* not separated along X or Y, so the regions intersect (or touch) */
+   return TRUE; /* some overlap */
+}
+
+
 /**
  * Copy pixel block from src surface to dst surface.
  * Overlapping regions are acceptable.
+ * Flipping and stretching are supported.
+ * XXX what about clipping???
  * XXX need some control over blitting Z and/or stencil.
  */
 void
-util_blit_pixels(struct blit_state *ctx,
-                 struct pipe_surface *src,
-                 int srcX0, int srcY0,
-                 int srcX1, int srcY1,
-                 struct pipe_surface *dst,
-                 int dstX0, int dstY0,
-                 int dstX1, int dstY1,
-                 float z, uint filter)
+util_blit_pixels_writemask(struct blit_state *ctx,
+                           struct pipe_surface *src,
+                           int srcX0, int srcY0,
+                           int srcX1, int srcY1,
+                           struct pipe_surface *dst,
+                           int dstX0, int dstY0,
+                           int dstX1, int dstY1,
+                           float z, uint filter,
+                           uint writemask)
 {
    struct pipe_context *pipe = ctx->pipe;
    struct pipe_screen *screen = pipe->screen;
@@ -290,10 +322,42 @@ util_blit_pixels(struct blit_state *ctx,
    const int srcH = abs(srcY1 - srcY0);
    const int srcLeft = MIN2(srcX0, srcX1);
    const int srcTop = MIN2(srcY0, srcY1);
+   unsigned offset;
+   boolean overlap;
 
    assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
           filter == PIPE_TEX_MIPFILTER_LINEAR);
 
+   assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
+                                      PIPE_TEXTURE_USAGE_SAMPLER, 0));
+   assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
+                                      PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
+
+   /* do the regions overlap? */
+   overlap = util_same_surface(src, dst) &&
+      regions_overlap(srcX0, srcY0, srcX1, srcY1,
+                      dstX0, dstY0, dstX1, dstY1);
+
+   /*
+    * Check for simple case:  no format conversion, no flipping, no stretching,
+    * no overlapping.
+    * Filter mode should not matter since there's no stretching.
+    */
+   if (dst->format == src->format &&
+       srcX0 < srcX1 &&
+       dstX0 < dstX1 &&
+       srcY0 < srcY1 &&
+       dstY0 < dstY1 &&
+       (dstX1 - dstX0) == (srcX1 - srcX0) &&
+       (dstY1 - dstY0) == (srcY1 - srcY0) &&
+       !overlap) {
+      pipe->surface_copy(pipe,
+                        dst, dstX0, dstY0, /* dest */
+                        src, srcX0, srcY0, /* src */
+                        srcW, srcH);       /* size */
+      return;
+   }
+   
    if (srcLeft != srcX0) {
       /* left-right flip */
       int tmp = dstX0;
@@ -308,20 +372,6 @@ util_blit_pixels(struct blit_state *ctx,
       dstY1 = tmp;
    }
 
-   assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
-                                      PIPE_TEXTURE_USAGE_SAMPLER, 0));
-   assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
-                                      PIPE_TEXTURE_USAGE_SAMPLER, 0));
-
-   if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) {
-      /* FIXME: this will most surely fail for overlapping rectangles */
-      pipe->surface_copy(pipe, FALSE,
-                        dst, dstX0, dstY0,   /* dest */
-                        src, srcX0, srcY0, /* src */
-                        srcW, srcH);     /* size */
-      return;
-   }
-   
    assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
                                       PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
 
@@ -338,7 +388,6 @@ util_blit_pixels(struct blit_state *ctx,
    texTemp.width[0] = srcW;
    texTemp.height[0] = srcH;
    texTemp.depth[0] = 1;
-   texTemp.compressed = 0;
    pf_get_block(src->format, &texTemp.block);
 
    tex = screen->texture_create(screen, &texTemp);
@@ -349,14 +398,14 @@ util_blit_pixels(struct blit_state *ctx,
                                      PIPE_BUFFER_USAGE_GPU_WRITE);
 
    /* load temp texture */
-   pipe->surface_copy(pipe, FALSE,
+   pipe->surface_copy(pipe,
                       texSurf, 0, 0,   /* dest */
                       src, srcLeft, srcTop, /* src */
                       srcW, srcH);     /* size */
 
    /* free the surface, update the texture if necessary.
     */
-   screen->tex_surface_release(screen, &texSurf);
+   pipe_surface_reference(&texSurf, NULL);
 
    /* save state (restored below) */
    cso_save_blend(ctx->cso);
@@ -367,13 +416,11 @@ util_blit_pixels(struct blit_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
-   cso_save_viewport(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
-   cso_set_viewport(ctx->cso, &ctx->viewport);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -384,24 +431,27 @@ util_blit_pixels(struct blit_state *ctx,
    /* texture */
    cso_set_sampler_textures(ctx->cso, 1, &tex);
 
+   if (ctx->fs[writemask] == NULL)
+      ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask);
+
    /* shaders */
-   cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
 
    /* drawing dest */
    memset(&fb, 0, sizeof(fb));
    fb.width = dst->width;
    fb.height = dst->height;
-   fb.num_cbufs = 1;
+   fb.nr_cbufs = 1;
    fb.cbufs[0] = dst;
    cso_set_framebuffer(ctx->cso, &fb);
 
    /* draw quad */
-   setup_vertex_data(ctx,
-                     (float) dstX0, (float) dstY0, 
-                     (float) dstX1, (float) dstY1, z);
+   offset = setup_vertex_data(ctx,
+                              (float) dstX0, (float) dstY0, 
+                              (float) dstX1, (float) dstY1, z);
 
-   util_draw_vertex_buffer(ctx->pipe, ctx->vbuf,
+   util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset,
                            PIPE_PRIM_TRIANGLE_FAN,
                            4,  /* verts */
                            2); /* attribs/vert */
@@ -415,11 +465,43 @@ util_blit_pixels(struct blit_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
-   cso_restore_viewport(ctx->cso);
 
-   screen->texture_release(screen, &tex);
+   pipe_texture_reference(&tex, NULL);
 }
 
+
+/**
+ * Copy pixel block from src surface to dst surface, writing all four
+ * colour channels.
+ *
+ * Convenience wrapper: forwards every argument unchanged to
+ * util_blit_pixels_writemask() with a writemask of TGSI_WRITEMASK_XYZW.
+ *
+ * \param ctx     the blit state
+ * \param src     source surface
+ * \param srcX0,srcY0,srcX1,srcY1  source rectangle corners
+ * \param dst     destination surface
+ * \param dstX0,dstY0,dstX1,dstY1  destination rectangle corners
+ * \param z       passed through to util_blit_pixels_writemask()
+ * \param filter  passed through to util_blit_pixels_writemask()
+ */
+void
+util_blit_pixels(struct blit_state *ctx,
+                 struct pipe_surface *src,
+                 int srcX0, int srcY0,
+                 int srcX1, int srcY1,
+                 struct pipe_surface *dst,
+                 int dstX0, int dstY0,
+                 int dstX1, int dstY1,
+                 float z, uint filter )
+{
+   util_blit_pixels_writemask( ctx, src, 
+                               srcX0, srcY0,
+                               srcX1, srcY1,
+                               dst,
+                               dstX0, dstY0,
+                               dstX1, dstY1,
+                               z, filter,
+                               TGSI_WRITEMASK_XYZW );
+}
+
+
+/* Release vertex buffer at end of frame to avoid synchronous
+ * rendering.
+ *
+ * Drops this context's reference to the quad vertex buffer and resets
+ * the slot counter to zero.  Also called from get_next_slot() once all
+ * slots of the current buffer have been used; get_next_slot() creates a
+ * buffer again when it finds ctx->vbuf unset.
+ *
+ * NOTE(review): presumably pipe_buffer_reference(&ptr, NULL) leaves
+ * ctx->vbuf NULL -- confirm against its definition.
+ */
+void util_blit_flush( struct blit_state *ctx )
+{
+   pipe_buffer_reference(&ctx->vbuf, NULL);
+   ctx->vbuf_slot = 0;
+} 
+
+
+
 /**
  * Copy pixel block from src texture to dst surface.
  * Overlapping regions are acceptable.
@@ -437,10 +519,9 @@ util_blit_pixels_tex(struct blit_state *ctx,
                  int dstX1, int dstY1,
                  float z, uint filter)
 {
-   struct pipe_context *pipe = ctx->pipe;
-   struct pipe_screen *screen = pipe->screen;
    struct pipe_framebuffer_state fb;
    float s0, t0, s1, t1;
+   unsigned offset;
 
    assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
           filter == PIPE_TEX_MIPFILTER_LINEAR);
@@ -453,8 +534,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
    t0 = srcY0 / (float)tex->height[0];
    t1 = srcY1 / (float)tex->height[0];
 
-   assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
-                                      PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
+   assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
+                                                 PIPE_TEXTURE_2D,
+                                                 PIPE_TEXTURE_USAGE_RENDER_TARGET,
+                                                 0));
 
    /* save state (restored below) */
    cso_save_blend(ctx->cso);
@@ -465,13 +548,11 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
-   cso_save_viewport(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
-   cso_set_viewport(ctx->cso, &ctx->viewport);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -483,25 +564,26 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_set_sampler_textures(ctx->cso, 1, &tex);
 
    /* shaders */
-   cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
 
    /* drawing dest */
    memset(&fb, 0, sizeof(fb));
    fb.width = dst->width;
    fb.height = dst->height;
-   fb.num_cbufs = 1;
+   fb.nr_cbufs = 1;
    fb.cbufs[0] = dst;
    cso_set_framebuffer(ctx->cso, &fb);
 
    /* draw quad */
-   setup_vertex_data_tex(ctx,
-                     (float) dstX0, (float) dstY0,
-                     (float) dstX1, (float) dstY1,
-                     s0, t0, s1, t1,
-                     z);
-
-   util_draw_vertex_buffer(ctx->pipe, ctx->vbuf,
+   offset = setup_vertex_data_tex(ctx,
+                                  (float) dstX0, (float) dstY0,
+                                  (float) dstX1, (float) dstY1,
+                                  s0, t0, s1, t1,
+                                  z);
+
+   util_draw_vertex_buffer(ctx->pipe, 
+                           ctx->vbuf, offset,
                            PIPE_PRIM_TRIANGLE_FAN,
                            4,  /* verts */
                            2); /* attribs/vert */
@@ -515,5 +597,4 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
-   cso_restore_viewport(ctx->cso);
 }