st/mesa: add a second pipeline for compute
[mesa.git] / src / mesa / state_tracker / st_cb_bitmap.c
index c40a510c35159241141c153da88944fefaf798e5..2b2792e2be6a3bfd87678bb14fab928488b99733 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -34,7 +34,7 @@
 #include "main/image.h"
 #include "main/bufferobj.h"
 #include "main/macros.h"
-#include "main/mfeatures.h"
+#include "main/pbo.h"
 #include "program/program.h"
 #include "program/prog_print.h"
 
 #include "util/u_inlines.h"
 #include "util/u_draw_quad.h"
 #include "util/u_simple_shaders.h"
+#include "util/u_upload_mgr.h"
 #include "program/prog_instruction.h"
 #include "cso_cache/cso_context.h"
 
 
-#if FEATURE_drawpix
-
 /**
  * glBitmaps are drawn as textured quads.  The user's bitmap pattern
  * is stored in a texture image.  An alpha8 texture format is used.
@@ -108,128 +107,6 @@ struct bitmap_cache
 #define Z_EPSILON 1e-06
 
 
-/**
- * Make fragment program for glBitmap:
- *   Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_fragment_program *stfp;
-   struct gl_program *p;
-   GLuint ic = 0;
-
-   p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-   if (!p)
-      return NULL;
-
-   p->NumInstructions = 3;
-
-   p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
-   if (!p->Instructions) {
-      ctx->Driver.DeleteProgram(ctx, p);
-      return NULL;
-   }
-   _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
-   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
-   p->Instructions[ic].Opcode = OPCODE_TEX;
-   p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
-   p->Instructions[ic].DstReg.Index = 0;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
-   p->Instructions[ic].TexSrcUnit = samplerIndex;
-   p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
-   ic++;
-
-   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
-   p->Instructions[ic].Opcode = OPCODE_KIL;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
-   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
-      p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
-   p->Instructions[ic].SrcReg[0].Index = 0;
-   p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
-   ic++;
-
-   /* END; */
-   p->Instructions[ic++].Opcode = OPCODE_END;
-
-   assert(ic == p->NumInstructions);
-
-   p->InputsRead = FRAG_BIT_TEX0;
-   p->OutputsWritten = 0x0;
-   p->SamplersUsed = (1 << samplerIndex);
-
-   stfp = (struct st_fragment_program *) p;
-   stfp->Base.UsesKill = GL_TRUE;
-
-   return stfp;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
-   int i;
-   for (i = 0; i < 32; i++) {
-      if ((bitfield & (1 << i)) == 0) {
-         return i;
-      }
-   }
-   return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st  current context
- * \param fpIn  the incoming fragment program
- * \param fpOut  the new fragment program which does fragment culling
- * \param bitmap_sampler  sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
-                                struct gl_fragment_program *fpIn,
-                                struct gl_fragment_program **fpOut,
-                                GLuint *bitmap_sampler)
-{
-   struct st_fragment_program *bitmap_prog;
-   struct gl_program *newProg;
-   uint sampler;
-
-   /*
-    * Generate new program which is the user-defined program prefixed
-    * with the bitmap sampler/kill instructions.
-    */
-   sampler = find_free_bit(fpIn->Base.SamplersUsed);
-   bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
-   newProg = _mesa_combine_programs(st->ctx,
-                                    &bitmap_prog->Base.Base,
-                                    &fpIn->Base);
-   /* done with this after combining */
-   st_reference_fragprog(st, &bitmap_prog, NULL);
-
-#if 0
-   {
-      printf("Combined bitmap program:\n");
-      _mesa_print_program(newProg);
-      printf("InputsRead: 0x%x\n", newProg->InputsRead);
-      printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
-      _mesa_print_parameter_list(newProg->Parameters);
-   }
-#endif
-
-   /* return results */
-   *fpOut = (struct gl_fragment_program *) newProg;
-   *bitmap_sampler = sampler;
-}
-
-
 /**
  * Copy user-provide bitmap bits into texture buffer, expanding
  * bits into texels.
@@ -276,18 +153,16 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
     * Create texture to hold bitmap pattern.
     */
    pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
-                          0, width, height, 1, 1,
+                          0, width, height, 1, 1, 0,
                           PIPE_BIND_SAMPLER_VIEW);
    if (!pt) {
       _mesa_unmap_pbo_source(ctx, unpack);
       return NULL;
    }
 
-   transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
-                                PIPE_TRANSFER_WRITE,
-                                0, 0, width, height);
-
-   dest = pipe_transfer_map(pipe, transfer);
+   dest = pipe_transfer_map(st->pipe, pt, 0, 0,
+                            PIPE_TRANSFER_WRITE,
+                            0, 0, width, height, &transfer);
 
    /* Put image into texture transfer */
    memset(dest, 0xff, height * transfer->stride);
@@ -298,129 +173,28 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
 
    /* Release transfer */
    pipe_transfer_unmap(pipe, transfer);
-   pipe->transfer_destroy(pipe, transfer);
-
    return pt;
 }
 
-static GLuint
-setup_bitmap_vertex_data(struct st_context *st, bool normalized,
-                         int x, int y, int width, int height,
-                         float z, const float color[4])
-{
-   struct pipe_context *pipe = st->pipe;
-   const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-   const GLfloat fb_width = (GLfloat)fb->Width;
-   const GLfloat fb_height = (GLfloat)fb->Height;
-   const GLfloat x0 = (GLfloat)x;
-   const GLfloat x1 = (GLfloat)(x + width);
-   const GLfloat y0 = (GLfloat)y;
-   const GLfloat y1 = (GLfloat)(y + height);
-   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
-   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
-   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
-   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
-   const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
-   GLuint i;
-
-   if(!normalized)
-   {
-      sRight = width;
-      tBot = height;
-   }
-
-   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
-    * no_flush) updates to buffers where we know there is no conflict
-    * with previous data.  Currently using max_slots > 1 will cause
-    * synchronous rendering if the driver flushes its command buffers
-    * between one bitmap and the next.  Our flush hook below isn't
-    * sufficient to catch this as the driver doesn't tell us when it
-    * flushes its own command buffers.  Until this gets fixed, pay the
-    * price of allocating a new buffer for each bitmap cache-flush to
-    * avoid synchronous rendering.
-    */
-   if (st->bitmap.vbuf_slot >= max_slots) {
-      pipe_resource_reference(&st->bitmap.vbuf, NULL);
-      st->bitmap.vbuf_slot = 0;
-   }
-
-   if (!st->bitmap.vbuf) {
-      st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 
-                                           PIPE_BIND_VERTEX_BUFFER,
-                                           max_slots *
-                                           sizeof(st->bitmap.vertices));
-   }
-
-   /* Positions are in clip coords since we need to do clipping in case
-    * the bitmap quad goes beyond the window bounds.
-    */
-   st->bitmap.vertices[0][0][0] = clip_x0;
-   st->bitmap.vertices[0][0][1] = clip_y0;
-   st->bitmap.vertices[0][2][0] = sLeft;
-   st->bitmap.vertices[0][2][1] = tTop;
-
-   st->bitmap.vertices[1][0][0] = clip_x1;
-   st->bitmap.vertices[1][0][1] = clip_y0;
-   st->bitmap.vertices[1][2][0] = sRight;
-   st->bitmap.vertices[1][2][1] = tTop;
-   
-   st->bitmap.vertices[2][0][0] = clip_x1;
-   st->bitmap.vertices[2][0][1] = clip_y1;
-   st->bitmap.vertices[2][2][0] = sRight;
-   st->bitmap.vertices[2][2][1] = tBot;
-   
-   st->bitmap.vertices[3][0][0] = clip_x0;
-   st->bitmap.vertices[3][0][1] = clip_y1;
-   st->bitmap.vertices[3][2][0] = sLeft;
-   st->bitmap.vertices[3][2][1] = tBot;
-   
-   /* same for all verts: */
-   for (i = 0; i < 4; i++) {
-      st->bitmap.vertices[i][0][2] = z;
-      st->bitmap.vertices[i][0][3] = 1.0;
-      st->bitmap.vertices[i][1][0] = color[0];
-      st->bitmap.vertices[i][1][1] = color[1];
-      st->bitmap.vertices[i][1][2] = color[2];
-      st->bitmap.vertices[i][1][3] = color[3];
-      st->bitmap.vertices[i][2][2] = 0.0; /*R*/
-      st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
-   }
-
-   /* put vertex data into vbuf */
-   pipe_buffer_write_nooverlap(st->pipe,
-                               st->bitmap.vbuf,
-                               st->bitmap.vbuf_slot
-                               * sizeof(st->bitmap.vertices),
-                               sizeof st->bitmap.vertices,
-                               st->bitmap.vertices);
-
-   return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
-}
-
-
 
 /**
- * Render a glBitmap by drawing a textured quad
+ * Setup pipeline state prior to rendering the bitmap textured quad.
  */
 static void
-draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
-                 GLsizei width, GLsizei height,
-                 struct pipe_sampler_view *sv,
-                 const GLfloat *color)
+setup_render_state(struct gl_context *ctx,
+                   struct pipe_sampler_view *sv,
+                   const GLfloat *color)
 {
    struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
    struct cso_context *cso = st->cso_context;
    struct st_fp_variant *fpv;
    struct st_fp_variant_key key;
-   GLuint maxSize;
-   GLuint offset;
 
    memset(&key, 0, sizeof(key));
-   key.st = st;
+   key.st = st->has_shareable_shaders ? NULL : st;
    key.bitmap = GL_TRUE;
+   key.clamp_color = st->clamp_frag_color_in_shader &&
+                     st->ctx->Color._ClampFragmentColor;
 
    fpv = st_get_fp_variant(st, st->fp, &key);
 
@@ -435,31 +209,26 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
       GLfloat colorSave[4];
       COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
       COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
-      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+      st_upload_constants(st, st->fp->Base.Base.Parameters,
+                          PIPE_SHADER_FRAGMENT);
       COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
    }
 
-
-   /* limit checks */
-   /* XXX if the bitmap is larger than the max texture size, break
-    * it up into chunks.
-    */
-   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
-                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
-   assert(width <= (GLsizei)maxSize);
-   assert(height <= (GLsizei)maxSize);
-
    cso_save_rasterizer(cso);
-   cso_save_samplers(cso);
+   cso_save_fragment_samplers(cso);
    cso_save_fragment_sampler_views(cso);
    cso_save_viewport(cso);
    cso_save_fragment_shader(cso);
+   cso_save_stream_outputs(cso);
    cso_save_vertex_shader(cso);
+   cso_save_tessctrl_shader(cso);
+   cso_save_tesseval_shader(cso);
+   cso_save_geometry_shader(cso);
    cso_save_vertex_elements(cso);
-   cso_save_vertex_buffers(cso);
+   cso_save_aux_vertex_buffer_slot(cso);
 
    /* rasterizer state: just scissor */
-   st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
+   st->bitmap.rasterizer.scissor = ctx->Scissor.EnableFlags & 1;
    cso_set_rasterizer(cso, &st->bitmap.rasterizer);
 
    /* fragment shader state: TEX lookup program */
@@ -468,78 +237,193 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    /* vertex shader state: position + texcoord pass-through */
    cso_set_vertex_shader_handle(cso, st->bitmap.vs);
 
+   /* disable other shaders */
+   cso_set_tessctrl_shader_handle(cso, NULL);
+   cso_set_tesseval_shader_handle(cso, NULL);
+   cso_set_geometry_shader_handle(cso, NULL);
+
    /* user samplers, plus our bitmap sampler */
    {
       struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
-      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
+      uint num = MAX2(fpv->bitmap_sampler + 1,
+                      st->state.num_samplers[PIPE_SHADER_FRAGMENT]);
       uint i;
-      for (i = 0; i < st->state.num_samplers; i++) {
-         samplers[i] = &st->state.samplers[i];
+      for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) {
+         samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
       }
-      samplers[fpv->bitmap_sampler] =
-         &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
-      cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
+      samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
+      cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num,
+                       (const struct pipe_sampler_state **) samplers);
    }
 
    /* user textures, plus the bitmap texture */
    {
       struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
-      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
-      memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
+      uint num = MAX2(fpv->bitmap_sampler + 1,
+                      st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
+      memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
+             sizeof(sampler_views));
       sampler_views[fpv->bitmap_sampler] = sv;
-      cso_set_fragment_sampler_views(cso, num, sampler_views);
+      cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num, sampler_views);
    }
 
    /* viewport state: viewport matching window dims */
    {
-      const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
-      const GLfloat width = (GLfloat)fb->Width;
-      const GLfloat height = (GLfloat)fb->Height;
+      const GLboolean invert = st->state.fb_orientation == Y_0_TOP;
+      const GLfloat width = (GLfloat)st->state.framebuffer.width;
+      const GLfloat height = (GLfloat)st->state.framebuffer.height;
       struct pipe_viewport_state vp;
       vp.scale[0] =  0.5f * width;
       vp.scale[1] = height * (invert ? -0.5f : 0.5f);
       vp.scale[2] = 0.5f;
-      vp.scale[3] = 1.0f;
       vp.translate[0] = 0.5f * width;
       vp.translate[1] = 0.5f * height;
       vp.translate[2] = 0.5f;
-      vp.translate[3] = 0.0f;
       cso_set_viewport(cso, &vp);
    }
 
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+   cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+}
 
-   /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
-   z = z * 2.0 - 1.0;
-
-   /* draw textured quad */
-   offset = setup_bitmap_vertex_data(st,
-                                     sv->texture->target != PIPE_TEXTURE_RECT,
-                                     x, y, width, height, z, color);
-
-   util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset,
-                           PIPE_PRIM_TRIANGLE_FAN,
-                           4,  /* verts */
-                           3); /* attribs/vert */
 
+/**
+ * Restore pipeline state after rendering the bitmap textured quad.
+ */
+static void
+restore_render_state(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct cso_context *cso = st->cso_context;
 
-   /* restore state */
    cso_restore_rasterizer(cso);
-   cso_restore_samplers(cso);
+   cso_restore_fragment_samplers(cso);
    cso_restore_fragment_sampler_views(cso);
    cso_restore_viewport(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_tessctrl_shader(cso);
+   cso_restore_tesseval_shader(cso);
+   cso_restore_geometry_shader(cso);
    cso_restore_vertex_elements(cso);
-   cso_restore_vertex_buffers(cso);
+   cso_restore_aux_vertex_buffer_slot(cso);
+   cso_restore_stream_outputs(cso);
 }
 
 
+/**
+ * Render a glBitmap by drawing a textured quad
+ */
 static void
-reset_cache(struct st_context *st)
+draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
+                 GLsizei width, GLsizei height,
+                 struct pipe_sampler_view *sv,
+                 const GLfloat *color)
 {
+   struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *vbuf = NULL;
+   const float fb_width = (float) st->state.framebuffer.width;
+   const float fb_height = (float) st->state.framebuffer.height;
+   const float x0 = (float) x;
+   const float x1 = (float) (x + width);
+   const float y0 = (float) y;
+   const float y1 = (float) (y + height);
+   float sLeft = 0.0f, sRight = 1.0f;
+   float tTop = 0.0f, tBot = 1.0f - tTop;
+   const float clip_x0 = x0 / fb_width * 2.0f - 1.0f;
+   const float clip_y0 = y0 / fb_height * 2.0f - 1.0f;
+   const float clip_x1 = x1 / fb_width * 2.0f - 1.0f;
+   const float clip_y1 = y1 / fb_height * 2.0f - 1.0f;
+   float (*vertices)[3][4];  /**< vertex pos + color + texcoord */
+   unsigned offset, i;
+
+   /* limit checks */
+   {
+      /* XXX if the bitmap is larger than the max texture size, break
+       * it up into chunks.
+       */
+      GLuint maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
+      assert(width <= (GLsizei) maxSize);
+      assert(height <= (GLsizei) maxSize);
+   }
+
+   setup_render_state(ctx, sv, color);
+
+   /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
+   z = z * 2.0f - 1.0f;
+
+   if (sv->texture->target == PIPE_TEXTURE_RECT) {
+      /* use non-normalized texcoords */
+      sRight = (float) width;
+      tBot = (float) height;
+   }
+
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
+                  &offset, &vbuf, (void **) &vertices);
+   if (!vbuf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap");
+      restore_render_state(ctx);
+      return;
+   }
+
+   /* Positions are in clip coords since we need to do clipping in case
+    * the bitmap quad goes beyond the window bounds.
+    */
+   vertices[0][0][0] = clip_x0;
+   vertices[0][0][1] = clip_y0;
+   vertices[0][2][0] = sLeft;
+   vertices[0][2][1] = tTop;
+
+   vertices[1][0][0] = clip_x1;
+   vertices[1][0][1] = clip_y0;
+   vertices[1][2][0] = sRight;
+   vertices[1][2][1] = tTop;
+
+   vertices[2][0][0] = clip_x1;
+   vertices[2][0][1] = clip_y1;
+   vertices[2][2][0] = sRight;
+   vertices[2][2][1] = tBot;
+
+   vertices[3][0][0] = clip_x0;
+   vertices[3][0][1] = clip_y1;
+   vertices[3][2][0] = sLeft;
+   vertices[3][2][1] = tBot;
+
+   /* same for all verts: */
+   for (i = 0; i < 4; i++) {
+      vertices[i][0][2] = z;
+      vertices[i][0][3] = 1.0f;
+      vertices[i][1][0] = color[0];
+      vertices[i][1][1] = color[1];
+      vertices[i][1][2] = color[2];
+      vertices[i][1][3] = color[3];
+      vertices[i][2][2] = 0.0; /*R*/
+      vertices[i][2][3] = 1.0; /*Q*/
+   }
+
+   u_upload_unmap(st->uploader);
+
+   util_draw_vertex_buffer(pipe, st->cso_context, vbuf,
+                           cso_get_aux_vertex_buffer_slot(st->cso_context),
+                           offset,
+                           PIPE_PRIM_TRIANGLE_FAN,
+                           4,  /* verts */
+                           3); /* attribs/vert */
+
+   restore_render_state(ctx);
+
+   pipe_resource_reference(&vbuf, NULL);
+
+   /* We uploaded modified constants, need to invalidate them. */
+   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
+}
+
+
+static void
+reset_cache(struct st_context *st)
+{
    struct bitmap_cache *cache = st->bitmap.cache;
 
    /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
@@ -550,18 +434,13 @@ reset_cache(struct st_context *st)
    cache->ymin = 1000000;
    cache->ymax = -1000000;
 
-   if (cache->trans) {
-      pipe->transfer_destroy(pipe, cache->trans);
-      cache->trans = NULL;
-   }
-
    assert(!cache->texture);
 
    /* allocate a new texture */
-   cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
+   cache->texture = st_texture_create(st, st->internal_target,
                                       st->bitmap.tex_format, 0,
                                       BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
-                                      1, 1,
+                                      1, 1, 0,
                                      PIPE_BIND_SAMPLER_VIEW);
 }
 
@@ -601,11 +480,10 @@ create_cache_trans(struct st_context *st)
    /* Map the texture transfer.
     * Subsequent glBitmap calls will write into the texture image.
     */
-   cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
-                                    PIPE_TRANSFER_WRITE, 0, 0,
-                                    BITMAP_CACHE_WIDTH,
-                                    BITMAP_CACHE_HEIGHT);
-   cache->buffer = pipe_transfer_map(pipe, cache->trans);
+   cache->buffer = pipe_transfer_map(pipe, cache->texture, 0, 0,
+                                     PIPE_TRANSFER_WRITE, 0, 0,
+                                     BITMAP_CACHE_WIDTH,
+                                     BITMAP_CACHE_HEIGHT, &cache->trans);
 
    /* init image to all 0xff */
    memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
@@ -618,46 +496,42 @@ create_cache_trans(struct st_context *st)
 void
 st_flush_bitmap_cache(struct st_context *st)
 {
-   if (!st->bitmap.cache->empty) {
-      struct bitmap_cache *cache = st->bitmap.cache;
+   struct bitmap_cache *cache = st->bitmap.cache;
 
-      if (st->ctx->DrawBuffer) {
-         struct pipe_context *pipe = st->pipe;
-         struct pipe_sampler_view *sv;
+   if (cache && !cache->empty) {
+      struct pipe_context *pipe = st->pipe;
+      struct pipe_sampler_view *sv;
 
-         assert(cache->xmin <= cache->xmax);
-/*         printf("flush size %d x %d  at %d, %d\n",
+      assert(cache->xmin <= cache->xmax);
+
+      if (0)
+         printf("flush bitmap, size %d x %d  at %d, %d\n",
                 cache->xmax - cache->xmin,
                 cache->ymax - cache->ymin,
                 cache->xpos, cache->ypos);
-*/
 
-         /* The texture transfer has been mapped until now.
-          * So unmap and release the texture transfer before drawing.
-          */
-         if (cache->trans) {
-            if (0)
-               print_cache(cache);
-            pipe_transfer_unmap(pipe, cache->trans);
-            cache->buffer = NULL;
-
-            pipe->transfer_destroy(pipe, cache->trans);
-            cache->trans = NULL;
-         }
-
-         sv = st_create_texture_sampler_view(st->pipe, cache->texture);
-         if (sv) {
-            draw_bitmap_quad(st->ctx,
-                             cache->xpos,
-                             cache->ypos,
-                             cache->zpos,
-                             BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
-                             sv,
-                             cache->color);
-
-            pipe_sampler_view_reference(&sv, NULL);
-         }
+      /* The texture transfer has been mapped until now.
+       * So unmap and release the texture transfer before drawing.
+       */
+      if (cache->trans && cache->buffer) {
+         if (0)
+            print_cache(cache);
+         pipe_transfer_unmap(pipe, cache->trans);
+         cache->buffer = NULL;
+         cache->trans = NULL;
+      }
+
+      sv = st_create_texture_sampler_view(st->pipe, cache->texture);
+      if (sv) {
+         draw_bitmap_quad(st->ctx,
+                          cache->xpos,
+                          cache->ypos,
+                          cache->zpos,
+                          BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
+                          sv,
+                          cache->color);
+
+         pipe_sampler_view_reference(&sv, NULL);
       }
 
       /* release/free the texture */
@@ -668,32 +542,17 @@ st_flush_bitmap_cache(struct st_context *st)
 }
 
 
-/**
- * Flush bitmap cache and release vertex buffer.
- */
-void
-st_flush_bitmap( struct st_context *st )
-{
-   st_flush_bitmap_cache(st);
-
-   /* Release vertex buffer to avoid synchronous rendering if we were
-    * to map it in the next frame.
-    */
-   pipe_resource_reference(&st->bitmap.vbuf, NULL);
-   st->bitmap.vbuf_slot = 0;
-}
-
-
 /**
  * Try to accumulate this glBitmap call in the bitmap cache.
  * \return  GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
  */
 static GLboolean
-accum_bitmap(struct st_context *st,
+accum_bitmap(struct gl_context *ctx,
              GLint x, GLint y, GLsizei width, GLsizei height,
              const struct gl_pixelstore_attrib *unpack,
              const GLubyte *bitmap )
 {
+   struct st_context *st = ctx->st;
    struct bitmap_cache *cache = st->bitmap.cache;
    int px = -999, py = -999;
    const GLfloat z = st->ctx->Current.RasterPos[2];
@@ -743,13 +602,92 @@ accum_bitmap(struct st_context *st,
    /* create the transfer if needed */
    create_cache_trans(st);
 
+   /* PBO source... */
+   bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
+   if (!bitmap) {
+      return FALSE;
+   }
+
    unpack_bitmap(st, px, py, width, height, unpack, bitmap,
                  cache->buffer, BITMAP_CACHE_WIDTH);
 
+   _mesa_unmap_pbo_source(ctx, unpack);
+
    return GL_TRUE; /* accumulated */
 }
 
 
+/**
+ * One-time init for drawing bitmaps.
+ */
+static void
+init_bitmap_state(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   /* This function should only be called once */
+   assert(st->bitmap.cache == NULL);
+
+   assert(st->internal_target == PIPE_TEXTURE_2D ||
+          st->internal_target == PIPE_TEXTURE_RECT);
+
+   /* alloc bitmap cache object */
+   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
+
+   /* init sampler state once */
+   memset(&st->bitmap.sampler, 0, sizeof(st->bitmap.sampler));
+   st->bitmap.sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
+   st->bitmap.sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
+   st->bitmap.sampler.wrap_r = PIPE_TEX_WRAP_CLAMP;
+   st->bitmap.sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   st->bitmap.sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   st->bitmap.sampler.normalized_coords = st->internal_target == PIPE_TEXTURE_2D;
+
+   /* init baseline rasterizer state once */
+   memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
+   st->bitmap.rasterizer.half_pixel_center = 1;
+   st->bitmap.rasterizer.bottom_edge_rule = 1;
+   st->bitmap.rasterizer.depth_clip = 1;
+
+   /* find a usable texture format */
+   if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
+                                   st->internal_target, 0,
+                                   PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
+   }
+   else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
+                                        st->internal_target, 0,
+                                        PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
+   }
+   else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
+                                        st->internal_target, 0,
+                                        PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
+   }
+   else {
+      /* XXX support more formats */
+      assert(0);
+   }
+
+   /* Create the vertex shader */
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_COLOR,
+        st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD :
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indexes[] = { 0, 0, 0 };
+      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
+                                                          semantic_names,
+                                                          semantic_indexes,
+                                                          FALSE);
+   }
+
+   reset_cache(st);
+}
+
 
 /**
  * Called via ctx->Driver.Bitmap()
@@ -762,23 +700,23 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
    struct st_context *st = st_context(ctx);
    struct pipe_resource *pt;
 
-   if (width == 0 || height == 0)
-      return;
+   assert(width > 0);
+   assert(height > 0);
 
-   st_validate_state(st);
+   if (!st->bitmap.cache) {
+      init_bitmap_state(st);
+   }
 
-   if (!st->bitmap.vs) {
-      /* create pass-through vertex shader now */
-      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
-                                      TGSI_SEMANTIC_COLOR,
-                                      TGSI_SEMANTIC_GENERIC };
-      const uint semantic_indexes[] = { 0, 0, 0 };
-      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
-                                                          semantic_names,
-                                                          semantic_indexes);
+   /* We only need to validate state of the st dirty flags are set or
+    * any non-_NEW_PROGRAM_CONSTANTS mesa flags are set.  The VS we use
+    * for bitmap drawing uses no constants and the FS constants are
+    * explicitly uploaded in the draw_bitmap_quad() function.
+    */
+   if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
+      st_validate_state(st, ST_PIPELINE_RENDER);
    }
 
-   if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
+   if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
       return;
 
    pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
@@ -810,57 +748,6 @@ st_init_bitmap_functions(struct dd_function_table *functions)
 }
 
 
-/** Per-context init */
-void
-st_init_bitmap(struct st_context *st)
-{
-   struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_screen *screen = pipe->screen;
-
-   /* init sampler state once */
-   memset(sampler, 0, sizeof(*sampler));
-   sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
-   sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
-   sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
-   sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   st->bitmap.samplers[1] = *sampler;
-   st->bitmap.samplers[1].normalized_coords = 1;
-
-   /* init baseline rasterizer state once */
-   memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
-   st->bitmap.rasterizer.gl_rasterization_rules = 1;
-
-   /* find a usable texture format */
-   if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
-                                   PIPE_TEXTURE_2D, 0,
-                                   PIPE_BIND_SAMPLER_VIEW, 0)) {
-      st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
-   }
-   else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
-                                        PIPE_TEXTURE_2D, 0,
-                                        PIPE_BIND_SAMPLER_VIEW, 0)) {
-      st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
-   }
-   else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
-                                        PIPE_TEXTURE_2D, 0,
-                                        PIPE_BIND_SAMPLER_VIEW, 0)) {
-      st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
-   }
-   else {
-      /* XXX support more formats */
-      assert(0);
-   }
-
-   /* alloc bitmap cache object */
-   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
-
-   reset_cache(st);
-}
-
-
 /** Per-context tear-down */
 void
 st_destroy_bitmap(struct st_context *st)
@@ -873,20 +760,12 @@ st_destroy_bitmap(struct st_context *st)
       st->bitmap.vs = NULL;
    }
 
-   if (st->bitmap.vbuf) {
-      pipe_resource_reference(&st->bitmap.vbuf, NULL);
-      st->bitmap.vbuf = NULL;
-   }
-
    if (cache) {
-      if (cache->trans) {
+      if (cache->trans && cache->buffer) {
          pipe_transfer_unmap(pipe, cache->trans);
-         pipe->transfer_destroy(pipe, cache->trans);
       }
       pipe_resource_reference(&st->bitmap.cache->texture, NULL);
       free(st->bitmap.cache);
       st->bitmap.cache = NULL;
    }
 }
-
-#endif /* FEATURE_drawpix */