Accelerate glBitmap with a color expand blit. Nice speedup for demos
author Keith Whitwell <keith@tungstengraphics.com>
Thu, 5 Oct 2006 11:35:57 +0000 (11:35 +0000)
committer Keith Whitwell <keith@tungstengraphics.com>
Thu, 5 Oct 2006 11:35:57 +0000 (11:35 +0000)
like 'fire' that display a help message or fps number this way.

src/mesa/drivers/dri/i965/Makefile
src/mesa/drivers/dri/i965/intel_blit.c
src/mesa/drivers/dri/i965/intel_blit.h
src/mesa/drivers/dri/i965/intel_context.c
src/mesa/drivers/dri/i965/intel_context.h
src/mesa/drivers/dri/i965/intel_pixel_bitmap.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/intel_pixel_copy.c

index dfa9318a687fa7b2e7b8c18bbd962b5cc141af74..213eac895cbc02299506671c560638ad3ef597df 100644 (file)
@@ -17,6 +17,7 @@ DRIVER_SOURCES = \
        intel_screen.c \
        intel_span.c \
        intel_pixel_copy.c \
+       intel_pixel_bitmap.c \
        intel_state.c \
        intel_tex.c \
        intel_tex_validate.c \
index b09b0a95e73812a3478f0fb275b10e1f58468010..e501f5e6609e511e27ef0a4c2286b391a4d91c83 100644 (file)
@@ -39,6 +39,7 @@
 #include "intel_context.h"
 #include "intel_blit.h"
 #include "intel_regions.h"
+#include "intel_structs.h"
 
 #include "bufmgr.h"
 
@@ -493,3 +494,98 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all,
 }
 
 
+
+#define BR13_565  0x1
+#define BR13_8888 0x3
+
+
+/**
+ * Emit an XY_SETUP_BLT followed by an XY_TEXT_IMMEDIATE_BLT into the
+ * batchbuffer.  The monochrome source bits travel inline in the batch
+ * as immediate data and are expanded to fg_color by the blitter.
+ *
+ * \param intel       context whose batchbuffer receives the commands.
+ * \param cpp         destination bytes per pixel; 4 selects the 8888
+ *                    color depth, any other value selects 565.
+ * \param src_bits    monochrome source data.  src_size rounded up to a
+ *                    multiple of 8 bytes is copied from here, so the
+ *                    buffer must be readable for that many bytes.
+ * \param src_size    size of the source data in bytes.
+ * \param fg_color    foreground color programmed into the setup blit.
+ * \param dst_pitch   destination pitch; multiplied by cpp here (and then
+ *                    divided by 4 when dst_tiled) before being programmed.
+ * \param dst_buffer  destination buffer.
+ * \param dst_offset  offset added to the buffer's base address.
+ * \param dst_tiled   non-zero when the destination is tiled.
+ * \param x, y        top-left corner of the destination rectangle.
+ * \param w, h        size of the destination rectangle; nothing is
+ *                    emitted when either is negative.
+ */
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+                                 GLuint cpp,
+                                 GLubyte *src_bits, GLuint src_size,
+                                 GLuint fg_color,
+                                 GLshort dst_pitch,
+                                 struct buffer *dst_buffer,
+                                 GLuint dst_offset,
+                                 GLboolean dst_tiled,
+                                 GLshort x, GLshort y, 
+                                 GLshort w, GLshort h)
+{
+   struct xy_setup_blit setup;
+   struct xy_text_immediate_blit text;
+   /* Immediate data length in dwords, padded to a whole number of qwords. */
+   int dwords = ((src_size + 7) & ~7) / 4;
+
+
+   if (w < 0 || h < 0) 
+      return;
+
+   dst_pitch *= cpp;
+
+   if (dst_tiled) 
+      dst_pitch /= 4;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+
+   memset(&setup, 0, sizeof(setup));
+   
+   setup.br0.client = CLIENT_2D;
+   setup.br0.opcode = OPCODE_XY_SETUP_BLT;
+   setup.br0.write_alpha = (cpp == 4);
+   setup.br0.write_rgb = (cpp == 4);
+   setup.br0.dst_tiled = dst_tiled;
+   setup.br0.length = (sizeof(setup) / sizeof(int)) - 2;
+      
+   setup.br13.dest_pitch = dst_pitch;
+   setup.br13.rop = 0xcc;
+   setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565;
+   setup.br13.clipping_enable = 0;
+   setup.br13.mono_source_transparency = 1;
+
+   /* Clipping is disabled above, so these clip rectangle values are
+    * presumably ignored by the hardware.
+    */
+   setup.dw2.clip_y1 = 0;
+   setup.dw2.clip_x1 = 0;
+   setup.dw3.clip_y2 = 100;
+   setup.dw3.clip_x2 = 100;
+
+   setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset;
+   setup.background_color = 0;
+   setup.foreground_color = fg_color;
+   setup.pattern_base_addr = 0;
+
+   memset(&text, 0, sizeof(text));
+   text.dw0.client = CLIENT_2D;
+   text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
+   text.dw0.pad0 = 0;
+   text.dw0.byte_packed = 1;   /* ?maybe? */
+   text.dw0.pad1 = 0;
+   text.dw0.dst_tiled = dst_tiled;
+   text.dw0.pad2 = 0;
+   text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords;
+   text.dw1.dest_y1 = y;       /* duplicates info in setup blit */
+   text.dw1.dest_x1 = x;
+   text.dw2.dest_y2 = y + h;
+   text.dw2.dest_x2 = x + w;
+
+   /* Reserve room for everything emitted below in one go.  The request
+    * is in bytes, so the immediate data counts as dwords * 4 -- the same
+    * amount that the final intel_batchbuffer_data() call writes.
+    */
+   intel_batchbuffer_require_space( intel->batch,
+                                   sizeof(setup) + 
+                                   sizeof(text) + 
+                                   dwords * 4,
+                                   INTEL_BATCH_NO_CLIPRECTS );
+
+   intel_batchbuffer_data( intel->batch,
+                          &setup,
+                          sizeof(setup),
+                          INTEL_BATCH_NO_CLIPRECTS );
+
+   intel_batchbuffer_data( intel->batch,
+                          &text,
+                          sizeof(text),
+                          INTEL_BATCH_NO_CLIPRECTS );
+
+   intel_batchbuffer_data( intel->batch,
+                          src_bits,
+                          dwords * 4,
+                          INTEL_BATCH_NO_CLIPRECTS );
+}
+
index 357ceb4c512142026604a9e476190717a9053391..71ce8307784c6b381ac72a88e5c5168be68faba8 100644 (file)
@@ -62,5 +62,16 @@ extern void intelEmitFillBlit( struct intel_context *intel,
                               GLshort w, GLshort h,
                               GLuint color );
 
+/* Emit a color-expand blit whose monochrome source bits (src_bits,
+ * src_size bytes) are carried as immediate data in the batchbuffer.
+ * Nothing is emitted when w or h is negative.
+ */
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+                                 GLuint cpp,
+                                 GLubyte *src_bits, GLuint src_size,
+                                 GLuint fg_color,
+                                 GLshort dst_pitch,
+                                 struct buffer *dst_buffer,
+                                 GLuint dst_offset,
+                                 GLboolean dst_tiled,
+                                 GLshort dst_x, GLshort dst_y, 
+                                 GLshort w, GLshort h);
 
 #endif
index 5f19137d58eb285dae062a2969cb6af94e6c5f03..d65de633fd01f7238b6b70fb8ee5ae6f5e211eda 100644 (file)
@@ -258,7 +258,6 @@ void intelInitDriverFunctions( struct dd_function_table *functions )
    /* Pixel path fallbacks.
     */
    functions->Accum = _swrast_Accum;
-   functions->Bitmap = _swrast_Bitmap;
    functions->ReadPixels = _swrast_ReadPixels;
    functions->DrawPixels = _swrast_DrawPixels;
 
@@ -266,6 +265,12 @@ void intelInitDriverFunctions( struct dd_function_table *functions )
     * manager:
     */
    functions->CopyPixels = intelCopyPixels;
+   functions->Bitmap = intelBitmap;
+
+   if (getenv("INTEL_NO_BLIT")) {
+      functions->Bitmap = _swrast_Bitmap;
+      functions->CopyPixels = _swrast_CopyPixels;
+   }
 
    intelInitTextureFuncs( functions );
    intelInitStateFuncs( functions );
index d0354cfc8f9553e8d9747bbf3adfffcb847944ee..4ca356beff8330ec2d9552957a2e95e0483ea47f 100644 (file)
@@ -486,6 +486,14 @@ void intelCopyPixels(GLcontext * ctx,
                      GLsizei width, GLsizei height,
                      GLint destx, GLint desty, GLenum type);
 
+/* Shared by the blit-based pixel paths: tells the caller whether the
+ * current GL state allows the blit path, or whether fragment operations
+ * are in effect that require a fallback.
+ */
+GLboolean intel_check_blit_fragment_ops(GLcontext * ctx);
+
+/* glBitmap driver entry point: tries the blitter first and falls back
+ * to _swrast_Bitmap when the blit path declines.
+ */
+void intelBitmap(GLcontext * ctx,
+                GLint x, GLint y,
+                GLsizei width, GLsizei height,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLubyte * pixels);
+
 #define _NEW_WINDOW_POS 0x40000000
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
new file mode 100644 (file)
index 0000000..5841afa
--- /dev/null
@@ -0,0 +1,350 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "colormac.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+
+
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+
+/* In contrast to the other intel_pixel_* functions, the incoming data
+ * is not expected to live in a PBO here.  The XY_TEXT blit method gains
+ * nothing from having it in a PBO, although a path built on
+ * XY_MONO_SRC_COPY_BLIT could be implemented which might benefit PBO
+ * bitmaps.  I think they are probably pretty rare though - I wonder if
+ * Xgl uses them?
+ *
+ * Validates the PBO access and maps the unpack buffer read-only.
+ * Returns the bitmap address inside the mapping, or NULL after
+ * recording GL_INVALID_OPERATION when validation or mapping fails.
+ */
+static const GLubyte *map_pbo( GLcontext *ctx,
+                              GLsizei width, GLsizei height,
+                              const struct gl_pixelstore_attrib *unpack,
+                              const GLubyte *bitmap )
+{
+   GLubyte *mapping;
+
+   if (_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                GL_COLOR_INDEX, GL_BITMAP,
+                                (GLvoid *) bitmap)) {
+      mapping = (GLubyte *) ctx->Driver.MapBuffer(ctx,
+                                                  GL_PIXEL_UNPACK_BUFFER_EXT,
+                                                  GL_READ_ONLY_ARB,
+                                                  unpack->BufferObj);
+      if (mapping)
+         return ADD_POINTERS(mapping, bitmap);
+
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+   }
+
+   return NULL;
+}
+
+/* Return 1 when bit number 'bit' of the byte array 'src' is set, else 0.
+ * Bits are numbered from the least significant bit of each byte.
+ */
+static GLboolean test_bit( const GLubyte *src,
+                           GLuint bit )
+{
+   const GLuint byte_index = bit >> 3;
+   const GLuint bit_in_byte = bit & 7;
+
+   return (src[byte_index] >> bit_in_byte) & 1;
+}
+
+/* Set bit number 'bit' in the byte array 'dest'.  Bits are numbered
+ * from the least significant bit of each byte.
+ */
+static void set_bit( GLubyte *dest,
+                         GLuint bit )
+{
+   const GLuint byte_index = bit >> 3;
+   const GLuint bit_in_byte = bit & 7;
+
+   dest[byte_index] |= 1 << bit_in_byte;
+}
+
+/* Round x up to the next multiple of align.  The masking only works
+ * when align is a power of two.
+ */
+static int align(int x, int align)
+{
+   const int low_bits = align - 1;
+
+   return (x + low_bits) & ~low_bits;
+}
+
+/* Pull a w x h rectangle, starting at bitmap pixel (x, y), out of the
+ * client bitmap and pack it into dest.  Called per-cliprect.
+ *
+ * dest must already be zeroed, because only set bits are written.
+ * Output bit numbers are XORed with 7 when stored.  After every row the
+ * output position is rounded up to a multiple of row_align bits (which
+ * must be a power of two; 0 disables the rounding).  With invert set,
+ * source rows are visited from h-1 down to 0 instead of 0 up to h-1.
+ *
+ * Returns the number of set bits that were copied.
+ */
+static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
+                             const struct gl_pixelstore_attrib *unpack,
+                             const GLubyte *bitmap,
+                             GLuint x, GLuint y, 
+                             GLuint w, GLuint h,
+                             GLubyte *dest,
+                             GLuint row_align,
+                             GLboolean invert)
+{
+   /* Bit position of the first requested pixel within its source byte. */
+   const GLuint first_bit = (x + unpack->SkipPixels) & 0x7;
+   /* XORed into source bit numbers; non-zero unless the source is LsbFirst. */
+   const GLuint bit_flip = unpack->LsbFirst ? 0 : 7;
+   GLuint out_bit = 0;
+   GLuint set_count = 0;
+   GLuint n;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+                  __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, first_bit, bit_flip);
+
+   for (n = 0; n < h; n++) {
+      const GLuint row = invert ? (h - 1 - n) : n;
+      const GLubyte *src_row = _mesa_image_address2d(unpack, bitmap,
+                                                     width, height,
+                                                     GL_COLOR_INDEX, GL_BITMAP,
+                                                     y + row, x);
+      GLuint col;
+
+      for (col = 0; col < w; col++) {
+         if (test_bit(src_row, (col + first_bit) ^ bit_flip)) {
+            set_bit(dest, out_bit ^ 7);
+            set_count++;
+         }
+         out_bit++;
+      }
+
+      /* Pad the output position to the requested row alignment. */
+      if (row_align)
+         out_bit = (out_bit + row_align - 1) & ~(row_align - 1);
+   }
+
+   return set_count;
+}
+
+
+
+
+/*
+ * Render a bitmap with the blitter.
+ *
+ * Returns GL_TRUE when the bitmap has been dealt with here (drawn,
+ * clipped away, or rejected with a GL error raised while mapping the
+ * PBO).  Returns GL_FALSE, without having touched the PBO or the
+ * hardware, when the current GL state means the caller has to fall back
+ * to another path.
+ */
+static GLboolean
+do_blit_bitmap( GLcontext *ctx, 
+               GLint dstx, GLint dsty,
+               GLsizei width, GLsizei height,
+               const struct gl_pixelstore_attrib *unpack,
+               const GLubyte *bitmap )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   
+   union {
+      GLuint ui;
+      GLubyte ub[4];
+   } color;
+
+
+   /* Decide about the fallback before mapping the PBO: returning
+    * GL_FALSE with the buffer still mapped would leak the mapping and
+    * hand the fallback path an already-mapped buffer.
+    *
+    * Does zoom apply to bitmaps?
+    */
+   if (!intel_check_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || 
+       ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   if (unpack->BufferObj->Name) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+        return GL_TRUE;        /* even though this is an error, we're done */
+   }
+
+   /* Raster color is stored with the blue channel in byte 0 and red in
+    * byte 2.
+    */
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], ctx->Current.RasterColor[2]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], ctx->Current.RasterColor[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], ctx->Current.RasterColor[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], ctx->Current.RasterColor[3]);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint srcx = 0, srcy = 0;
+      GLint orig_screen_x1, orig_screen_y2;
+      GLint i;
+
+
+      /* Remember where the unclipped bitmap sits on screen so that the
+       * source offset of each clipped piece can be recovered below.
+       */
+      orig_screen_x1 = dPriv->x + dstx;
+      orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
+
+      /* Do scissoring in GL coordinates:
+       */
+      if (ctx->Scissor.Enabled)
+      {
+        GLint x = ctx->Scissor.X;
+        GLint y = ctx->Scissor.Y;
+        GLuint w = ctx->Scissor.Width;
+        GLuint h = ctx->Scissor.Height;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->y + (dPriv->h - dsty - height);  
+      dstx = dPriv->x + dstx;
+
+      dest_rect.x1 = dstx;
+      dest_rect.y1 = dsty;
+      dest_rect.x2 = dstx + width;
+      dest_rect.y2 = dsty + height;
+
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+        int box_w, box_h;
+        GLint px, py;
+        GLuint stipple[32];  
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+        /* Now go back to GL coordinates to figure out what subset of
+         * the bitmap we are uploading for this cliprect:
+         */
+        box_w = rect.x2 - rect.x1;
+        box_h = rect.y2 - rect.y1;
+        srcx = rect.x1 - orig_screen_x1;
+        srcy = orig_screen_y2 - rect.y2;
+
+
+#define DY 32
+#define DX 32
+
+        /* Then, finally, chop it all into chunks that can be
+         * digested by hardware:
+         */
+        for (py = 0; py < box_h; py += DY) { 
+           for (px = 0; px < box_w; px += DX) { 
+              int h = MIN2(DY, box_h - py);
+              int w = MIN2(DX, box_w - px); 
+              /* Bytes of immediate data: rows padded to whole bytes,
+               * the total padded to a multiple of 64 bits.
+               */
+              GLuint sz = align(align(w,8) * h, 64)/8;
+           
+              assert(sz <= sizeof(stipple));
+              memset(stipple, 0, sz);
+
+              /* May need to adjust this when padding has been introduced in
+               * sz above:
+               */
+              if (get_bitmap_rect(width, height, unpack, 
+                                  bitmap,
+                                  srcx + px, srcy + py, w, h,
+                                  (GLubyte *)stipple,
+                                  8,
+                                  GL_TRUE) == 0)
+                 continue;     /* no bits set in this chunk */
+
+              /* 
+               */
+              intelEmitImmediateColorExpandBlit( intel,
+                                                 dst->cpp,
+                                                 (GLubyte *)stipple, 
+                                                 sz,
+                                                 color.ui,
+                                                 dst->pitch,
+                                                 dst->buffer,
+                                                 0,
+                                                 dst->tiled,
+                                                 rect.x1 + px,
+                                                 rect.y2 - (py + h),
+                                                 w, h);
+           } 
+        } 
+      }
+      intel->need_flush = GL_TRUE;
+   out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+
+   if (unpack->BufferObj->Name) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+
+   return GL_TRUE;
+}
+
+
+
+
+
+/* There are a large number of possible ways to implement bitmap on
+ * this hardware, most of them have some sort of drawback.  Here are a
+ * few that spring to mind:
+ * 
+ * Blit:
+ *    - XY_MONO_SRC_BLT_CMD
+ *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
+ *    - XY_TEXT_BLT
+ *    - XY_TEXT_IMMEDIATE_BLT
+ *         - blit per cliprect, subject to maximum immediate data size.
+ *    - XY_COLOR_BLT 
+ *         - per pixel or run of pixels
+ *    - XY_PIXEL_BLT
+ *         - good for sparse bitmaps
+ *
+ * 3D engine:
+ *    - Point per pixel
+ *    - Translate bitmap to an alpha texture and render as a quad
+ *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
+ */
+void
+intelBitmap(GLcontext * ctx,
+           GLint x, GLint y,
+           GLsizei width, GLsizei height,
+           const struct gl_pixelstore_attrib *unpack,
+           const GLubyte * pixels)
+{
+   /* Try the blitter first; only when it declines is the bitmap handed
+    * to the software rasterizer.
+    */
+   const GLboolean handled = do_blit_bitmap(ctx, x, y, width, height,
+                                            unpack, pixels);
+
+   if (!handled) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+      _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
+   }
+}
index ad27867aeaed7adb388dd43260057dc1a8940087..d5d48994529cb0cc9aee822c21b94170d747b81d 100644 (file)
@@ -74,7 +74,7 @@ copypix_src_region(struct intel_context *intel, GLenum type)
  * Check if any fragment operations are in effect which might effect
  * glDraw/CopyPixels.
  */
-static GLboolean
+GLboolean
 intel_check_blit_fragment_ops(GLcontext * ctx)
 {
    if (ctx->NewState)
@@ -83,6 +83,7 @@ intel_check_blit_fragment_ops(GLcontext * ctx)
    /* Could do logicop with the blitter: 
     */
    return !(ctx->_ImageTransferState ||
+           ctx->RenderMode != GL_RENDER ||
             ctx->Color.AlphaEnabled ||
             ctx->Depth.Test ||
             ctx->Fog.Enabled ||
@@ -90,8 +91,8 @@ intel_check_blit_fragment_ops(GLcontext * ctx)
             !ctx->Color.ColorMask[0] ||
             !ctx->Color.ColorMask[1] ||
             !ctx->Color.ColorMask[2] ||
-            !ctx->Color.ColorMask[3] ||
-            ctx->Color.ColorLogicOpEnabled ||
+            !ctx->Color.ColorMask[3] ||        /* can do this! */
+            ctx->Color.ColorLogicOpEnabled || /* can do this! */
             ctx->Texture._EnabledUnits ||
            ctx->FragmentProgram._Enabled);
 }
@@ -150,7 +151,7 @@ do_blit_copypixels(GLcontext * ctx,
         GLint dx = dstx - srcx;
          GLint dy = dsty - srcy;
 
-         if (!_mesa_clip_to_region(x, y, x+w, y+h, &dstx, &dsty, &width, &height))
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
             goto out;
         
          srcx = dstx - dx;