From: Keith Whitwell Date: Thu, 5 Oct 2006 11:35:57 +0000 (+0000) Subject: Accelerate glBitmap with a color expand blit. Nice speedup for demos X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=68da677ae29c0c38ca1bce1ce78087b9c7cf0e1a;p=mesa.git Accelerate glBitmap with a color expand blit. Nice speedup for demos like 'fire' that display a help message or fps number this way. --- diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index dfa9318a687..213eac895cb 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -17,6 +17,7 @@ DRIVER_SOURCES = \ intel_screen.c \ intel_span.c \ intel_pixel_copy.c \ + intel_pixel_bitmap.c \ intel_state.c \ intel_tex.c \ intel_tex_validate.c \ diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index b09b0a95e73..e501f5e6609 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -39,6 +39,7 @@ #include "intel_context.h" #include "intel_blit.h" #include "intel_regions.h" +#include "intel_structs.h" #include "bufmgr.h" @@ -493,3 +494,98 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all, } + +#define BR13_565 0x1 +#define BR13_8888 0x3 + + +void +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h) +{ + struct xy_setup_blit setup; + struct xy_text_immediate_blit text; + int dwords = ((src_size + 7) & ~7) / 4; /* payload rounded up to 8 bytes, counted in 32-bit words */ + + + if (w < 0 || h < 0) + return; + + dst_pitch *= cpp; + + if (dst_tiled) + dst_pitch /= 4; + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); + + memset(&setup, 0, sizeof(setup)); + + setup.br0.client = CLIENT_2D; + 
setup.br0.opcode = OPCODE_XY_SETUP_BLT; + setup.br0.write_alpha = (cpp == 4); + setup.br0.write_rgb = (cpp == 4); + setup.br0.dst_tiled = dst_tiled; + setup.br0.length = (sizeof(setup) / sizeof(int)) - 2; + + setup.br13.dest_pitch = dst_pitch; + setup.br13.rop = 0xcc; + setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565; + setup.br13.clipping_enable = 0; + setup.br13.mono_source_transparency = 1; + + setup.dw2.clip_y1 = 0; + setup.dw2.clip_x1 = 0; + setup.dw3.clip_y2 = 100; + setup.dw3.clip_x2 = 100; + + setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset; + setup.background_color = 0; + setup.foreground_color = fg_color; + setup.pattern_base_addr = 0; + + memset(&text, 0, sizeof(text)); + text.dw0.client = CLIENT_2D; + text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT; + text.dw0.pad0 = 0; + text.dw0.byte_packed = 1; /* ?maybe? */ + text.dw0.pad1 = 0; + text.dw0.dst_tiled = dst_tiled; + text.dw0.pad2 = 0; + text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords; + text.dw1.dest_y1 = y; /* duplicates info in setup blit */ + text.dw1.dest_x1 = x; + text.dw2.dest_y2 = y + h; + text.dw2.dest_x2 = x + w; + + intel_batchbuffer_require_space( intel->batch, + sizeof(setup) + + sizeof(text) + + dwords * 4, /* reserve bytes, matching the three data writes below */ + INTEL_BATCH_NO_CLIPRECTS ); + + intel_batchbuffer_data( intel->batch, + &setup, + sizeof(setup), + INTEL_BATCH_NO_CLIPRECTS ); + + intel_batchbuffer_data( intel->batch, + &text, + sizeof(text), + INTEL_BATCH_NO_CLIPRECTS ); + + intel_batchbuffer_data( intel->batch, + src_bits, + dwords * 4, + INTEL_BATCH_NO_CLIPRECTS ); +} + diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index 357ceb4c512..71ce8307784 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -62,5 +62,16 @@ extern void intelEmitFillBlit( struct intel_context *intel, GLshort w, GLshort h, GLuint color ); +void +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + 
GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort dst_x, GLshort dst_y, + GLshort w, GLshort h); #endif diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index 5f19137d58e..d65de633fd0 100644 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -258,7 +258,6 @@ void intelInitDriverFunctions( struct dd_function_table *functions ) /* Pixel path fallbacks. */ functions->Accum = _swrast_Accum; - functions->Bitmap = _swrast_Bitmap; functions->ReadPixels = _swrast_ReadPixels; functions->DrawPixels = _swrast_DrawPixels; @@ -266,6 +265,12 @@ void intelInitDriverFunctions( struct dd_function_table *functions ) * manager: */ functions->CopyPixels = intelCopyPixels; + functions->Bitmap = intelBitmap; + + if (getenv("INTEL_NO_BLIT")) { + functions->Bitmap = _swrast_Bitmap; + functions->CopyPixels = _swrast_CopyPixels; + } intelInitTextureFuncs( functions ); intelInitStateFuncs( functions ); diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h index d0354cfc8f9..4ca356beff8 100644 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -486,6 +486,14 @@ void intelCopyPixels(GLcontext * ctx, GLsizei width, GLsizei height, GLint destx, GLint desty, GLenum type); +GLboolean intel_check_blit_fragment_ops(GLcontext * ctx); + +void intelBitmap(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels); + #define _NEW_WINDOW_POS 0x40000000 diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c new file mode 100644 index 00000000000..5841afaa3ef --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -0,0 +1,350 @@ 
+/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "enums.h" +#include "image.h" +#include "colormac.h" +#include "mtypes.h" +#include "macros.h" +#include "bufferobj.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_regions.h" +#include "intel_buffer_objects.h" + + + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + + +/* Unlike the other intel_pixel_* functions, the expectation here is + * that the incoming data is not in a PBO. 
With the XY_TEXT blit + * method, there's no benefit to having it in a PBO, but we could + * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit + * PBO bitmaps. I think they are probably pretty rare though - I + * wonder if Xgl uses them? + */ +static const GLubyte *map_pbo( GLcontext *ctx, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + GLubyte *buf; + + if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, + GL_COLOR_INDEX, GL_BITMAP, + (GLvoid *) bitmap)) { + _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); + return NULL; + } + + buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, + GL_READ_ONLY_ARB, + unpack->BufferObj); + if (!buf) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); + return NULL; + } + + return ADD_POINTERS(buf, bitmap); /* bitmap is added to the mapping base, i.e. used as an offset; the caller unmaps */ +} + +static GLboolean test_bit( const GLubyte *src, + GLuint bit ) +{ + return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; /* bit N is bit (N % 8) of byte N / 8 */ +} + +static void set_bit( GLubyte *dest, + GLuint bit ) +{ + dest[bit/8] |= 1 << (bit % 8); /* same bit numbering as test_bit; only ever sets, never clears */ +} + +static int align(int x, int align) +{ + return (x + align - 1) & ~(align - 1); /* rounds x up to a multiple of align; the mask only works for power-of-two align */ +} + +/* Extract a rectangle's worth of data from the bitmap. Called + * per-cliprect. + */ +static GLuint get_bitmap_rect(GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + GLuint x, GLuint y, + GLuint w, GLuint h, + GLubyte *dest, + GLuint row_align, + GLboolean invert) +{ + GLuint src_offset = (x + unpack->SkipPixels) & 0x7; + GLuint mask = unpack->LsbFirst ? 
0 : 7; + GLuint bit = 0; + GLint row, col; + GLint first, last; + GLint incr; + GLuint count = 0; + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", + __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + + if (invert) { + first = h-1; + last = 0; + incr = -1; + } + else { + first = 0; + last = h-1; + incr = 1; + } + + /* Require that dest be pre-zero'd. + */ + for (row = first; row != (last+incr); row += incr) { + const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, + width, height, + GL_COLOR_INDEX, GL_BITMAP, + y + row, x); + + for (col = 0; col < w; col++, bit++) { + if (test_bit(rowsrc, (col + src_offset) ^ mask)) { + set_bit(dest, bit ^ 7); + count++; + } + } + + if (row_align) + bit = (bit + row_align - 1) & ~(row_align - 1); + } + + return count; +} + + + + +/* + * Render a bitmap. + */ +static GLboolean +do_blit_bitmap( GLcontext *ctx, + GLint dstx, GLint dsty, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *dst = intel_drawbuf_region(intel); + + union { + GLuint ui; + GLubyte ub[4]; + } color; + + + UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], ctx->Current.RasterColor[2]); + UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], ctx->Current.RasterColor[1]); + UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], ctx->Current.RasterColor[0]); + UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], ctx->Current.RasterColor[3]); + + /* Does zoom apply to bitmaps? 
+ */ + if (!intel_check_blit_fragment_ops(ctx) || + ctx->Pixel.ZoomX != 1.0F || + ctx->Pixel.ZoomY != 1.0F) + return GL_FALSE; + + if (unpack->BufferObj->Name) { /* map only after the fallback check, so returning GL_FALSE never leaves the PBO mapped */ + bitmap = map_pbo(ctx, width, height, unpack, bitmap); + if (bitmap == NULL) + return GL_TRUE; /* even though this is an error, we're done */ + } + + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + __DRIdrawablePrivate *dPriv = intel->driDrawable; + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t dest_rect; + GLint nbox = dPriv->numClipRects; + GLint srcx = 0, srcy = 0; + GLint orig_screen_x1, orig_screen_y2; + GLuint i; + + + orig_screen_x1 = dPriv->x + dstx; + orig_screen_y2 = dPriv->y + (dPriv->h - dsty); + + /* Do scissoring in GL coordinates: + */ + if (ctx->Scissor.Enabled) + { + GLint x = ctx->Scissor.X; + GLint y = ctx->Scissor.Y; + GLuint w = ctx->Scissor.Width; + GLuint h = ctx->Scissor.Height; + + if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height)) + goto out; + } + + /* Convert from GL to hardware coordinates: + */ + dsty = dPriv->y + (dPriv->h - dsty - height); + dstx = dPriv->x + dstx; + + dest_rect.x1 = dstx; + dest_rect.y1 = dsty; + dest_rect.x2 = dstx + width; + dest_rect.y2 = dsty + height; + + for (i = 0; i < nbox; i++) { + drm_clip_rect_t rect; + int box_w, box_h; + GLint px, py; + GLuint stipple[32]; + + if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) + continue; + + /* Now go back to GL coordinates to figure out what subset of + * the bitmap we are uploading for this cliprect: + */ + box_w = rect.x2 - rect.x1; + box_h = rect.y2 - rect.y1; + srcx = rect.x1 - orig_screen_x1; + srcy = orig_screen_y2 - rect.y2; + + +#define DY 32 +#define DX 32 + + /* Then, finally, chop it all into chunks that can be + * digested by hardware: + */ + for (py = 0; py < box_h; py += DY) { + for (px = 0; px < box_w; px += DX) { + int h = MIN2(DY, box_h - py); + int w = MIN2(DX, box_w - px); + GLuint sz = align(align(w,8) * h, 64)/8; + + assert(sz <= sizeof(stipple)); + memset(stipple, 0, sz); + + /* May need to adjust this when padding has been introduced in + * sz above: + */ + if (get_bitmap_rect(width, height, 
unpack, + bitmap, + srcx + px, srcy + py, w, h, + (GLubyte *)stipple, + 8, + GL_TRUE) == 0) + continue; + + /* + */ + intelEmitImmediateColorExpandBlit( intel, + dst->cpp, + (GLubyte *)stipple, + sz, + color.ui, + dst->pitch, + dst->buffer, + 0, + dst->tiled, + rect.x1 + px, + rect.y2 - (py + h), + w, h); + } + } + } + intel->need_flush = GL_TRUE; + out: + intel_batchbuffer_flush(intel->batch); + } + UNLOCK_HARDWARE(intel); + + + if (unpack->BufferObj->Name) { + /* done with PBO so unmap it now */ + ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, + unpack->BufferObj); + } + + return GL_TRUE; +} + + + + + +/* There are a large number of possible ways to implement bitmap on + * this hardware, most of them have some sort of drawback. Here are a + * few that spring to mind: + * + * Blit: + * - XY_MONO_SRC_BLT_CMD + * - use XY_SETUP_CLIP_BLT for cliprect clipping. + * - XY_TEXT_BLT + * - XY_TEXT_IMMEDIATE_BLT + * - blit per cliprect, subject to maximum immediate data size. + * - XY_COLOR_BLT + * - per pixel or run of pixels + * - XY_PIXEL_BLT + * - good for sparse bitmaps + * + * 3D engine: + * - Point per pixel + * - Translate bitmap to an alpha texture and render as a quad + * - Chop bitmap up into 32x32 squares and render w/polygon stipple. 
+ */ +void +intelBitmap(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels) +{ + if (do_blit_bitmap(ctx, x, y, width, height, + unpack, pixels)) + return; /* blit path finished the request, or already raised a GL error for it */ + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + + _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels); /* do_blit_bitmap declined: draw in software */ +} diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index ad27867aeae..d5d48994529 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -74,7 +74,7 @@ copypix_src_region(struct intel_context *intel, GLenum type) * Check if any fragment operations are in effect which might effect * glDraw/CopyPixels. */ -static GLboolean +GLboolean intel_check_blit_fragment_ops(GLcontext * ctx) { if (ctx->NewState) @@ -83,6 +83,7 @@ intel_check_blit_fragment_ops(GLcontext * ctx) /* Could do logicop with the blitter: */ return !(ctx->_ImageTransferState || + ctx->RenderMode != GL_RENDER || ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || @@ -90,8 +91,8 @@ intel_check_blit_fragment_ops(GLcontext * ctx) !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || - ctx->Color.ColorLogicOpEnabled || + !ctx->Color.ColorMask[3] || /* can do this! */ + ctx->Color.ColorLogicOpEnabled || /* can do this! */ ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled); } @@ -150,7 +151,7 @@ do_blit_copypixels(GLcontext * ctx, GLint dx = dstx - srcx; GLint dy = dsty - srcy; - if (!_mesa_clip_to_region(x, y, x+w, y+h, &dstx, &dsty, &width, &height)) + if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height)) goto out; srcx = dstx - dx;