-/* $XFree86: xc/lib/GL/mesa/src/drv/r128/r128_span.c,v 1.8 2002/10/30 12:51:39 alanh Exp $ */
/**************************************************************************
Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
#include "r128_context.h"
#include "r128_ioctl.h"
-#include "r128_state.h"
#include "r128_span.h"
-#include "r128_tex.h"
#include "swrast/swrast.h"
#define HAVE_HW_DEPTH_SPANS 1
#define HAVE_HW_DEPTH_PIXELS 1
+#define HAVE_HW_STENCIL_SPANS 1
+#define HAVE_HW_STENCIL_PIXELS 1
#define LOCAL_VARS \
r128ContextPtr rmesa = R128_CONTEXT(ctx); \
- r128ScreenPtr r128scrn = rmesa->r128Screen; \
- __DRIscreenPrivate *sPriv = rmesa->driScreen; \
- __DRIdrawablePrivate *dPriv = rmesa->driDrawable; \
- GLuint pitch = r128scrn->frontPitch * r128scrn->cpp; \
+ __DRIscreen *sPriv = rmesa->driScreen; \
+ __DRIdrawable *dPriv = rmesa->driDrawable; \
+ driRenderbuffer *drb = (driRenderbuffer *) rb; \
GLuint height = dPriv->h; \
- char *buf = (char *)(sPriv->pFB + \
- rmesa->drawOffset + \
- (dPriv->x * r128scrn->cpp) + \
- (dPriv->y * pitch)); \
- char *read_buf = (char *)(sPriv->pFB + \
- rmesa->readOffset + \
- (dPriv->x * r128scrn->cpp) + \
- (dPriv->y * pitch)); \
GLuint p; \
- (void) read_buf; (void) buf; (void) p
+ (void) p;
#define LOCAL_DEPTH_VARS \
r128ContextPtr rmesa = R128_CONTEXT(ctx); \
r128ScreenPtr r128scrn = rmesa->r128Screen; \
- __DRIscreenPrivate *sPriv = rmesa->driScreen; \
- __DRIdrawablePrivate *dPriv = rmesa->driDrawable; \
+ __DRIscreen *sPriv = rmesa->driScreen; \
+ __DRIdrawable *dPriv = rmesa->driDrawable; \
GLuint height = dPriv->h; \
(void) r128scrn; (void) sPriv; (void) height
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
-
-#define CLIPPIXEL( _x, _y ) \
- ((_x >= minx) && (_x < maxx) && (_y >= miny) && (_y < maxy))
-
-
-#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i ) \
- if ( _y < miny || _y >= maxy ) { \
- _n1 = 0, _x1 = x; \
- } else { \
- _n1 = _n; \
- _x1 = _x; \
- if ( _x1 < minx ) _i += (minx-_x1), n1 -= (minx-_x1), _x1 = minx; \
- if ( _x1 + _n1 >= maxx ) n1 -= (_x1 + n1 - maxx); \
- }
-
#define Y_FLIP( _y ) (height - _y - 1)
+#define HW_LOCK()
-#define HW_LOCK() \
- r128ContextPtr rmesa = R128_CONTEXT(ctx); \
- FLUSH_BATCH( rmesa ); \
- LOCK_HARDWARE( rmesa ); \
- r128WaitForIdleLocked( rmesa );
-
-#define HW_CLIPLOOP() \
- do { \
- __DRIdrawablePrivate *dPriv = rmesa->driDrawable; \
- int _nc = dPriv->numClipRects; \
- \
- while ( _nc-- ) { \
- int minx = dPriv->pClipRects[_nc].x1 - dPriv->x; \
- int miny = dPriv->pClipRects[_nc].y1 - dPriv->y; \
- int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x; \
- int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;
-
-#define HW_ENDCLIPLOOP() \
- } \
- } while (0)
-
-#define HW_UNLOCK() \
- UNLOCK_HARDWARE( rmesa )
+#define HW_UNLOCK()
/* 16 bit, RGB565 color spanline and pixel functions
*/
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p, color) \
- p = R128PACKCOLOR565( color[0], color[1], color[2] )
-
-#define WRITE_RGBA( _x, _y, r, g, b, a ) \
- *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)r & 0xf8) << 8) | \
- (((int)g & 0xfc) << 3) | \
- (((int)b & 0xf8) >> 3))
-
-#define WRITE_PIXEL( _x, _y, p ) \
- *(GLushort *)(buf + _x*2 + _y*pitch) = p
-
-#define READ_RGBA( rgba, _x, _y ) \
- do { \
- GLushort p = *(GLushort *)(read_buf + _x*2 + _y*pitch); \
- rgba[0] = (p >> 8) & 0xf8; \
- rgba[1] = (p >> 3) & 0xfc; \
- rgba[2] = (p << 3) & 0xf8; \
- rgba[3] = 0xff; \
- if ( rgba[0] & 0x08 ) rgba[0] |= 0x07; \
- if ( rgba[1] & 0x04 ) rgba[1] |= 0x03; \
- if ( rgba[2] & 0x08 ) rgba[2] |= 0x07; \
- } while (0)
-
-#define TAG(x) r128##x##_RGB565
-#include "spantmp.h"
-
-#define READ_DEPTH(d, _x, _y) \
- d = *(GLushort *)(buf + _x*2 + _y*pitch)
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
-/* 32 bit, ARGB8888 color spanline and pixel functions
- */
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p, color) \
- p = R128PACKCOLOR8888( color[0], color[1], color[2], color[3] )
-
-#define WRITE_RGBA( _x, _y, r, g, b, a ) \
- *(GLuint *)(buf + _x*4 + _y*pitch) = ((b << 0) | \
- (g << 8) | \
- (r << 16) | \
- (a << 24) )
+#define TAG(x) r128##x##_RGB565
+#define TAG2(x,y) r128##x##_RGB565##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
+ + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
-#define WRITE_PIXEL( _x, _y, p ) \
- *(GLuint *)(buf + _x*4 + _y*pitch) = p
-#define READ_RGBA( rgba, _x, _y ) \
-do { \
- GLuint p = *(GLuint *)(read_buf + _x*4 + _y*pitch); \
- rgba[0] = (p >> 16) & 0xff; \
- rgba[1] = (p >> 8) & 0xff; \
- rgba[2] = (p >> 0) & 0xff; \
- rgba[3] = 0xff;/*(p >> 24) & 0xff;*/ \
-} while (0)
-
-#define TAG(x) r128##x##_ARGB8888
-#include "spantmp.h"
-
-
-/* 24 bit, RGB888 color spanline and pixel functions */
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p, color) \
- p = R128PACKCOLOR888( color[0], color[1], color[2] )
-
-#define WRITE_RGBA(_x, _y, r, g, b, a) \
- *(GLuint *)(buf + _x*3 + _y*pitch) = ((r << 16) | \
- (g << 8) | \
- (b << 0))
-
-#define WRITE_PIXEL(_x, _y, p) \
- *(GLuint *)(buf + _x*3 + _y*pitch) = p
-
-#define READ_RGBA(rgba, _x, _y) \
- do { \
- GLuint p = *(GLuint *)(read_buf + _x*3 + _y*pitch); \
- rgba[0] = (p >> 16) & 0xff; \
- rgba[1] = (p >> 8) & 0xff; \
- rgba[2] = (p >> 0) & 0xff; \
- rgba[3] = 0xff; \
- } while (0)
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) r128##x##_ARGB8888
+#define TAG2(x,y) r128##x##_ARGB8888##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
+ + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+/* Idling in the depth/stencil span functions:
+ * For writes, the kernel reads from the given user-space buffer at dispatch
+ * time, and then writes to the depth buffer asynchronously.
+ * For reads, the kernel reads from the depth buffer and writes to the span
+ * temporary asynchronously.
+ * So, if we're going to read from the span temporary, we need to idle before
+ * doing so. But we don't need to idle after write, because the CPU won't
+ * be accessing the destination, only the accelerator (through 3d rendering or
+ * depth span reads)
+ * However, due to interactions from pixel cache between 2d (what we do with
+ * depth) and 3d (all other parts of the system), we idle at the begin and end
+ * of a set of span operations, which should cover the pix cache issue.
+ * Except, we still have major issues, as shown by no_rast=true glxgears, or
+ * stencilwrap.
+ */
/* ================================================================
* Depth buffer
*/
+/* These functions require locking */
+#undef HW_LOCK
+#undef HW_UNLOCK
+#define HW_LOCK() LOCK_HARDWARE(R128_CONTEXT(ctx));
+#define HW_UNLOCK() UNLOCK_HARDWARE(R128_CONTEXT(ctx));
+
/* 16-bit depth buffer functions
*/
+#define VALUE_TYPE GLushort
+
#define WRITE_DEPTH_SPAN() \
+do { \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
- depth, mask );
+ depth, mask ); \
+} while (0)
#define WRITE_DEPTH_PIXELS() \
do { \
GLint oy[MAX_WIDTH]; \
for ( i = 0 ; i < n ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
- } \
- for ( i = 0 ; i < n ; i++ ) { \
oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
r128WriteDepthPixelsLocked( rmesa, n, ox, oy, depth, mask ); \
GLint i, remaining = n; \
\
while ( remaining > 0 ) { \
- GLint ox[MAX_WIDTH]; \
- GLint oy[MAX_WIDTH]; \
+ GLint ox[128]; \
+ GLint oy[128]; \
GLint count; \
\
if ( remaining <= 128 ) { \
} \
for ( i = 0 ; i < count ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
- } \
- for ( i = 0 ; i < count ; i++ ) { \
oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
\
} \
} while (0)
-#define TAG(x) r128##x##_16
+#define TAG(x) r128##x##_z16
#include "depthtmp.h"
/* 24-bit depth, 8-bit stencil buffer functions
*/
+#define VALUE_TYPE GLuint
+
#define WRITE_DEPTH_SPAN() \
+do { \
+ GLuint buf[n]; \
+ GLint i; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
+ } \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
- depth, mask );
+ buf, mask ); \
+} while (0)
#define WRITE_DEPTH_PIXELS() \
do { \
+ GLuint buf[n]; \
GLint ox[MAX_WIDTH]; \
GLint oy[MAX_WIDTH]; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
for ( i = 0 ; i < n ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
+ oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
+ r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
for ( i = 0 ; i < n ; i++ ) { \
- oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
+ buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
} \
- r128WriteDepthPixelsLocked( rmesa, n, ox, oy, depth, mask ); \
+ r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
} while (0)
#define READ_DEPTH_SPAN() \
r128scrn->spanOffset); \
GLint i; \
\
+ /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/ \
r128ReadDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y ); \
GLint i, remaining = n; \
\
while ( remaining > 0 ) { \
- GLint ox[MAX_WIDTH]; \
- GLint oy[MAX_WIDTH]; \
+ GLint ox[128]; \
+ GLint oy[128]; \
GLint count; \
\
if ( remaining <= 128 ) { \
} \
for ( i = 0 ; i < count ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
- } \
- for ( i = 0 ; i < count ; i++ ) { \
oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
\
} \
} while (0)
-#define TAG(x) r128##x##_24_8
+#define TAG(x) r128##x##_z24_s8
#include "depthtmp.h"
* Stencil buffer
*/
-/* FIXME: Add support for hardware stencil buffers.
+/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
+#define WRITE_STENCIL_SPAN() \
+do { \
+ GLuint buf[n]; \
+ GLint i; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0x00ffffff) | (stencil[i] << 24); \
+ } \
+ r128WriteDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y, \
+ buf, mask ); \
+} while (0)
+#define WRITE_STENCIL_PIXELS() \
+do { \
+ GLuint buf[n]; \
+ GLint ox[MAX_WIDTH]; \
+ GLint oy[MAX_WIDTH]; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ ox[i] = x[i] + dPriv->x; \
+ oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
+ } \
+ r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0x00ffffff) | (stencil[i] << 24); \
+ } \
+ r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
+} while (0)
-/* 32 bit depthbuffer functions */
-#define WRITE_DEPTH(_x, _y, d) \
- *(GLuint *)(buf + _x*4 + _y*pitch) = d
+#define READ_STENCIL_SPAN() \
+do { \
+ GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ GLint i; \
+ \
+ /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/ \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ \
+ for ( i = 0 ; i < n ; i++ ) { \
+ stencil[i] = (buf[i] & 0xff000000) >> 24; \
+ } \
+} while (0)
+#define READ_STENCIL_PIXELS() \
+do { \
+ GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ GLint i, remaining = n; \
+ \
+ while ( remaining > 0 ) { \
+ GLint ox[128]; \
+ GLint oy[128]; \
+ GLint count; \
+ \
+ if ( remaining <= 128 ) { \
+ count = remaining; \
+ } else { \
+ count = 128; \
+ } \
+ for ( i = 0 ; i < count ; i++ ) { \
+ ox[i] = x[i] + dPriv->x; \
+ oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
+ } \
+ \
+ r128ReadDepthPixelsLocked( rmesa, count, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
+ \
+ for ( i = 0 ; i < count ; i++ ) { \
+ stencil[i] = (buf[i] & 0xff000000) >> 24; \
+ } \
+ stencil += count; \
+ x += count; \
+ y += count; \
+ remaining -= count; \
+ } \
+} while (0)
+#define TAG(x) radeon##x##_z24_s8
+#include "stenciltmp.h"
-/*
- * This function is called to specify which buffer to read and write
- * for software rasterization (swrast) fallbacks. This doesn't necessarily
- * correspond to glDrawBuffer() or glReadBuffer() calls.
- */
-static void r128DDSetBuffer( GLcontext *ctx,
- GLframebuffer *colorBuffer,
- GLuint bufferBit )
+static void
+r128SpanRenderStart( struct gl_context *ctx )
{
r128ContextPtr rmesa = R128_CONTEXT(ctx);
-
- switch ( bufferBit ) {
- case DD_FRONT_LEFT_BIT:
- if ( rmesa->sarea->pfCurrentPage == 1 ) {
- rmesa->drawOffset = rmesa->readOffset = rmesa->r128Screen->backOffset;
- rmesa->drawPitch = rmesa->readPitch = rmesa->r128Screen->backPitch;
- } else {
- rmesa->drawOffset = rmesa->readOffset = rmesa->r128Screen->frontOffset;
- rmesa->drawPitch = rmesa->readPitch = rmesa->r128Screen->frontPitch;
- }
- break;
- case DD_BACK_LEFT_BIT:
- if ( rmesa->sarea->pfCurrentPage == 1 ) {
- rmesa->drawOffset = rmesa->readOffset = rmesa->r128Screen->frontOffset;
- rmesa->drawPitch = rmesa->readPitch = rmesa->r128Screen->frontPitch;
- } else {
- rmesa->drawOffset = rmesa->readOffset = rmesa->r128Screen->backOffset;
- rmesa->drawPitch = rmesa->readPitch = rmesa->r128Screen->backPitch;
- }
- break;
- default:
- break;
- }
+ FLUSH_BATCH(rmesa);
+ LOCK_HARDWARE(rmesa);
+ r128WaitForIdleLocked( rmesa );
}
-
-void r128DDInitSpanFuncs( GLcontext *ctx )
+static void
+r128SpanRenderFinish( struct gl_context *ctx )
{
r128ContextPtr rmesa = R128_CONTEXT(ctx);
+ _swrast_flush( ctx );
+ r128WaitForIdleLocked( rmesa );
+ UNLOCK_HARDWARE( rmesa );
+}
+
+void r128DDInitSpanFuncs( struct gl_context *ctx )
+{
struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+ swdd->SpanRenderStart = r128SpanRenderStart;
+ swdd->SpanRenderFinish = r128SpanRenderFinish;
+}
- swdd->SetBuffer = r128DDSetBuffer;
-
- switch ( rmesa->r128Screen->cpp ) {
- case 2:
- swdd->WriteRGBASpan = r128WriteRGBASpan_RGB565;
- swdd->WriteRGBSpan = r128WriteRGBSpan_RGB565;
- swdd->WriteMonoRGBASpan = r128WriteMonoRGBASpan_RGB565;
- swdd->WriteRGBAPixels = r128WriteRGBAPixels_RGB565;
- swdd->WriteMonoRGBAPixels = r128WriteMonoRGBAPixels_RGB565;
- swdd->ReadRGBASpan = r128ReadRGBASpan_RGB565;
- swdd->ReadRGBAPixels = r128ReadRGBAPixels_RGB565;
- break;
-
- case 4:
- swdd->WriteRGBASpan = r128WriteRGBASpan_ARGB8888;
- swdd->WriteRGBSpan = r128WriteRGBSpan_ARGB8888;
- swdd->WriteMonoRGBASpan = r128WriteMonoRGBASpan_ARGB8888;
- swdd->WriteRGBAPixels = r128WriteRGBAPixels_ARGB8888;
- swdd->WriteMonoRGBAPixels = r128WriteMonoRGBAPixels_ARGB8888;
- swdd->ReadRGBASpan = r128ReadRGBASpan_ARGB8888;
- swdd->ReadRGBAPixels = r128ReadRGBAPixels_ARGB8888;
- break;
-
- default:
- break;
- }
- switch ( rmesa->glCtx->Visual.depthBits ) {
- case 16:
- swdd->ReadDepthSpan = r128ReadDepthSpan_16;
- swdd->WriteDepthSpan = r128WriteDepthSpan_16;
- swdd->ReadDepthPixels = r128ReadDepthPixels_16;
- swdd->WriteDepthPixels = r128WriteDepthPixels_16;
- break;
-
- case 24:
- swdd->ReadDepthSpan = r128ReadDepthSpan_24_8;
- swdd->WriteDepthSpan = r128WriteDepthSpan_24_8;
- swdd->ReadDepthPixels = r128ReadDepthPixels_24_8;
- swdd->WriteDepthPixels = r128WriteDepthPixels_24_8;
- break;
-
- default:
- break;
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ */
+void
+r128SetSpanFunctions(driRenderbuffer *drb, const struct gl_config *vis)
+{
+ if (drb->Base.Format == MESA_FORMAT_RGB565) {
+ r128InitPointers_RGB565(&drb->Base);
+ }
+ else if (drb->Base.Format == MESA_FORMAT_ARGB8888) {
+ r128InitPointers_ARGB8888(&drb->Base);
+ }
+ else if (drb->Base.Format == MESA_FORMAT_Z16) {
+ r128InitDepthPointers_z16(&drb->Base);
+ }
+ else if (drb->Base.Format == MESA_FORMAT_S8_Z24) {
+ r128InitDepthPointers_z24_s8(&drb->Base);
+ }
+ else if (drb->Base.Format == MESA_FORMAT_S8) {
+ radeonInitStencilPointers_z24_s8(&drb->Base);
}
-
- swdd->WriteCI8Span = NULL;
- swdd->WriteCI32Span = NULL;
- swdd->WriteMonoCISpan = NULL;
- swdd->WriteCI32Pixels = NULL;
- swdd->WriteMonoCIPixels = NULL;
- swdd->ReadCI32Span = NULL;
- swdd->ReadCI32Pixels = NULL;
}