Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_span.c
index cc779d684fbd173f92a49bfcecb5d6335fa237bc..16f9fb99e6757e66bb230ed81562a29e5cbb2b56 100644 (file)
@@ -39,9 +39,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *   Keith Whitwell <keith@tungstengraphics.com>
  *
  */
-#include <unistd.h>
-#include "glheader.h"
-#include "imports.h"
+
+#include "main/glheader.h"
 #include "swrast/swrast.h"
 
 #include "r300_state.h"
@@ -51,10 +50,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "drirenderbuffer.h"
 
-
 #define DBG 0
 
-
 /*
  * Note that all information needed to access pixels in a renderbuffer
  * should be obtained through the gl_renderbuffer parameter, not per-context
@@ -85,8 +82,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define HW_UNLOCK()
 
-
-
 /* ================================================================
  * Color buffer
  */
@@ -101,7 +96,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
 #include "spantmp2.h"
 
-
 /* 32 bit, ARGB8888 color spanline and pixel functions
  */
 #define SPANTMP_PIXEL_FMT GL_BGRA
@@ -112,7 +106,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
 #include "spantmp2.h"
 
-
 /* ================================================================
  * Depth buffer
  */
@@ -127,61 +120,60 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * too...
  */
 
-static GLuint
-radeon_mba_z32( const driRenderbuffer *drb, GLint x, GLint y )
+static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
 {
-   GLuint pitch = drb->pitch;
-   if (1 /*|| drb->depthHasSurface */) {
-      return 4 * (x + y * pitch);
-   }
-   else {
-      GLuint ba, address = 0;                  /* a[0..1] = 0           */
-
-      ba = (y / 8) * (pitch / 8) + (x / 8);
-
-      address |= (x & 0x7) << 2;               /* a[2..4] = x[0..2]     */
-      address |= (y & 0x3) << 5;               /* a[5..6] = y[0..1]     */
-      address |=
-         (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7]    = x[4] ^ y[2] */
-      address |= (ba & 0x3) << 8;              /* a[8..9] = ba[0..1]    */
-
-      address |= (y & 0x8) << 7;               /* a[10]   = y[3]        */
-      address |=
-         (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11]   = x[3] ^ y[4] */
-      address |= (ba & ~0x3) << 10;            /* a[12..] = ba[2..]     */
-
-      return address;
-   }
+       GLuint pitch = drb->pitch;
+       if (drb->depthHasSurface) {
+               return 4 * (x + y * pitch);
+       } else {
+               GLuint ba, address = 0; /* a[0..1] = 0           */
+
+#ifdef COMPILE_R300
+               ba = (y / 8) * (pitch / 8) + (x / 8);
+#else
+               ba = (y / 16) * (pitch / 16) + (x / 16);
+#endif
+
+               address |= (x & 0x7) << 2;      /* a[2..4] = x[0..2]     */
+               address |= (y & 0x3) << 5;      /* a[5..6] = y[0..1]     */
+               address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;        /* a[7]    = x[4] ^ y[2] */
+               address |= (ba & 0x3) << 8;     /* a[8..9] = ba[0..1]    */
+
+               address |= (y & 0x8) << 7;      /* a[10]   = y[3]        */
+               address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;        /* a[11]   = x[3] ^ y[4] */
+               address |= (ba & ~0x3) << 10;   /* a[12..] = ba[2..]     */
+
+               return address;
+       }
 }
 
-
 static INLINE GLuint
-radeon_mba_z16( const driRenderbuffer *drb, GLint x, GLint y )
+radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
 {
-   GLuint pitch = drb->pitch;
-   if (1 /*|| drb->depthHasSurface */) {
-      return 2 * (x + y * pitch);
-   }
-   else {
-      GLuint ba, address = 0;                  /* a[0]    = 0           */
-
-      ba = (y / 16) * (pitch / 32) + (x / 32);
-
-      address |= (x & 0x7) << 1;               /* a[1..3] = x[0..2]     */
-      address |= (y & 0x7) << 4;               /* a[4..6] = y[0..2]     */
-      address |= (x & 0x8) << 4;               /* a[7]    = x[3]        */
-      address |= (ba & 0x3) << 8;              /* a[8..9] = ba[0..1]    */
-      address |= (y & 0x8) << 7;               /* a[10]   = y[3]        */
-      address |= ((x & 0x10) ^ (y & 0x10)) << 7;/* a[11]   = x[4] ^ y[4] */
-      address |= (ba & ~0x3) << 10;            /* a[12..] = ba[2..]     */
-
-      return address;
-   }
+       GLuint pitch = drb->pitch;
+       if (drb->depthHasSurface) {
+               return 2 * (x + y * pitch);
+       } else {
+               GLuint ba, address = 0; /* a[0]    = 0           */
+
+               ba = (y / 16) * (pitch / 32) + (x / 32);
+
+               address |= (x & 0x7) << 1;      /* a[1..3] = x[0..2]     */
+               address |= (y & 0x7) << 4;      /* a[4..6] = y[0..2]     */
+               address |= (x & 0x8) << 4;      /* a[7]    = x[3]        */
+               address |= (ba & 0x3) << 8;     /* a[8..9] = ba[0..1]    */
+               address |= (y & 0x8) << 7;      /* a[10]   = y[3]        */
+               address |= ((x & 0x10) ^ (y & 0x10)) << 7;      /* a[11]   = x[4] ^ y[4] */
+               address |= (ba & ~0x3) << 10;   /* a[12..] = ba[2..]     */
+
+               return address;
+       }
 }
 
-
 /* 16-bit depth buffer functions
  */
+#define VALUE_TYPE GLushort
+
 #define WRITE_DEPTH( _x, _y, d )                                       \
    *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
 
@@ -191,12 +183,14 @@ radeon_mba_z16( const driRenderbuffer *drb, GLint x, GLint y )
 #define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
 
-
 /* 24 bit depth, 8 bit stencil depthbuffer functions
  *
  * Careful: It looks like the R300 uses ZZZS byte order while the R200
  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
  */
+#define VALUE_TYPE GLuint
+
+#ifdef COMPILE_R300
 #define WRITE_DEPTH( _x, _y, d )                                       \
 do {                                                                   \
    GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
@@ -205,23 +199,39 @@ do {                                                                      \
    tmp |= ((d << 8) & 0xffffff00);                                     \
    *(GLuint *)(buf + offset) = tmp;                                    \
 } while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   tmp &= 0xff000000;                                                  \
+   tmp |= ((d) & 0x00ffffff);                                          \
+   *(GLuint *)(buf + offset) = tmp;                                    \
+} while (0)
+#endif
 
+#ifdef COMPILE_R300
 #define READ_DEPTH( d, _x, _y )                                                \
   do { \
     d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,               \
                                         _y + yo )) & 0xffffff00) >> 8; \
   }while(0)
+#else
+#define READ_DEPTH( d, _x, _y )                                                \
+   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,                 \
+                                        _y + yo )) & 0x00ffffff;
+#endif
 
 #define TAG(x) radeon##x##_z24_s8
 #include "depthtmp.h"
 
-
 /* ================================================================
  * Stencil buffer
  */
 
 /* 24 bit depth, 8 bit stencil depthbuffer functions
  */
+#ifdef COMPILE_R300
 #define WRITE_STENCIL( _x, _y, d )                                     \
 do {                                                                   \
    GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
@@ -230,95 +240,110 @@ do {                                                                     \
    tmp |= (d) & 0xff;                                                  \
    *(GLuint *)(buf + offset) = tmp;                                    \
 } while (0)
+#else
+#define WRITE_STENCIL( _x, _y, d )                                     \
+do {                                                                   \
+   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   tmp &= 0x00ffffff;                                                  \
+   tmp |= (((d) & 0xff) << 24);                                                \
+   *(GLuint *)(buf + offset) = tmp;                                    \
+} while (0)
+#endif
 
+#ifdef COMPILE_R300
 #define READ_STENCIL( d, _x, _y )                                      \
 do {                                                                   \
    GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
    GLuint tmp = *(GLuint *)(buf + offset);                             \
    d = tmp & 0x000000ff;                                               \
 } while (0)
+#else
+#define READ_STENCIL( d, _x, _y )                                      \
+do {                                                                   \
+   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   d = (tmp & 0xff000000) >> 24;                                       \
+} while (0)
+#endif
 
 #define TAG(x) radeon##x##_z24_s8
 #include "stenciltmp.h"
 
-
-
 /* Move locking out to get reasonable span performance (10x better
  * than doing this in HW_LOCK above).  WaitForIdle() is the main
  * culprit.
  */
 
-static void radeonSpanRenderStart( GLcontext *ctx )
+static void radeonSpanRenderStart(GLcontext * ctx)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   {
-       static int first = 1;
-       r300ContextPtr r300 = (r300ContextPtr)rmesa;
-       
-       if (first) {
-               r300->span_dlocking = getenv("R300_SPAN_DISABLE_LOCKING") ? 1 : 0;
-               if (r300->span_dlocking == 0) {
-                       fprintf(stderr, "Try R300_SPAN_DISABLE_LOCKING env var if this hangs.\n");
-                       fflush(stderr);
-                       sleep(1);
-               }
-               first = 0;
-       }
-       
-       if (r300->span_dlocking) {
-               r300Flush(ctx);
-               LOCK_HARDWARE( rmesa );
-               radeonWaitForIdleLocked( rmesa );
-               UNLOCK_HARDWARE( rmesa );
-               
-               return;
+       radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+#ifdef COMPILE_R300
+       r300ContextPtr r300 = (r300ContextPtr) rmesa;
+       R300_FIREVERTICES(r300);
+#else
+       RADEON_FIREVERTICES(rmesa);
+#endif
+       LOCK_HARDWARE(rmesa);
+       radeonWaitForIdleLocked(rmesa);
+
+       /* Read the first pixel in the framebuffer.  This should
+        * be a no-op, right?  In fact, without this read, conform fails:
+        * reading from the framebuffer sometimes produces old results -- the
+        * on-card read cache gets mixed up and doesn't notice that the
+        * framebuffer has been updated.
+        *
+        * Note that we should probably be reading some otherwise unused
+        * region of VRAM, otherwise we might get incorrect results when
+        * reading pixels from the top left of the screen.
+        *
+        * I found this problem on an R420 with glean's texCube test.
+        * Note that the R200 span code also *writes* the first pixel in the
+        * framebuffer, but I've found this to be unnecessary.
+        *  -- Nicolai Hähnle, June 2008
+        */
+       {
+               int p;
+               driRenderbuffer *drb =
+                       (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+               volatile int *buf =
+                       (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+               p = *buf;
        }
-   }
-   //   R300_FIREVERTICES( rmesa );
-   // old code has flush
-   r300Flush(ctx);
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
 }
 
-static void radeonSpanRenderFinish( GLcontext *ctx )
+static void radeonSpanRenderFinish(GLcontext * ctx)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   r300ContextPtr r300 = (r300ContextPtr)rmesa;
-   _swrast_flush( ctx );
-   if (r300->span_dlocking == 0)
-       UNLOCK_HARDWARE( rmesa );
+       radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+       _swrast_flush(ctx);
+       UNLOCK_HARDWARE(rmesa);
 }
 
-void radeonInitSpanFuncs( GLcontext *ctx )
+void radeonInitSpanFuncs(GLcontext * ctx)
 {
-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-   swdd->SpanRenderStart          = radeonSpanRenderStart;
-   swdd->SpanRenderFinish         = radeonSpanRenderFinish; 
+       struct swrast_device_driver *swdd =
+           _swrast_GetDeviceDriverReference(ctx);
+       swdd->SpanRenderStart = radeonSpanRenderStart;
+       swdd->SpanRenderFinish = radeonSpanRenderFinish;
 }
 
-
 /**
  * Plug in the Get/Put routines for the given driRenderbuffer.
  */
-void
-radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
 {
-   if (drb->Base.InternalFormat == GL_RGBA) {
-      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
-         radeonInitPointers_RGB565(&drb->Base);
-      }
-      else {
-         radeonInitPointers_ARGB8888(&drb->Base);
-      }
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-      radeonInitDepthPointers_z16(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-      radeonInitDepthPointers_z24_s8(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-      radeonInitStencilPointers_z24_s8(&drb->Base);
-   }
+       if (drb->Base.InternalFormat == GL_RGBA) {
+               if (vis->redBits == 5 && vis->greenBits == 6
+                   && vis->blueBits == 5) {
+                       radeonInitPointers_RGB565(&drb->Base);
+               } else {
+                       radeonInitPointers_ARGB8888(&drb->Base);
+               }
+       } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+               radeonInitDepthPointers_z16(&drb->Base);
+       } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+               radeonInitDepthPointers_z24_s8(&drb->Base);
+       } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+               radeonInitStencilPointers_z24_s8(&drb->Base);
+       }
 }