Merge remote branch 'main/radeon-rewrite'

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c

index 7ad170a939cf9b7b7d447a866c4c73ff1094e7ad..e28f28662b458da7b15a587e8862e2f965236c7f 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -55,15 +55,11 @@ static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
                              GLint x, GLint y)
  {
      GLubyte *ptr = rrb->bo->ptr;
-    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
      uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
      GLint offset;
      GLint nmacroblkpl;
      GLint nmicroblkpl;
  
-    x += dPriv->x;
-    y += dPriv->y;
-
      if (rrb->has_surface || !(rrb->bo->flags & mask)) {
          offset = x * rrb->cpp + y * rrb->pitch;
      } else {
@@ -99,15 +95,11 @@ static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
                              GLint x, GLint y)
  {
      GLubyte *ptr = rrb->bo->ptr;
-    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
      uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
      GLint offset;
      GLint nmacroblkpl;
      GLint nmicroblkpl;
  
-    x += dPriv->x;
-    y += dPriv->y;
-
      if (rrb->has_surface || !(rrb->bo->flags & mask)) {
          offset = x * rrb->cpp + y * rrb->pitch;
      } else {
@@ -143,7 +135,6 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
                            GLint x, GLint y)
  {
      GLubyte *ptr = rrb->bo->ptr;
-    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
      uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
      GLint offset;
      GLint microblkxs;
@@ -151,9 +142,6 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
      GLint nmacroblkpl;
      GLint nmicroblkpl;
  
-    x += dPriv->x;
-    y += dPriv->y;
-
      if (rrb->has_surface || !(rrb->bo->flags & mask)) {
          offset = x * rrb->cpp + y * rrb->pitch;
      } else {
@@ -190,6 +178,19 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
      return &ptr[offset];
  }
  
+#ifndef COMPILE_R300
+static uint32_t /* returns val rotated right by 8 bits */
+z24s8_to_s8z24(uint32_t val) /* val: 32-bit word; presumably depth in bits 31..8, stencil in bits 7..0 (from the name - confirm) */
+{
+   return (val << 24) | (val >> 8); /* low byte moves to bits 31..24, upper 24 bits drop to bits 23..0 */
+}
+
+static uint32_t /* returns val rotated left by 8 bits */
+s8z24_to_z24s8(uint32_t val) /* val: 32-bit word; presumably stencil in bits 31..24, depth in bits 23..0 (from the name - confirm) */
+{
+   return (val >> 24) | (val << 8); /* top byte moves to bits 7..0, lower 24 bits rise to bits 31..8 */
+}
+#endif
  
  /*
   * Note that all information needed to access pixels in a renderbuffer
@@ -250,7 +251,21 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
  
  #define TAG(x)    radeon##x##_RGB565
  #define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) do { /* store d as one 32-bit pixel at (_x, _y), shifted by x_off/y_off */ \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, (_x) + x_off, (_y) + y_off );            \
+   *_ptr = (d);                                                        \
+} while (0) /* do/while (0) keeps the expansion a single statement, safe under an unbraced if/else */
  #include "spantmp2.h"
  
  /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -260,7 +275,7 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
  
  #define TAG(x)    radeon##x##_ARGB8888
  #define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
  #include "spantmp2.h"
  
  /* ================================================================
@@ -282,15 +297,15 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
  #define VALUE_TYPE GLushort
  
  #define WRITE_DEPTH( _x, _y, d )                                       \
-   *(GLushort *)radeon_ptr(rrb, _x, _y) = d
+   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d
  
  #define READ_DEPTH( d, _x, _y )                                                \
-   d = *(GLushort *)radeon_ptr(rrb, _x, _y)
+   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)
  
  #define TAG(x) radeon##x##_z16
  #include "depthtmp.h"
  
-/* 24 bit depth, 8 bit stencil depthbuffer functions
+/* 24 bit depth
   *
   * Careful: It looks like the R300 uses ZZZS byte order while the R200
   * uses SZZZ for 24 bit depth, 8 bit stencil mode.
@@ -300,7 +315,7 @@ static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
  #ifdef COMPILE_R300
  #define WRITE_DEPTH( _x, _y, d )                                       \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );                \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
     GLuint tmp = *_ptr;                         \
     tmp &= 0x000000ff;                                                  \
     tmp |= ((d << 8) & 0xffffff00);                                     \
@@ -309,8 +324,8 @@ do {                                                                        \
  #else
  #define WRITE_DEPTH( _x, _y, d )                                       \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );                \
-   GLuint tmp = *_ptr;                         \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );        \
+   GLuint tmp = *_ptr;                                                 \
     tmp &= 0xff000000;                                                  \
     tmp |= ((d) & 0x00ffffff);                                          \
     *_ptr = tmp;                                        \
@@ -319,12 +334,52 @@ do {                                                                      \
  
  #ifdef COMPILE_R300
  #define READ_DEPTH( d, _x, _y )                                                \
-  do { \
-    d = (*(GLuint*)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \
+  do {                                                                 \
+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
    }while(0)
  #else
+#define READ_DEPTH( d, _x, _y )        do { /* d = low 24 bits of the 32-bit word at (_x, _y), shifted by x_off/y_off; single statement like the other READ_DEPTH variants */ \
+  d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; } while (0)
+#endif
+/*
+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
+   d = *(GLuint*)(radeon_ptr(rrb, _x,  _y )) & 0x00ffffff;
+*/
+#define TAG(x) radeon##x##_z24
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+
+#ifdef COMPILE_R300 /* R300 build: the 32-bit value is stored exactly as passed in */
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
+   *_ptr = d;                                                          \
+} while (0)
+#else /* otherwise: rotate d with z24s8_to_s8z24() before storing */
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );        \
+   GLuint tmp = z24s8_to_s8z24(d);                                     \
+   *_ptr = tmp;                                        \
+} while (0)
+#endif /* COMPILE_R300 */
+
+#ifdef COMPILE_R300 /* R300 build: return the stored 32-bit word unchanged */
  #define READ_DEPTH( d, _x, _y )                                                \
-   d = *(GLuint*)(radeon_ptr32(rrb, _x,        _y )) & 0x00ffffff;
+  do { \
+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)));       \
+  }while(0)
+#else /* otherwise: undo the store-side rotation with s8z24_to_z24s8() */
+#define READ_DEPTH( d, _x, _y )        do {                                    \
+    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr32(rrb, _x + x_off,        _y + y_off ))); \
+  } while (0)
  #endif /* COMPILE_R300 */
  /*
      fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
@@ -342,7 +397,7 @@ do {                                                                        \
  #ifdef COMPILE_R300
  #define WRITE_STENCIL( _x, _y, d )                                     \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);          \
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);          \
     GLuint tmp = *_ptr;                         \
     tmp &= 0xffffff00;                                                  \
     tmp |= (d) & 0xff;                                                  \
@@ -351,7 +406,7 @@ do {                                                                        \
  #else
  #define WRITE_STENCIL( _x, _y, d )                                     \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);          \
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);          \
     GLuint tmp = *_ptr;                         \
     tmp &= 0x00ffffff;                                                  \
     tmp |= (((d) & 0xff) << 24);                                                \
@@ -362,14 +417,14 @@ do {                                                                      \
  #ifdef COMPILE_R300
  #define READ_STENCIL( d, _x, _y )                                      \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );                \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
     GLuint tmp = *_ptr;                         \
     d = tmp & 0x000000ff;                                               \
  } while (0)
  #else
  #define READ_STENCIL( d, _x, _y )                                      \
  do {                                                                   \
-   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );                \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
     GLuint tmp = *_ptr;                         \
     d = (tmp & 0xff000000) >> 24;                                       \
  } while (0)
@@ -379,33 +434,63 @@ do {                                                                      \
  #include "stenciltmp.h"
  
  
-static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) /* flag != 0: map rb's buffer object and install span functions; flag == 0: unmap it and clear GetRow/PutRow */
  {
-       struct radeon_renderbuffer *rrb = (void*)rb;
+       struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
         int r;
         
-       if (rrb->bo) {
-               r = radeon_bo_map(rrb->bo, write);
+       if (rrb == NULL || !rrb->bo) /* nothing to do without a radeon renderbuffer or a backing buffer object */
+               return;
+
+       if (flag) {
+               r = radeon_bo_map(rrb->bo, 1); /* second argument is always 1 here; the removed map_buffer() passed its "write" parameter instead */
                 if (r) {
                         fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                                 __FUNCTION__, r);
                 }
-       }
  
-       radeonSetSpanFunctions(rrb);
+               radeonSetSpanFunctions(rrb); /* NOTE(review): still reached when radeon_bo_map() reported an error - confirm this is intended */
+       } else {
+               radeon_bo_unmap(rrb->bo);
+               rb->GetRow = NULL; /* clear the row accessors once the buffer is unmapped */
+               rb->PutRow = NULL;
+       }
  }
  
-static void unmap_buffer(struct gl_renderbuffer *rb)
+static void
+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) /* map != 0: map, map == 0: unmap the color draw/read, depth and stencil renderbuffers plus any texture attachments of ctx->DrawBuffer */
  {
-       struct radeon_renderbuffer *rrb = (void*)rb;
+       GLuint i, j;
  
-       if (rrb->bo) {
-               radeon_bo_unmap(rrb->bo);
+       /* every active color draw buffer */
+       for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
+               map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
+
+       /* draw-framebuffer attachments that carry a texture go through the driver's texture map/unmap hooks */
+       for (i = 0; i < BUFFER_COUNT; i++) {
+               struct gl_renderbuffer_attachment *att =
+                       ctx->DrawBuffer->Attachment + i;
+               struct gl_texture_object *tex = att->Texture;
+               if (tex) {
+                       /* render to texture: the attachment is expected to have a renderbuffer as well */
+                       ASSERT(att->Renderbuffer);
+                       if (map)
+                               ctx->Driver.MapTexture(ctx, tex);
+                       else
+                               ctx->Driver.UnmapTexture(ctx, tex);
+               }
         }
-       rb->GetRow = NULL;
-       rb->PutRow = NULL;
-}
+       
+       map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); /* color read buffer of the read framebuffer */
+
+       /* depth buffer: operate on the wrapped renderbuffer, not on the wrapper itself */
+       if (ctx->DrawBuffer->_DepthBuffer)
+               map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
+       
+       if (ctx->DrawBuffer->_StencilBuffer) /* stencil buffer, likewise via ->Wrapped */
+               map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
  
+}
  static void radeonSpanRenderStart(GLcontext * ctx)
  {
         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -413,31 +498,24 @@ static void radeonSpanRenderStart(GLcontext * ctx)
  
         radeon_firevertices(rmesa);
  
+       /* The locking and wait for idle should really only be needed in classic mode.
+        * In a future memory manager based implementation, this should become
+        * unnecessary due to the fact that mapping our buffers, textures, etc.
+        * should implicitly wait for any previous rendering commands that must
+        * be waited on. */
+       if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+               LOCK_HARDWARE(rmesa);
+               radeonWaitForIdleLocked(rmesa);
+       }
         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
                 if (ctx->Texture.Unit[i]._ReallyEnabled)
                         ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
         }
  
-       /* color draw buffers */
-       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-               map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
-       }
+       radeon_map_unmap_buffers(ctx, 1);
  
-       map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
  
-       if (ctx->DrawBuffer->_DepthBuffer) {
-               map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
-       }
-       if (ctx->DrawBuffer->_StencilBuffer)
-               map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
  
-       /* The locking and wait for idle should really only be needed in classic mode.
-        * In a future memory manager based implementation, this should become
-        * unnecessary due to the fact that mapping our buffers, textures, etc.
-        * should implicitly wait for any previous rendering commands that must
-        * be waited on. */
-       LOCK_HARDWARE(rmesa);
-       radeonWaitForIdleLocked(rmesa);
  }
  
  static void radeonSpanRenderFinish(GLcontext * ctx)
@@ -445,23 +523,15 @@ static void radeonSpanRenderFinish(GLcontext * ctx)
         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
         int i;
         _swrast_flush(ctx);
-       UNLOCK_HARDWARE(rmesa);
-
+       if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+               UNLOCK_HARDWARE(rmesa);
+       }
         for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
                 if (ctx->Texture.Unit[i]._ReallyEnabled)
                         ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
         }
  
-       /* color draw buffers */
-       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
-               unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
-
-       unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
-
-       if (ctx->DrawBuffer->_DepthBuffer)
-               unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
-       if (ctx->DrawBuffer->_StencilBuffer)
-               unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
+       radeon_map_unmap_buffers(ctx, 0);
  }
  
  void radeonInitSpanFuncs(GLcontext * ctx)
@@ -480,12 +550,14 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
         if (rrb->base._ActualFormat == GL_RGB5) {
                 radeonInitPointers_RGB565(&rrb->base);
         } else if (rrb->base._ActualFormat == GL_RGB8) {
-               radeonInitPointers_ARGB8888(&rrb->base);
+               radeonInitPointers_xRGB8888(&rrb->base);
         } else if (rrb->base._ActualFormat == GL_RGBA8) {
                 radeonInitPointers_ARGB8888(&rrb->base);
         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
                 radeonInitDepthPointers_z16(&rrb->base);
         } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+               radeonInitDepthPointers_z24(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
                 radeonInitDepthPointers_z24_s8(&rrb->base);
         } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
                 radeonInitStencilPointers_z24_s8(&rrb->base);