Put some fallback debugging output under DEBUG_FALLBACKS.

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_ioctl.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c

index 7b27d3f76684f51348d57fc376395be0cb2f124a..3e1fc4bafff0cd981fc2e4b4c6471ae0a2671439 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -49,6 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "radeon_tcl.h"
  #include "radeon_sanity.h"
  
+#define STANDALONE_MMIO
  #include "radeon_macros.h"  /* for INREG() */
  
  #include "vblank.h"
@@ -58,10 +59,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  
  
  static void radeonWaitForIdle( radeonContextPtr rmesa );
-
-/* =============================================================
- * Kernel command buffer handling
- */
+static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+                                   const char * caller );
  
  static void print_state_atom( struct radeon_state_atom *state )
  {
@@ -75,115 +74,160 @@ static void print_state_atom( struct radeon_state_atom *state )
  
  }
  
-static void radeon_emit_state_list( radeonContextPtr rmesa, 
-                                   struct radeon_state_atom *list )
+static void radeonSaveHwState( radeonContextPtr rmesa )
  {
-   struct radeon_state_atom *state, *tmp;
-   char *dest;
-   int i, size, texunits;
-
-   /* It appears that some permutations of state atoms lock up the
-    * chip.  Therefore we make sure that state atoms are emitted in a
-    * fixed order. First mark all dirty state atoms and then go
-    * through all state atoms in a well defined order and emit only
-    * the marked ones.
-    * FIXME: This requires knowledge of which state atoms exist.
-    * FIXME: Is the zbs hack below still needed?
-    */
-   size = 0;
-   foreach_s( state, tmp, list ) {
-      if (state->check( rmesa->glCtx )) {
-        size += state->cmd_size;
-        state->dirty = GL_TRUE;
-        move_to_head( &(rmesa->hw.clean), state );
-        if (RADEON_DEBUG & DEBUG_STATE) 
-           print_state_atom( state );
+   struct radeon_state_atom *atom;
+   char * dest = rmesa->backup_store.cmd_buf;
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   
+   rmesa->backup_store.cmd_used = 0;
+
+   foreach( atom, &rmesa->hw.atomlist ) {
+      if ( atom->check( rmesa->glCtx ) ) {
+        int size = atom->cmd_size * 4;
+        memcpy( dest, atom->cmd, size);
+        dest += size;
+        rmesa->backup_store.cmd_used += size;
+        if (RADEON_DEBUG & DEBUG_STATE)
+           print_state_atom( atom );
        }
-      else if (RADEON_DEBUG & DEBUG_STATE)
-        fprintf(stderr, "skip state %s\n", state->name);
     }
-   /* short cut */
-   if (!size)
-       return;
-
-   dest = radeonAllocCmdBuf( rmesa, size * 4, __FUNCTION__);
-   texunits = rmesa->glCtx->Const.MaxTextureUnits;
-
-#define EMIT_ATOM(ATOM) \
-do { \
-   if (rmesa->hw.ATOM.dirty) { \
-      rmesa->hw.ATOM.dirty = GL_FALSE; \
-      memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
-      dest += rmesa->hw.ATOM.cmd_size * 4; \
-   } \
-} while (0)
-
-   EMIT_ATOM (ctx);
-   EMIT_ATOM (set);
-   EMIT_ATOM (lin);
-   EMIT_ATOM (msk);
-   EMIT_ATOM (vpt);
-   EMIT_ATOM (tcl);
-   EMIT_ATOM (msc);
-   for (i = 0; i < texunits; ++i) {
-       EMIT_ATOM (tex[i]);
-       EMIT_ATOM (txr[i]);
+
+   assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "Returning to radeonEmitState\n");
+}
+
+/* Called from radeonFlushCmdBufLocked() when rmesa->lost_context is set.  If a
+ * state backup exists, clear lost_context, temporarily swap rmesa->store for
+ * backup_store (zeroing dma.nr_released_bufs), flush it, then restore both, so
+ * commands at the start of a cmdbuf can rely on state kept from the last one.
+ */
+static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+{
+   GLuint nr_released_bufs;
+   struct radeon_store saved_store;
+
+   if (rmesa->backup_store.cmd_used == 0)
+      return;   /* no saved state to replay */
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "Emitting backup state on lost context\n");
+
+   rmesa->lost_context = GL_FALSE;
+
+   nr_released_bufs = rmesa->dma.nr_released_bufs;
+   saved_store = rmesa->store;
+   rmesa->dma.nr_released_bufs = 0;
+   rmesa->store = rmesa->backup_store;
+   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+   rmesa->dma.nr_released_bufs = nr_released_bufs;
+   rmesa->store = saved_store;   /* put the caller's cmdbuf back */
+}
+
+/* =============================================================
+ * Kernel command buffer handling
+ */
+
+/* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+void radeonSetUpAtomList( radeonContextPtr rmesa )
+{
+   int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
+
+   make_empty_list(&rmesa->hw.atomlist);
+   rmesa->hw.atomlist.name = "atom-list";
+
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
+   for (i = 0; i < mtu; ++i) {
+       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
+       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
     }
-   EMIT_ATOM (zbs);
-   EMIT_ATOM (mtl);
-   for (i = 0; i < 3 + texunits; ++i)
-       EMIT_ATOM (mat[i]);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
+   for (i = 0; i < 3 + mtu; ++i)
+      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
     for (i = 0; i < 8; ++i)
-       EMIT_ATOM (lit[i]);
+      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
     for (i = 0; i < 6; ++i)
-       EMIT_ATOM (ucp[i]);
-   EMIT_ATOM (eye);
-   EMIT_ATOM (grd);
-   EMIT_ATOM (fog);
-   EMIT_ATOM (glt);
-
-#undef EMIT_ATOM
+      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
+   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
  }
  
-
  void radeonEmitState( radeonContextPtr rmesa )
  {
-   struct radeon_state_atom *state, *tmp;
+   struct radeon_state_atom *atom;
+   char *dest;
  
     if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
        fprintf(stderr, "%s\n", __FUNCTION__);
  
-   /* Somewhat overkill:
-    */
-   if (rmesa->lost_context) {
-      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
-        fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+   if (rmesa->save_on_next_emit) {
+      radeonSaveHwState(rmesa);
+      rmesa->save_on_next_emit = GL_FALSE;
+   }
  
-      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
-        move_to_tail(&(rmesa->hw.dirty), state );
+   /* this code used to return here but now it emits zbs */
  
-      rmesa->lost_context = 0;
-   }
-   else if (1) {
-      /* This is a darstardly kludge to work around a lockup that I
-       * haven't otherwise figured out.
-       */
-      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
+   /* To avoid going across the entire set of states multiple times, just check
+    * for enough space for the case of emitting all state, and inline the
+    * radeonAllocCmdBuf code here without all the checks.
+    */
+   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
+   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+
+   /* We always emit zbs.  This works around a hardware bug found by keithw
+      and rediscovered by me after Eric's changes.  If you ever touch this
+      code, make sure you still emit zbs; otherwise you get TCL lockups on
+      at least the M7/7500 class of chips. - airlied */
+   rmesa->hw.zbs.dirty=1;
+
+   if (RADEON_DEBUG & DEBUG_STATE) {
+      foreach(atom, &rmesa->hw.atomlist) {
+        if (atom->dirty || rmesa->hw.all_dirty) {
+           if (atom->check(rmesa->glCtx))
+              print_state_atom(atom);
+           else
+              fprintf(stderr, "skip state %s\n", atom->name);
+        }
+      }
     }
  
-   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
-     foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
-       if (state->is_tcl) {
-        move_to_head( &(rmesa->hw.clean), state );
-       }
-     }
+   foreach(atom, &rmesa->hw.atomlist) {
+      if (rmesa->hw.all_dirty)
+        atom->dirty = GL_TRUE;
+      if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) &&
+          atom->is_tcl)
+        atom->dirty = GL_FALSE;
+      if (atom->dirty) {
+        if (atom->check(rmesa->glCtx)) {
+           int size = atom->cmd_size * 4;
+           memcpy(dest, atom->cmd, size);
+           dest += size;
+           rmesa->store.cmd_used += size;
+           atom->dirty = GL_FALSE;
+        }
+      }
     }
  
-   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
+   assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
+ 
+   rmesa->hw.is_dirty = GL_FALSE;
+   rmesa->hw.all_dirty = GL_FALSE;
  }
  
-
-
  /* Fire a section of the retained (indexed_verts) buffer as a regular
   * primtive.  
   */
@@ -192,7 +236,7 @@ extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
                                 GLuint primitive,
                                 GLuint vertex_nr )
  {
-   drmRadeonCmdHeader *cmd;
+   drm_radeon_cmd_header_t *cmd;
  
  
     assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
@@ -203,9 +247,10 @@ extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
        fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
               rmesa->store.cmd_used/4);
     
+   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
+                                                      __FUNCTION__ );
  #if RADEON_OLD_PACKETS
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 6 * sizeof(*cmd),
-                                                 __FUNCTION__ );
+   cmd[0].i = 0;
     cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
     cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
     cmd[2].i = rmesa->ioctl.vertex_offset;
@@ -222,8 +267,6 @@ extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
               __FUNCTION__,
               cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
  #else
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 4 * sizeof(*cmd),
-                                                 __FUNCTION__ );
     cmd[0].i = 0;
     cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
     cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
@@ -258,7 +301,7 @@ void radeonFlushElts( radeonContextPtr rmesa )
        fprintf(stderr, "%s\n", __FUNCTION__);
  
     assert( rmesa->dma.flush == radeonFlushElts );
-   rmesa->dma.flush = 0;
+   rmesa->dma.flush = NULL;
  
     /* Cope with odd number of elts:
      */
@@ -272,6 +315,11 @@ void radeonFlushElts( radeonContextPtr rmesa )
     cmd[1] |= (dwords - 3) << 16;
     cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
  #endif
+
+   if (RADEON_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+      radeonFinish( rmesa->glCtx );
+   }
  }
  
  
@@ -280,7 +328,7 @@ GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
                                     GLuint primitive,
                                     GLuint min_nr )
  {
-   drmRadeonCmdHeader *cmd;
+   drm_radeon_cmd_header_t *cmd;
     GLushort *retval;
  
     if (RADEON_DEBUG & DEBUG_IOCTL)
@@ -290,10 +338,10 @@ GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
     
     radeonEmitState( rmesa );
     
+   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
+                                                      ELTS_BUFSZ(min_nr),
+                                                      __FUNCTION__ );
  #if RADEON_OLD_PACKETS
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
-                                                 24 + min_nr*2,
-                                                 __FUNCTION__ );
     cmd[0].i = 0;
     cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
     cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
@@ -307,9 +355,6 @@ GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
  
     retval = (GLushort *)(cmd+6);
  #else   
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
-                                                 16 + min_nr*2,
-                                                 __FUNCTION__ );
     cmd[0].i = 0;
     cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
     cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
@@ -347,13 +392,13 @@ void radeonEmitVertexAOS( radeonContextPtr rmesa,
     rmesa->ioctl.vertex_size = vertex_size;
     rmesa->ioctl.vertex_offset = offset;
  #else
-   drmRadeonCmdHeader *cmd;
+   drm_radeon_cmd_header_t *cmd;
  
     if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
        fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
               __FUNCTION__, vertex_size, offset);
  
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 5 * sizeof(int),
+   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
                                                   __FUNCTION__ );
  
     cmd[0].i = 0;
@@ -378,8 +423,8 @@ void radeonEmitAOS( radeonContextPtr rmesa,
     rmesa->ioctl.vertex_offset = 
        (component[0]->aos_start + offset * component[0]->aos_stride * 4);
  #else
-   drmRadeonCmdHeader *cmd;
-   int sz = 3 + (nr/2 * 3) + (nr & 1) * 2;
+   drm_radeon_cmd_header_t *cmd;
+   int sz = AOS_BUFSZ(nr);
     int i;
     int *tmp;
  
@@ -387,11 +432,11 @@ void radeonEmitAOS( radeonContextPtr rmesa,
        fprintf(stderr, "%s\n", __FUNCTION__);
  
  
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sz * sizeof(int),
+   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
                                                   __FUNCTION__ );
     cmd[0].i = 0;
     cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | ((sz-3) << 16);
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
     cmd[2].i = nr;
     tmp = &cmd[0].i;
     cmd += 3;
@@ -431,7 +476,7 @@ void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is require
                    GLint dstx, GLint dsty,
                    GLuint w, GLuint h )
  {
-   drmRadeonCmdHeader *cmd;
+   drm_radeon_cmd_header_t *cmd;
  
     if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
@@ -447,7 +492,7 @@ void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is require
     assert( w < (1<<16) );
     assert( h < (1<<16) );
  
-   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
+   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
                                                   __FUNCTION__ );
  
  
@@ -475,11 +520,11 @@ void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is require
  void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
  {
     if (rmesa->dri.drmMinor >= 6) {
-      drmRadeonCmdHeader *cmd;
+      drm_radeon_cmd_header_t *cmd;
  
        assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
        
-      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
+      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
                                                    __FUNCTION__ );
        cmd[0].i = 0;
        cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
@@ -492,7 +537,10 @@ static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
                                     const char * caller )
  {
     int ret, i;
-   drmRadeonCmdBuffer cmd;
+   drm_radeon_cmd_buffer_t cmd;
+
+   if (rmesa->lost_context)
+      radeonBackUpAndEmitLostStateLocked(rmesa);
  
     if (RADEON_DEBUG & DEBUG_IOCTL) {
        fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
@@ -529,10 +577,10 @@ static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
  
     if (rmesa->state.scissor.enabled) {
        cmd.nbox = rmesa->state.scissor.numClipRects;
-      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
+      cmd.boxes = rmesa->state.scissor.pClipRects;
     } else {
        cmd.nbox = rmesa->numClipRects;
-      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
+      cmd.boxes = rmesa->pClipRects;
     }
  
     ret = drmCommandWrite( rmesa->dri.fd,
@@ -542,12 +590,18 @@ static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
     if (ret)
        fprintf(stderr, "drmCommandWrite: %d\n", ret);
  
+   if (RADEON_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+      radeonWaitForIdleLocked( rmesa );
+   }
+
   out:
     rmesa->store.primnr = 0;
     rmesa->store.statenr = 0;
     rmesa->store.cmd_used = 0;
     rmesa->dma.nr_released_bufs = 0;
-   rmesa->lost_context = 1;    
+   rmesa->save_on_next_emit = 1;
+
     return ret;
  }
  
@@ -567,7 +621,7 @@ void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
     UNLOCK_HARDWARE( rmesa );
  
     if (ret) {
-      fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
+      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
        exit(ret);
     }
  }
@@ -666,13 +720,13 @@ void radeonReleaseDmaRegion( radeonContextPtr rmesa,
        rmesa->dma.flush( rmesa );
  
     if (--region->buf->refcount == 0) {
-      drmRadeonCmdHeader *cmd;
+      drm_radeon_cmd_header_t *cmd;
  
        if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
          fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
                  region->buf->buf->idx);  
        
-      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
+      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
                                                      __FUNCTION__ );
        cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
        cmd->dma.buf_idx = region->buf->buf->idx;
@@ -680,7 +734,7 @@ void radeonReleaseDmaRegion( radeonContextPtr rmesa,
        rmesa->dma.nr_released_bufs++;
     }
  
-   region->buf = 0;
+   region->buf = NULL;
     region->start = 0;
  }
  
@@ -733,14 +787,14 @@ void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
   * SwapBuffers with client-side throttling
   */
  
-static CARD32 radeonGetLastFrame (radeonContextPtr rmesa) 
+static u_int32_t radeonGetLastFrame (radeonContextPtr rmesa) 
  {
     unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
     int ret;
-   CARD32 frame;
+   u_int32_t frame;
  
     if (rmesa->dri.screen->drmMinor >= 4) {
-      drmRadeonGetParam gp;
+      drm_radeon_getparam_t gp;
  
        gp.param = RADEON_PARAM_LAST_FRAME;
        gp.value = (int *)&frame;
@@ -750,14 +804,12 @@ static CARD32 radeonGetLastFrame (radeonContextPtr rmesa)
     else
        ret = -EINVAL;
  
-#ifndef __alpha__
     if ( ret == -EINVAL ) {
        frame = INREG( RADEON_LAST_FRAME_REG );
        ret = 0;
     } 
-#endif
     if ( ret ) {
-      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
        exit(1);
     }
  
@@ -766,14 +818,14 @@ static CARD32 radeonGetLastFrame (radeonContextPtr rmesa)
  
  static void radeonEmitIrqLocked( radeonContextPtr rmesa )
  {
-   drmRadeonIrqEmit ie;
+   drm_radeon_irq_emit_t ie;
     int ret;
  
     ie.irq_seq = &rmesa->iw.irq_seq;
     ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
                               &ie, sizeof(ie) );
     if ( ret ) {
-      fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
+      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
        exit(1);
     }
  }
@@ -797,7 +849,7 @@ static void radeonWaitIrq( radeonContextPtr rmesa )
  
  static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
  {
-   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   drm_radeon_sarea_t *sarea = rmesa->sarea;
  
     if (rmesa->do_irqs) {
        if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
@@ -835,7 +887,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
     radeonContextPtr rmesa;
     GLint nbox, i, ret;
     GLboolean   missed_target;
-   uint64_t ust;
+   int64_t ust;
  
     assert(dPriv);
     assert(dPriv->driContextPriv);
@@ -844,7 +896,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
     rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
  
     if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
+      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
     }
  
     RADEON_FIREVERTICES( rmesa );
@@ -862,8 +914,8 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
  
     for ( i = 0 ; i < nbox ; ) {
        GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
-      XF86DRIClipRectPtr box = dPriv->pClipRects;
-      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->sarea->boxes;
        GLint n = 0;
  
        for ( ; i < nr ; i++ ) {
@@ -890,6 +942,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
     }
  
     rmesa->swap_ust = ust;
+   rmesa->hw.all_dirty = GL_TRUE;
  }
  
  void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
@@ -916,8 +969,8 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
      */
     if (dPriv->numClipRects)
     {
-      XF86DRIClipRectPtr box = dPriv->pClipRects;
-      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->sarea->boxes;
        b[0] = box[0];
        rmesa->sarea->nbox = 1;
     }
@@ -958,6 +1011,9 @@ void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
     rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
                                            + rmesa->radeonScreen->fbLocation;
     rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+   if (rmesa->sarea->tiling_enabled) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+   }
  }
  
  
@@ -971,9 +1027,9 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
  {
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
     __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   drm_radeon_sarea_t *sarea = rmesa->sarea;
     unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
-   CARD32 clear;
+   u_int32_t clear;
     GLuint flags = 0;
     GLuint color_mask = 0;
     GLint ret, i;
@@ -983,33 +1039,35 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
                __FUNCTION__, all, cx, cy, cw, ch );
     }
  
-   radeonEmitState( rmesa );
-
-   /* Need to cope with lostcontext here as kernel relies on
-    * some residual state:
-    */
-   RADEON_FIREVERTICES( rmesa ); 
+   {
+      LOCK_HARDWARE( rmesa );
+      UNLOCK_HARDWARE( rmesa );
+      if ( dPriv->numClipRects == 0 ) 
+        return;
+   }
+   
+   radeonFlush( ctx ); 
  
-   if ( mask & DD_FRONT_LEFT_BIT ) {
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
        flags |= RADEON_FRONT;
        color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-      mask &= ~DD_FRONT_LEFT_BIT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
     }
  
-   if ( mask & DD_BACK_LEFT_BIT ) {
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
        flags |= RADEON_BACK;
        color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-      mask &= ~DD_BACK_LEFT_BIT;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
     }
  
-   if ( mask & DD_DEPTH_BIT ) {
-      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
-      mask &= ~DD_DEPTH_BIT;
+   if ( mask & BUFFER_BIT_DEPTH ) {
+      flags |= RADEON_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
     }
  
-   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
+   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
        flags |= RADEON_STENCIL;
-      mask &= ~DD_STENCIL_BIT;
+      mask &= ~BUFFER_BIT_STENCIL;
     }
  
     if ( mask ) {
@@ -1021,6 +1079,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
     if ( !flags ) 
        return;
  
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
+         flags |= RADEON_USE_HIERZ; */
+      if (!(rmesa->state.stencil.hwBuffer) ||
+        ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+           ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+         flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
  
     /* Flip top to bottom */
     cx += dPriv->x;
@@ -1034,7 +1102,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        int ret;
  
        if (rmesa->dri.screen->drmMinor >= 4) {
-       drmRadeonGetParam gp;
+       drm_radeon_getparam_t gp;
  
         gp.param = RADEON_PARAM_LAST_CLEAR;
         gp.value = (int *)&clear;
@@ -1043,14 +1111,12 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        } else
         ret = -EINVAL;
  
-#ifndef __alpha__
        if ( ret == -EINVAL ) {
          clear = INREG( RADEON_LAST_CLEAR_REG );
          ret = 0;
        }
-#endif
        if ( ret ) {
-        fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+        fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
          exit(1);
        }
        if ( RADEON_DEBUG & DEBUG_IOCTL ) {
@@ -1069,12 +1135,15 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        }
     }
  
+   /* Send current state to the hardware */
+   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+
     for ( i = 0 ; i < dPriv->numClipRects ; ) {
        GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-      XF86DRIClipRectPtr box = dPriv->pClipRects;
-      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
-      drmRadeonClearType clear;
-      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->sarea->boxes;
+      drm_radeon_clear_t clear;
+      drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
        GLint n = 0;
  
        if ( !all ) {
@@ -1117,16 +1186,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        n--;
        b = rmesa->sarea->boxes;
        for ( ; n >= 0 ; n-- ) {
-        depth_boxes[n].f[RADEON_CLEAR_X1] = (float)b[n].x1;
-        depth_boxes[n].f[RADEON_CLEAR_Y1] = (float)b[n].y1;
-        depth_boxes[n].f[RADEON_CLEAR_X2] = (float)b[n].x2;
-        depth_boxes[n].f[RADEON_CLEAR_Y2] = (float)b[n].y2;
-        depth_boxes[n].f[RADEON_CLEAR_DEPTH] = 
+        depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+        depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+        depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
+        depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
+        depth_boxes[n].f[CLEAR_DEPTH] = 
             (float)rmesa->state.depth.clear;
        }
  
        ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-                            &clear, sizeof(drmRadeonClearType));
+                            &clear, sizeof(drm_radeon_clear_t));
  
        if ( ret ) {
          UNLOCK_HARDWARE( rmesa );
@@ -1136,6 +1205,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
     }
  
     UNLOCK_HARDWARE( rmesa );
+   rmesa->hw.all_dirty = GL_TRUE;
  }
  
  
@@ -1179,8 +1249,7 @@ void radeonFlush( GLcontext *ctx )
     if (rmesa->dma.flush)
        rmesa->dma.flush( rmesa );
  
-   if (!is_empty_list(&rmesa->hw.dirty)) 
-      radeonEmitState( rmesa );
+   radeonEmitState( rmesa );
     
     if (rmesa->store.cmd_used)
        radeonFlushCmdBuf( rmesa, __FUNCTION__ );