Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r200 / r200_state_init.c
index 1a13c9ec13db7b8fc85e3f7c2618e9e67a63b437..78ad5baebb343b1195f6a0546e958e7140cc461d 100644 (file)
@@ -166,22 +166,6 @@ static struct {
 /* =============================================================
  * State initialization
  */
-
-void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
-{
-   struct radeon_state_atom *l;
-
-   fprintf(stderr, msg);
-   fprintf(stderr, ": ");
-
-   foreach(l, &rmesa->radeon.hw.atomlist) {
-      if (l->dirty || rmesa->radeon.hw.all_dirty)
-        fprintf(stderr, "%s, ", l->name);
-   }
-
-   fprintf(stderr, "\n");
-}
-
 static int cmdpkt( r200ContextPtr rmesa, int id ) 
 {
    drm_radeon_cmd_header_t h;
@@ -305,17 +289,22 @@ VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 9
     OUT_BATCH_TABLE((data), h.vectors.count);                          \
   } while(0)
 
-#define OUT_VECLINEAR(hdr, data) do {                  \
-    drm_radeon_cmd_header_t h;                                 \
-    uint32_t _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);        \
-    uint32_t _sz = h.veclinear.count * 4;                              \
+#define OUT_VECLINEAR(hdr, data) do {                                  \
+    drm_radeon_cmd_header_t h;                                         \
+    uint32_t _start, _sz;                                              \
     h.i = hdr;                                                         \
+    _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);         \
+    _sz = h.veclinear.count * 4;                                       \
+    if (r200->radeon.radeonScreen->kernel_mm && _sz) { \
+    BEGIN_BATCH_NO_AUTOSTATE(dwords); \
     OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));               \
     OUT_BATCH(0);                                                      \
     OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));             \
     OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));   \
     OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1));   \
     OUT_BATCH_TABLE((data), _sz);                                      \
+    END_BATCH(); \
+    } \
   } while(0)
 
 #define OUT_SCL(hdr, data) do {                                        \
@@ -342,6 +331,7 @@ static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
+   dwords += 6;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
    OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
    OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
@@ -354,6 +344,7 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
+   dwords += 8;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
    OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
    OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
@@ -366,6 +357,7 @@ static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
+   dwords += 8;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
    OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
    OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
@@ -378,9 +370,8 @@ static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
-   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   dwords += 4;
    OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
-   END_BATCH();
 }
 
 static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
@@ -389,6 +380,7 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
+   dwords += 2;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
    OUT_SCL(atom->cmd[0], atom->cmd+1);
    END_BATCH();
@@ -401,6 +393,7 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
 
+   dwords += 4;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
    OUT_VEC(atom->cmd[0], atom->cmd+1);
    END_BATCH();
@@ -412,36 +405,47 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    BATCH_LOCALS(&r200->radeon);
    struct radeon_renderbuffer *rrb;
    uint32_t cbpitch;
-   uint32_t zbpitch;
+   uint32_t zbpitch, depth_fmt;
    uint32_t dwords = atom->cmd_size;
-   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
 
    /* output the first 7 bytes of context */
    BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
    OUT_BATCH_TABLE(atom->cmd, 5);
 
-   rrb = r200->radeon.state.depth.rrb;
+   rrb = radeon_get_depthbuffer(&r200->radeon);
    if (!rrb) {
      OUT_BATCH(0);
      OUT_BATCH(0);
    } else {
      zbpitch = (rrb->pitch / rrb->cpp);
+     if (r200->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
      OUT_BATCH(zbpitch);
+     if (rrb->cpp == 4) 
+       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; 
+     else 
+       depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; 
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; 
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; 
    }
      
    OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
    OUT_BATCH(atom->cmd[CTX_CMD_1]);
    OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
 
-   rrb = r200->radeon.state.color.rrb;
-   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
-      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-   }
+   rrb = radeon_get_colorbuffer(&r200->radeon);
    if (!rrb || !rrb->bo) {
+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
      OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
    } else {
+     atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); 
+     if (rrb->cpp == 4) 
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; 
+     else 
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; 
+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); 
      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
    }
 
@@ -451,11 +455,7 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
      OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
    } else {
      cbpitch = (rrb->pitch / rrb->cpp);
-     if (rrb->cpp == 4)
-       ;
-     else
-       ;
-     if (r200->radeon.sarea->tiling_enabled)
+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
        cbpitch |= R200_COLOR_TILE_ENABLE;
      OUT_BATCH(cbpitch);
    }
@@ -474,22 +474,50 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
    uint32_t cbpitch = 0;
    uint32_t zbpitch = 0;
    uint32_t dwords = atom->cmd_size;
-   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
+   uint32_t depth_fmt;
 
-   rrb = r200->radeon.state.color.rrb;
-   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
-      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+   rrb = radeon_get_colorbuffer(&r200->radeon);
+   if (!rrb || !rrb->bo) {
+      return;
    }
-   if (rrb) {
-     assert(rrb->bo != NULL);
-     cbpitch = (rrb->pitch / rrb->cpp);
-     if (r200->radeon.sarea->tiling_enabled)
+
+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+   if (rrb->cpp == 4)
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+   else switch (rrb->base._ActualFormat) {
+   case GL_RGB5:
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+       break;
+   case GL_RGBA4:
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+       break;
+   case GL_RGB5_A1:
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+       break;
+   }
+
+   cbpitch = (rrb->pitch / rrb->cpp);
+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
        cbpitch |= R200_COLOR_TILE_ENABLE;
+
+   drb = radeon_get_depthbuffer(&r200->radeon);
+   if (drb) {
+     zbpitch = (drb->pitch / drb->cpp);
+     if (drb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
    }
 
-   drb = r200->radeon.state.depth.rrb;
+   dwords = 10;
    if (drb)
-     zbpitch = (drb->pitch / drb->cpp);
+     dwords += 6;
+   if (rrb)
+     dwords += 8;
+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+     dwords += 4;
 
    /* output the first 7 bytes of context */
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
@@ -500,7 +528,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
 
    if (drb) {
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
-     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
      OUT_BATCH(zbpitch);
@@ -516,15 +544,9 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
    if (rrb) {
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-   }
 
-   if (rrb) {
-     if (rrb->cpp == 4)
-       ;
-     else
-       ;
      OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
-     OUT_BATCH(cbpitch);
+     OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
    }
 
    if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
@@ -542,39 +564,110 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
    int i = atom->idx;
    radeonTexObj *t = r200->state.texture.unit[i].texobj;
 
+   if (t && t->mt && !t->image_override)
+     dwords += 2;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* is this ok even with drm older than 1.18? */
    OUT_BATCH_TABLE(atom->cmd, 10);
-   if (t && !t->image_override) {
+
+   if (t && t->mt && !t->image_override) {
      OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-                    RADEON_GEM_DOMAIN_VRAM, 0, 0);
+                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
    } else if (!t) {
-
-     OUT_BATCH(atom->cmd[10]);
+     /* workaround for old CS mechanism */
+     OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
+   } else {
+     OUT_BATCH(t->override_offset);
    }
 
    END_BATCH();
 }
 
-static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
 {
    r200ContextPtr r200 = R200_CONTEXT(ctx);
    BATCH_LOCALS(&r200->radeon);
    uint32_t dwords = atom->cmd_size;
    int i = atom->idx;
    radeonTexObj *t = r200->state.texture.unit[i].texobj;
-   GLuint size;
+   int hastexture = 1;
 
+   if (!r200->state.texture.unit[i].unitneeded)
+        hastexture = 0;
+   if (!t)
+       hastexture = 0;
+   else {
+       if (!t->mt && !t->bo)
+               hastexture = 0;
+   }
+
+   if (hastexture)
+     dwords += 2;
+   else
+     dwords -= 2;
    BEGIN_BATCH_NO_AUTOSTATE(dwords);
-   OUT_BATCH_TABLE(atom->cmd, 3);
 
-   fprintf(stderr,"total size is %d\n", t->mt->totalsize);
+   OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
+   OUT_BATCH_TABLE((atom->cmd + 1), 8);
+
+   if (hastexture) {
+     OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
+     if (t->mt && !t->image_override) {
+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      } else {
+       if (t->bo)
+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      }
+   }
+   END_BATCH();
+}
+
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = 3;
+   int i = atom->idx, j;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords + (3 * 5));
+   /* XXX that size won't really match with image_override... */
+   OUT_BATCH_TABLE(atom->cmd, 2);
+
+   if (t && !t->image_override) {
+     lvl = &t->mt->levels[0];
+     OUT_BATCH_TABLE((atom->cmd + 2), 1);
+     for (j = 1; j <= 5; j++) {
+       OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+                       RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
+   }
+   END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = 2;
+   int i = atom->idx, j;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5));
+   OUT_BATCH_TABLE(atom->cmd, 2);
+
    if (t && !t->image_override) {
-     size = t->mt->totalsize / 6;
-     OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-     OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-     OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-     OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-     OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     lvl = &t->mt->levels[0];
+     for (j = 1; j <= 5; j++) {
+       OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
+       OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+                       RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
    }
    END_BATCH();
 }
@@ -584,68 +677,24 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
 void r200InitState( r200ContextPtr rmesa )
 {
    GLcontext *ctx = rmesa->radeon.glCtx;
-   GLuint color_fmt, depth_fmt, i;
-   GLint drawPitch, drawOffset;
-
-   switch ( rmesa->radeon.radeonScreen->cpp ) {
-   case 2:
-      color_fmt = R200_COLOR_FORMAT_RGB565;
-      break;
-   case 4:
-      color_fmt = R200_COLOR_FORMAT_ARGB8888;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
-      exit( -1 );
-   }
+   GLuint i;
 
    rmesa->radeon.state.color.clear = 0x00000000;
 
    switch ( ctx->Visual.depthBits ) {
    case 16:
       rmesa->radeon.state.depth.clear = 0x0000ffff;
-      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
-      depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
       rmesa->radeon.state.stencil.clear = 0x00000000;
       break;
    case 24:
+   default:
       rmesa->radeon.state.depth.clear = 0x00ffffff;
-      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
-      depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
       rmesa->radeon.state.stencil.clear = 0xffff0000;
       break;
-   default:
-      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-              ctx->Visual.depthBits );
-      exit( -1 );
    }
 
-   /* Only have hw stencil when depth buffer is 24 bits deep */
-   rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-                                    ctx->Visual.depthBits == 24 );
-
    rmesa->radeon.Fallback = 0;
 
-   if ( ctx->Visual.doubleBufferMode && rmesa->radeon.sarea->pfCurrentPage == 0 ) {
-      drawOffset = rmesa->radeon.radeonScreen->backOffset;
-      drawPitch  = rmesa->radeon.radeonScreen->backPitch;
-   } else {
-      drawOffset = rmesa->radeon.radeonScreen->frontOffset;
-      drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
-   }
-#if 000
-   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
-      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->backOffset;
-      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->backPitch;
-   } else {
-      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->frontOffset;
-      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
-   }
-
-   rmesa->state.pixel.readOffset = rmesa->radeon.state.color.drawOffset;
-   rmesa->state.pixel.readPitch  = rmesa->radeon.state.color.drawPitch;
-#endif
-
    rmesa->radeon.hw.max_state_size = 0;
 
 #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )                          \
@@ -723,8 +772,11 @@ void r200InitState( r200ContextPtr rmesa )
       ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
    }
 
-   for (i = 0; i < 5; i++)
-     rmesa->hw.tex[i].emit = tex_emit;
+   for (i = 0; i < 6; i++)
+      if (rmesa->radeon.radeonScreen->kernel_mm)
+          rmesa->hw.tex[i].emit = tex_emit_cs;
+      else
+          rmesa->hw.tex[i].emit = tex_emit;
    if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
       ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
       ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
@@ -732,8 +784,11 @@ void r200InitState( r200ContextPtr rmesa )
       ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
       ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
       ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
-      for (i = 0; i < 5; i++)
-       rmesa->hw.cube[i].emit = cube_emit;
+      for (i = 0; i < 6; i++)
+          if (rmesa->radeon.radeonScreen->kernel_mm)
+              rmesa->hw.cube[i].emit = cube_emit_cs;
+          else
+              rmesa->hw.cube[i].emit = cube_emit;
    }
    else {
       ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
@@ -1023,8 +1078,7 @@ void r200InitState( r200ContextPtr rmesa )
    if (rmesa->using_hyperz)
       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
 
-   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
-                                              R200_Z_TEST_LESS |
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
                                               R200_STENCIL_TEST_ALWAYS |
                                               R200_STENCIL_FAIL_KEEP |
                                               R200_STENCIL_ZPASS_KEEP |
@@ -1041,7 +1095,6 @@ void r200InitState( r200ContextPtr rmesa )
    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
                                     | R200_TEX_BLEND_0_ENABLE);
 
-   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
    switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
    case DRI_CONF_DITHER_XERRORDIFFRESET:
       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
@@ -1061,28 +1114,6 @@ void r200InitState( r200ContextPtr rmesa )
    else
       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
 
-#if 000
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->radeon.state.color.drawOffset +
-                                              rmesa->radeon.radeonScreen->fbLocation)
-                                             & R200_COLOROFFSET_MASK);
-
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->radeon.state.color.drawPitch &
-                                             R200_COLORPITCH_MASK) |
-                                            R200_COLOR_ENDIAN_NO_SWAP);
-#else
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
-                                              rmesa->radeon.radeonScreen->fbLocation)
-                                             & R200_COLOROFFSET_MASK);
-
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
-                                             R200_COLORPITCH_MASK) |
-                                            R200_COLOR_ENDIAN_NO_SWAP);
-#endif
-   /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
-   if (rmesa->radeon.sarea->tiling_enabled) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-   }
-
    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * 
                        driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;