Merge branch 'radeon-texrewrite-clean' into mesa_7_7_branch
[mesa.git] / src / mesa / drivers / dri / r600 / r700_chip.c
index c3c0923ebb0482773c8c4a4baf565e87db1ddce3..2b2b4d748f6c7e74ebf4d49f1316f89ca0f64b22 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "main/imports.h"
 #include "main/glheader.h"
+#include "main/simple_list.h"
 
 #include "r600_context.h"
 #include "r600_cmdbuf.h"
 
 #include "radeon_mipmap_tree.h"
 
-#define LINK_STATES(reg)                                            \
-do                                                                  \
-{                                                                   \
-    pStateListWork->puiValue = (unsigned int*)&(r700->reg);         \
-    pStateListWork->unOffset = mm##reg - ASIC_CONTEXT_BASE_INDEX; \
-    pStateListWork->pNext    = pStateListWork + 1;                  \
-    pStateListWork++;                                               \
-}while(0)
-
-GLboolean r700InitChipObject(context_t *context)
-{
-    ContextState * pStateListWork;
-
-    R700_CHIP_CONTEXT *r700 = &context->hw;
-
-    /* init state list */
-    r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int));
-    pStateListWork = r700->pStateList;
-
-    // misc
-    LINK_STATES(TA_CNTL_AUX);
-    LINK_STATES(VC_ENHANCE);
-    LINK_STATES(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ);
-    LINK_STATES(DB_DEBUG);
-    LINK_STATES(DB_WATERMARKS);
-
-    // SC
-    LINK_STATES(PA_SC_SCREEN_SCISSOR_TL);
-    LINK_STATES(PA_SC_SCREEN_SCISSOR_BR);
-    LINK_STATES(PA_SC_WINDOW_OFFSET);
-    LINK_STATES(PA_SC_WINDOW_SCISSOR_TL);
-    LINK_STATES(PA_SC_WINDOW_SCISSOR_BR);
-    LINK_STATES(PA_SC_CLIPRECT_RULE);
-    LINK_STATES(PA_SC_CLIPRECT_0_TL);
-    LINK_STATES(PA_SC_CLIPRECT_0_BR);
-    LINK_STATES(PA_SC_CLIPRECT_1_TL);
-    LINK_STATES(PA_SC_CLIPRECT_1_BR);
-    LINK_STATES(PA_SC_CLIPRECT_2_TL);
-    LINK_STATES(PA_SC_CLIPRECT_2_BR);
-    LINK_STATES(PA_SC_CLIPRECT_3_TL);
-    LINK_STATES(PA_SC_CLIPRECT_3_BR);
-    LINK_STATES(PA_SC_EDGERULE);
-    LINK_STATES(PA_SC_GENERIC_SCISSOR_TL);
-    LINK_STATES(PA_SC_GENERIC_SCISSOR_BR);
-    LINK_STATES(PA_SC_LINE_STIPPLE);
-    LINK_STATES(PA_SC_MPASS_PS_CNTL);
-    LINK_STATES(PA_SC_MODE_CNTL);
-    LINK_STATES(PA_SC_LINE_CNTL);
-    LINK_STATES(PA_SC_AA_CONFIG);
-    LINK_STATES(PA_SC_AA_SAMPLE_LOCS_MCTX);
-    LINK_STATES(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX);
-    LINK_STATES(PA_SC_AA_MASK);
-
-    // SU
-    LINK_STATES(PA_SU_POINT_SIZE);
-    LINK_STATES(PA_SU_POINT_MINMAX);
-    LINK_STATES(PA_SU_LINE_CNTL);
-    LINK_STATES(PA_SU_SC_MODE_CNTL);
-    LINK_STATES(PA_SU_VTX_CNTL);
-    LINK_STATES(PA_SU_POLY_OFFSET_DB_FMT_CNTL);
-    LINK_STATES(PA_SU_POLY_OFFSET_CLAMP);
-    LINK_STATES(PA_SU_POLY_OFFSET_FRONT_SCALE);
-    LINK_STATES(PA_SU_POLY_OFFSET_FRONT_OFFSET);
-    LINK_STATES(PA_SU_POLY_OFFSET_BACK_SCALE);
-    LINK_STATES(PA_SU_POLY_OFFSET_BACK_OFFSET);
-
-    // CL
-    LINK_STATES(PA_CL_CLIP_CNTL);
-    LINK_STATES(PA_CL_VTE_CNTL);
-    LINK_STATES(PA_CL_VS_OUT_CNTL);
-    LINK_STATES(PA_CL_NANINF_CNTL);
-    LINK_STATES(PA_CL_GB_VERT_CLIP_ADJ);
-    LINK_STATES(PA_CL_GB_VERT_DISC_ADJ);
-    LINK_STATES(PA_CL_GB_HORZ_CLIP_ADJ);
-    LINK_STATES(PA_CL_GB_HORZ_DISC_ADJ);
-
-    // CB
-    LINK_STATES(CB_CLEAR_RED_R6XX);
-    LINK_STATES(CB_CLEAR_GREEN_R6XX);
-    LINK_STATES(CB_CLEAR_BLUE_R6XX);
-    LINK_STATES(CB_CLEAR_ALPHA_R6XX);
-    LINK_STATES(CB_TARGET_MASK);
-    LINK_STATES(CB_SHADER_MASK);
-    LINK_STATES(CB_BLEND_RED);
-    LINK_STATES(CB_BLEND_GREEN);
-    LINK_STATES(CB_BLEND_BLUE);
-    LINK_STATES(CB_BLEND_ALPHA);
-    LINK_STATES(CB_FOG_RED_R6XX);
-    LINK_STATES(CB_FOG_GREEN_R6XX);
-    LINK_STATES(CB_FOG_BLUE_R6XX);
-    LINK_STATES(CB_SHADER_CONTROL);
-    LINK_STATES(CB_COLOR_CONTROL);
-    LINK_STATES(CB_CLRCMP_CONTROL);
-    LINK_STATES(CB_CLRCMP_SRC);
-    LINK_STATES(CB_CLRCMP_DST);
-    LINK_STATES(CB_CLRCMP_MSK);
-    LINK_STATES(CB_BLEND_CONTROL);
-
-    //DB
-    LINK_STATES(DB_HTILE_DATA_BASE);
-    LINK_STATES(DB_STENCIL_CLEAR);
-    LINK_STATES(DB_DEPTH_CLEAR);
-    LINK_STATES(DB_STENCILREFMASK);
-    LINK_STATES(DB_STENCILREFMASK_BF);
-    LINK_STATES(DB_DEPTH_CONTROL);
-    LINK_STATES(DB_SHADER_CONTROL);
-    LINK_STATES(DB_RENDER_CONTROL);
-    LINK_STATES(DB_RENDER_OVERRIDE);
-    LINK_STATES(DB_HTILE_SURFACE);
-    LINK_STATES(DB_ALPHA_TO_MASK);
-
-    // SX
-    LINK_STATES(SX_MISC);
-    LINK_STATES(SX_ALPHA_TEST_CONTROL);
-    LINK_STATES(SX_ALPHA_REF);
-
-    // VGT
-    LINK_STATES(VGT_MAX_VTX_INDX);
-    LINK_STATES(VGT_MIN_VTX_INDX);
-    LINK_STATES(VGT_INDX_OFFSET);
-    LINK_STATES(VGT_MULTI_PRIM_IB_RESET_INDX);
-    LINK_STATES(VGT_OUTPUT_PATH_CNTL);
-    LINK_STATES(VGT_HOS_CNTL);
-    LINK_STATES(VGT_HOS_MAX_TESS_LEVEL);
-    LINK_STATES(VGT_HOS_MIN_TESS_LEVEL);
-    LINK_STATES(VGT_HOS_REUSE_DEPTH);
-    LINK_STATES(VGT_GROUP_PRIM_TYPE);
-    LINK_STATES(VGT_GROUP_FIRST_DECR);
-    LINK_STATES(VGT_GROUP_DECR);
-    LINK_STATES(VGT_GROUP_VECT_0_CNTL);
-    LINK_STATES(VGT_GROUP_VECT_1_CNTL);
-    LINK_STATES(VGT_GROUP_VECT_0_FMT_CNTL);
-    LINK_STATES(VGT_GROUP_VECT_1_FMT_CNTL);
-    LINK_STATES(VGT_GS_MODE);
-    LINK_STATES(VGT_PRIMITIVEID_EN);
-    LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN);
-    LINK_STATES(VGT_INSTANCE_STEP_RATE_0);
-    LINK_STATES(VGT_INSTANCE_STEP_RATE_1);
-    LINK_STATES(VGT_STRMOUT_EN);
-    LINK_STATES(VGT_REUSE_OFF);
-    LINK_STATES(VGT_VTX_CNT_EN);
-    LINK_STATES(VGT_STRMOUT_BUFFER_EN);
-
-    LINK_STATES(SQ_VTX_SEMANTIC_0);
-    LINK_STATES(SQ_VTX_SEMANTIC_1);
-    LINK_STATES(SQ_VTX_SEMANTIC_2);
-    LINK_STATES(SQ_VTX_SEMANTIC_3);
-    LINK_STATES(SQ_VTX_SEMANTIC_4);
-    LINK_STATES(SQ_VTX_SEMANTIC_5);
-    LINK_STATES(SQ_VTX_SEMANTIC_6);
-    LINK_STATES(SQ_VTX_SEMANTIC_7);
-    LINK_STATES(SQ_VTX_SEMANTIC_8);
-    LINK_STATES(SQ_VTX_SEMANTIC_9);
-    LINK_STATES(SQ_VTX_SEMANTIC_10);
-    LINK_STATES(SQ_VTX_SEMANTIC_11);
-    LINK_STATES(SQ_VTX_SEMANTIC_12);
-    LINK_STATES(SQ_VTX_SEMANTIC_13);
-    LINK_STATES(SQ_VTX_SEMANTIC_14);
-    LINK_STATES(SQ_VTX_SEMANTIC_15);
-    LINK_STATES(SQ_VTX_SEMANTIC_16);
-    LINK_STATES(SQ_VTX_SEMANTIC_17);
-    LINK_STATES(SQ_VTX_SEMANTIC_18);
-    LINK_STATES(SQ_VTX_SEMANTIC_19);
-    LINK_STATES(SQ_VTX_SEMANTIC_20);
-    LINK_STATES(SQ_VTX_SEMANTIC_21);
-    LINK_STATES(SQ_VTX_SEMANTIC_22);
-    LINK_STATES(SQ_VTX_SEMANTIC_23);
-    LINK_STATES(SQ_VTX_SEMANTIC_24);
-    LINK_STATES(SQ_VTX_SEMANTIC_25);
-    LINK_STATES(SQ_VTX_SEMANTIC_26);
-    LINK_STATES(SQ_VTX_SEMANTIC_27);
-    LINK_STATES(SQ_VTX_SEMANTIC_28);
-    LINK_STATES(SQ_VTX_SEMANTIC_29);
-    LINK_STATES(SQ_VTX_SEMANTIC_30);
-    LINK_STATES(SQ_VTX_SEMANTIC_31);
-
-    // SPI
-    LINK_STATES(SPI_VS_OUT_ID_0);
-    LINK_STATES(SPI_VS_OUT_ID_1);
-    LINK_STATES(SPI_VS_OUT_ID_2);
-    LINK_STATES(SPI_VS_OUT_ID_3);
-    LINK_STATES(SPI_VS_OUT_ID_4);
-    LINK_STATES(SPI_VS_OUT_ID_5);
-    LINK_STATES(SPI_VS_OUT_ID_6);
-    LINK_STATES(SPI_VS_OUT_ID_7);
-    LINK_STATES(SPI_VS_OUT_ID_8);
-    LINK_STATES(SPI_VS_OUT_ID_9);
-
-    LINK_STATES(SPI_VS_OUT_CONFIG);
-    LINK_STATES(SPI_THREAD_GROUPING);
-    LINK_STATES(SPI_PS_IN_CONTROL_0);
-    LINK_STATES(SPI_PS_IN_CONTROL_1);
-    LINK_STATES(SPI_INTERP_CONTROL_0);
-    LINK_STATES(SPI_INPUT_Z);
-    LINK_STATES(SPI_FOG_CNTL);
-    LINK_STATES(SPI_FOG_FUNC_SCALE);
-    LINK_STATES(SPI_FOG_FUNC_BIAS);
-
-    // SQ
-    LINK_STATES(SQ_ESGS_RING_ITEMSIZE);
-    LINK_STATES(SQ_GSVS_RING_ITEMSIZE);
-    LINK_STATES(SQ_ESTMP_RING_ITEMSIZE);
-    LINK_STATES(SQ_GSTMP_RING_ITEMSIZE);
-    LINK_STATES(SQ_VSTMP_RING_ITEMSIZE);
-    LINK_STATES(SQ_PSTMP_RING_ITEMSIZE);
-    LINK_STATES(SQ_FBUF_RING_ITEMSIZE);
-    LINK_STATES(SQ_REDUC_RING_ITEMSIZE);
-    //LINK_STATES(SQ_GS_VERT_ITEMSIZE);
-
-    pStateListWork->puiValue = (unsigned int*)&(r700->SQ_GS_VERT_ITEMSIZE);
-    pStateListWork->unOffset = mmSQ_GS_VERT_ITEMSIZE - ASIC_CONTEXT_BASE_INDEX;
-    pStateListWork->pNext    = NULL;  /* END OF STATE LIST */
-
-    return GL_TRUE;
-}
-
-GLboolean r700SendTextureState(context_t *context)
-{
-    unsigned int i;
-    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
-    struct radeon_bo *bo = NULL;
-    BATCH_LOCALS(&context->radeon);
+static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t         *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       struct radeon_bo *bo = NULL;
+       unsigned int i;
+       BATCH_LOCALS(&context->radeon);
 
-    for (i=0; i<R700_TEXTURE_NUMBERUNITS; i++) {
-           radeonTexObj *t = r700->textures[i];
-           if (t) {
-                   if (!t->image_override)
-                           bo = t->mt->bo;
-                   else
-                           bo = t->bo;
-                   if (bo) {
-
-                           r700SyncSurf(context, bo,
-                                        RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
-                                        0, TC_ACTION_ENA_bit);
-
-                           BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
-                           R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-                           R600_OUT_BATCH(i * 7);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
-                           R600_OUT_BATCH(0); /* r700->textures[i]->SQ_TEX_RESOURCE2 */
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
-                           R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
-                                                bo,
-                                                0,
-                                                RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                           R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
-                                                bo,
-                                                r700->textures[i]->SQ_TEX_RESOURCE3,
-                                                RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                           END_BATCH();
-
-                           BEGIN_BATCH_NO_AUTOSTATE(5);
-                           R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
-                           R600_OUT_BATCH(i * 3);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
-                           R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
-                           END_BATCH();
-
-                           BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
-                           R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
-                           R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
-                           R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
-                           R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
-                           R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
-                           END_BATCH();
-
-                           COMMIT_BATCH();
-                   }
-           }
-    }
-    return GL_TRUE;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
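+       /* for each enabled unit with a texture bound: sync the surface, then emit a
+        * SET_RESOURCE packet whose base and mip-chain addresses are relocations against the texture BO */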
+       for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+                       radeonTexObj *t = r700->textures[i];
+                       uint32_t offset;
+                       if (t) {
+                               if (!t->image_override) {
+                                       bo = t->mt->bo;
+                                       offset = get_base_teximage_offset(t);
+                               } else {
+                                       bo = t->bo;
+                                       offset = 0;
+                               }
+                               if (bo) {
+
+                                       r700SyncSurf(context, bo,
+                                                    RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+                                                    0, TC_ACTION_ENA_bit);
+
+                                       BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+                                       R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+                                       R600_OUT_BATCH(i * 7);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
+                                       R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
+                                       R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
+                                                            bo,
+                                                            offset,
+                                                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+                                       R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
+                                                            bo,
+                                                            r700->textures[i]->SQ_TEX_RESOURCE3,
+                                                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+                                       END_BATCH();
+                                       COMMIT_BATCH();
+                               }
+                       }
+               }
+       }
 }
 
-void r700SetupVTXConstants(GLcontext  * ctx,
-                          unsigned int nStreamID,
-                          void *       pAos,
-                          unsigned int size,      /* number of elements in vector */
-                          unsigned int stride,
-                          unsigned int count)     /* number of vectors in stream */
+static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t         *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       unsigned int i;
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
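+       /* one SET_SAMPLER packet (3 dwords) per enabled texture unit */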
+       for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+                       radeonTexObj *t = r700->textures[i];
+                       if (t) {
+                               BEGIN_BATCH_NO_AUTOSTATE(5);
+                               R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+                               R600_OUT_BATCH(i * 3);
+                               R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
+                               R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
+                               R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
+                               END_BATCH();
+                               COMMIT_BATCH();
+                       }
+               }
+       }
+}
+
+static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t         *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       unsigned int i;
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
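+       /* border color registers are laid out per sampler, 16 bytes (4 dwords) apart */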
+       for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+                       radeonTexObj *t = r700->textures[i];
+                       if (t) {
+                               BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+                               R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
+                               R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
+                               R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
+                               R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
+                               R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
+                               END_BATCH();
+                               COMMIT_BATCH();
+                       }
+               }
+       }
+}
+
+extern int getTypeSize(GLenum type);
+static void r700SetupVTXConstants(GLcontext  * ctx,
+                                 void *       pAos,
+                                 StreamDesc * pStreamDesc)
 {
     context_t *context = R700_CONTEXT(ctx);
     struct radeon_aos * paos = (struct radeon_aos *)pAos;
+    unsigned int nVBsize;
     BATCH_LOCALS(&context->radeon);
 
     unsigned int uSQ_VTX_CONSTANT_WORD0_0;
@@ -342,23 +167,46 @@ void r700SetupVTXConstants(GLcontext  * ctx,
     if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
            r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
     else
            r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
 
+    if(0 == pStreamDesc->stride)
+    {
+        nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+    }
+    else
+    {
+        nVBsize = paos->count * pStreamDesc->stride;
+    }
+
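+    /* WORD0 = base offset within the buffer object, WORD1 = vertex buffer size in bytes minus one */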
     uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
-    uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
+    uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1;
 
     SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+    SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
             SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
+    SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
             SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
             SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
-            SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
-    SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+
+    if(GL_TRUE == pStreamDesc->normalize)
+    {
+        SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
+                    SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+    }
+    //else
+    //{
+    //    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT,
+       //             SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+    //}
+
+    if(1 == pStreamDesc->_signed)
+    {
+        SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+    }
 
     SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
     SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
@@ -367,7 +215,7 @@ void r700SetupVTXConstants(GLcontext  * ctx,
     BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
 
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-    R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
@@ -384,19 +232,16 @@ void r700SetupVTXConstants(GLcontext  * ctx,
 
 }
 
-int r700SetupStreams(GLcontext * ctx)
+static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t         *context = R700_CONTEXT(ctx);
+    struct r700_vertex_program *vp = context->selected_vp;
+    unsigned int i, j = 0;
     BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-    struct r700_vertex_program *vpc
-             = (struct r700_vertex_program *)ctx->VertexProgram._Current;
-
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *vb = &tnl->vb;
-
-    unsigned int unBit;
-    unsigned int i, j = 0;
+    if (context->radeon.tcl.aos_count == 0)
+           return;
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
@@ -409,113 +254,119 @@ int r700SetupStreams(GLcontext * ctx)
     END_BATCH();
     COMMIT_BATCH();
 
-       for(i=0; i<VERT_ATTRIB_MAX; i++)
-       {
-               unBit = 1 << i;
-               if(vpc->mesa_program.Base.InputsRead & unBit)
-               {
-                       rcommon_emit_vector(ctx,
-                                           &context->radeon.tcl.aos[j],
-                                           vb->AttribPtr[i]->data,
-                                           vb->AttribPtr[i]->size,
-                                           vb->AttribPtr[i]->stride,
-                                           vb->Count);
-
-                       /* currently aos are packed */
-                       r700SetupVTXConstants(ctx,
-                                             i,
-                                             (void*)(&context->radeon.tcl.aos[j]),
-                                             (unsigned int)context->radeon.tcl.aos[j].components,
-                                             (unsigned int)context->radeon.tcl.aos[j].stride * 4,
-                                             (unsigned int)context->radeon.tcl.aos[j].count);
-                       j++;
-               }
-       }
-       context->radeon.tcl.aos_count = j;
-
-    return R600_FALLBACK_NONE;
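+    /* emit one vertex fetch resource per enabled attribute, paired with its stream descriptor */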
+    for(i=0; i<VERT_ATTRIB_MAX; i++) {
+           if(vp->mesa_program->Base.InputsRead & (1 << i))
+           {
+                r700SetupVTXConstants(ctx,
+                                     (void*)(&context->radeon.tcl.aos[j]),
+                                     &(context->stream_desc[j]));
+               j++;
+           }
+    }
 }
 
-GLboolean r700SendContextStates(context_t *context)
+static void r700SetRenderTarget(context_t *context, int id)
 {
-    BATCH_LOCALS(&context->radeon);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 
-    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
 
-    ContextState * pState = r700->pStateList;
-    ContextState * pInit;
-    unsigned int   toSend;
-    unsigned int   ui;
+    rrb = radeon_get_colorbuffer(&context->radeon);
+    if (!rrb || !rrb->bo) {
+           return;
+    }
 
-    while(NULL != pState)
+    R600_STATECHANGE(context, cb_target);
+
+    /* color buffer */
+    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset;
+
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+    r700->render_target[id].CB_COLOR0_BASE.u32All = 0;
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
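+    /* choose the color format from bytes per pixel: 4 -> ARGB8888 (SWAP_ALT), otherwise RGB565 */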
+    if(4 == rrb->cpp)
     {
-        toSend = 1;
-
-        pInit = pState;
-
-       while(NULL != pState->pNext)
-       {
-                if ((pState->pNext->unOffset - pState->unOffset) > 1)
-                {
-                       break;
-                }
-                else
-                {
-                       pState = pState->pNext;
-                       toSend++;
-                }
-       }
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask);
+    }
+    else
+    {
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV,
+                 COMP_SWAP_shift, COMP_SWAP_mask);
+    }
+    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
 
-        pState = pState->pNext;
+    r700->render_target[id].enabled = GL_TRUE;
+}
 
-        BEGIN_BATCH_NO_AUTOSTATE(toSend + 2);
-        R600_OUT_BATCH_REGSEQ(((pInit->unOffset + ASIC_CONTEXT_BASE_INDEX)<<2), toSend);
-        for(ui=0; ui<toSend; ui++)
-        {
-                R600_OUT_BATCH(*(pInit->puiValue));
-               pInit = pInit->pNext;
-        };
-        END_BATCH();
-    };
-
-    /* todo:
-     * - split this into a separate function?
-     * - only emit the ones we use
-     */
-    BEGIN_BATCH_NO_AUTOSTATE(2 + R700_MAX_SHADER_EXPORTS);
-    R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS);
-    for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
-           R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All);
-    END_BATCH();
+static void r700SetDepthTarget(context_t *context)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 
-    if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
-           for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
-                   if (r700->render_target[ui].enabled) {
-                           BEGIN_BATCH_NO_AUTOSTATE(3);
-                           R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui),
-                                                 r700->render_target[ui].CB_BLEND0_CONTROL.u32All);
-                           END_BATCH();
-                   }
-           }
-    }
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
 
-    COMMIT_BATCH();
+    rrb = radeon_get_depthbuffer(&context->radeon);
+    if (!rrb)
+           return;
+
+    R600_STATECHANGE(context, db_target);
 
-    return GL_TRUE;
+    /* depth buf */
+    r700->DB_DEPTH_SIZE.u32All = 0;
+    r700->DB_DEPTH_BASE.u32All = 0;
+    r700->DB_DEPTH_INFO.u32All = 0;
+    r700->DB_DEPTH_VIEW.u32All = 0;
+
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+
+    SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixels / 64 - 1 */
+
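+    /* 4 bytes per pixel -> 24-bit depth + 8-bit stencil, otherwise 16-bit depth */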
+    if(4 == rrb->cpp)
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24,
+                 DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    else
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
+                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
+             DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
+    /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* the z buffer size may be much bigger than needed, so use the actual height in use. */
 }
 
-GLboolean r700SendDepthTargetState(context_t *context)
+static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_renderbuffer *rrb;
        BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        rrb = radeon_get_depthbuffer(&context->radeon);
        if (!rrb || !rrb->bo) {
-               fprintf(stderr, "no rrb\n");
-               return GL_FALSE;
+               return;
        }
 
+       r700SetDepthTarget(context);
+
         BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
        R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
        R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
@@ -539,29 +390,29 @@ GLboolean r700SendDepthTargetState(context_t *context)
 
        COMMIT_BATCH();
 
-       r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
-                    DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
-
-       return GL_TRUE;
 }
 
-GLboolean r700SendRenderTargetState(context_t *context, int id)
+static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_renderbuffer *rrb;
        BATCH_LOCALS(&context->radeon);
+       int id = 0;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        rrb = radeon_get_colorbuffer(&context->radeon);
        if (!rrb || !rrb->bo) {
-               fprintf(stderr, "no rrb\n");
-               return GL_FALSE;
+               return;
        }
 
+       r700SetRenderTarget(context, 0);
+
        if (id > R700_MAX_RENDER_TARGETS)
-               return GL_FALSE;
+               return;
 
        if (!r700->render_target[id].enabled)
-               return GL_FALSE;
+               return;
 
         BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
        R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
@@ -591,22 +442,20 @@ GLboolean r700SendRenderTargetState(context_t *context, int id)
 
        COMMIT_BATCH();
 
-       r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
-                    CB_ACTION_ENA_bit | (1 << (id + 6)));
-
-       return GL_TRUE;
 }
 
-GLboolean r700SendPSState(context_t *context)
+static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_bo * pbo;
        BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
 
        if (!pbo)
-               return GL_FALSE;
+               return;
 
        r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
@@ -627,19 +476,20 @@ GLboolean r700SendPSState(context_t *context)
 
        COMMIT_BATCH();
 
-       return GL_TRUE;
 }
 
-GLboolean r700SendVSState(context_t *context)
+static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_bo * pbo;
        BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
 
        if (!pbo)
-               return GL_FALSE;
+               return;
 
        r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
@@ -658,15 +508,15 @@ GLboolean r700SendVSState(context_t *context)
         END_BATCH();
 
        COMMIT_BATCH();
-
-       return GL_TRUE;
 }
 
-GLboolean r700SendFSState(context_t *context)
+static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_bo * pbo;
        BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        /* XXX fixme
         * R6xx chips require a FS be emitted, even if it's not used.
@@ -680,7 +530,7 @@ GLboolean r700SendFSState(context_t *context)
        /* XXX */
 
        if (!pbo)
-               return GL_FALSE;
+               return;
 
        r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
@@ -700,19 +550,21 @@ GLboolean r700SendFSState(context_t *context)
 
        COMMIT_BATCH();
 
-       return GL_TRUE;
 }
 
-GLboolean r700SendViewportState(context_t *context, int id)
+static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        BATCH_LOCALS(&context->radeon);
+       int id = 0;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        if (id > R700_MAX_VIEWPORTS)
-               return GL_FALSE;
+               return;
 
        if (!r700->viewport[id].enabled)
-               return GL_FALSE;
+               return;
 
         BEGIN_BATCH_NO_AUTOSTATE(16);
        R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL + (8 * id), 2);
@@ -732,15 +584,16 @@ GLboolean r700SendViewportState(context_t *context, int id)
 
        COMMIT_BATCH();
 
-       return GL_TRUE;
 }
 
-GLboolean r700SendSQConfig(context_t *context)
+static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-        BEGIN_BATCH_NO_AUTOSTATE(8);
+        BEGIN_BATCH_NO_AUTOSTATE(34);
        R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
        R600_OUT_BATCH(r700->sq_config.SQ_CONFIG.u32All);
        R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All);
@@ -748,17 +601,35 @@ GLboolean r700SendSQConfig(context_t *context)
        R600_OUT_BATCH(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All);
        R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All);
        R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All);
+
+       R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, r700->TA_CNTL_AUX.u32All);
+       R600_OUT_BATCH_REGVAL(VC_ENHANCE, r700->VC_ENHANCE.u32All);
+       R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All);
+       R600_OUT_BATCH_REGVAL(DB_DEBUG, r700->DB_DEBUG.u32All);
+       R600_OUT_BATCH_REGVAL(DB_WATERMARKS, r700->DB_WATERMARKS.u32All);
+
+       R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9);
+       R600_OUT_BATCH(r700->SQ_ESGS_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_GSVS_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_ESTMP_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_GSTMP_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_VSTMP_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_PSTMP_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_FBUF_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_REDUC_RING_ITEMSIZE.u32All);
+       R600_OUT_BATCH(r700->SQ_GS_VERT_ITEMSIZE.u32All);
         END_BATCH();
-       COMMIT_BATCH();
 
-       return GL_TRUE;
+       COMMIT_BATCH();
 }
 
-GLboolean r700SendUCPState(context_t *context)
+static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
+       context_t *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        BATCH_LOCALS(&context->radeon);
        int i;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        for (i = 0; i < R700_MAX_UCP; i++) {
                if (r700->ucp[i].enabled) {
@@ -772,7 +643,678 @@ GLboolean r700SendUCPState(context_t *context)
                        COMMIT_BATCH();
                }
        }
+}
+
+static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       unsigned int ui;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS);
+
+       R600_OUT_BATCH_REGSEQ(SQ_VTX_SEMANTIC_0, 32);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_0.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_1.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_2.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_3.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_4.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_5.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_6.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_7.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_8.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_9.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_10.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_11.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_12.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_13.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_14.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_15.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_16.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_17.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_18.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_19.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_20.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_21.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_22.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_23.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_24.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_25.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_26.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_27.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_28.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_29.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_30.u32All);
+       R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_31.u32All);
+
+       R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_ID_0, 10);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_0.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_1.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_2.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_3.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_4.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_5.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_6.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_7.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_8.u32All);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_ID_9.u32All);
+
+       R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_CONFIG, 9);
+       R600_OUT_BATCH(r700->SPI_VS_OUT_CONFIG.u32All);
+       R600_OUT_BATCH(r700->SPI_THREAD_GROUPING.u32All);
+       R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_0.u32All);
+       R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_1.u32All);
+       R600_OUT_BATCH(r700->SPI_INTERP_CONTROL_0.u32All);
+       R600_OUT_BATCH(r700->SPI_INPUT_Z.u32All);
+       R600_OUT_BATCH(r700->SPI_FOG_CNTL.u32All);
+       R600_OUT_BATCH(r700->SPI_FOG_FUNC_SCALE.u32All);
+       R600_OUT_BATCH(r700->SPI_FOG_FUNC_BIAS.u32All);
+
+       R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS);
+       for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+               R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All);
+
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+        BEGIN_BATCH_NO_AUTOSTATE(41);
+
+       R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+       R600_OUT_BATCH(r700->VGT_MAX_VTX_INDX.u32All);
+       R600_OUT_BATCH(r700->VGT_MIN_VTX_INDX.u32All);
+       R600_OUT_BATCH(r700->VGT_INDX_OFFSET.u32All);
+       R600_OUT_BATCH(r700->VGT_MULTI_PRIM_IB_RESET_INDX.u32All);
+
+       R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13);
+       R600_OUT_BATCH(r700->VGT_OUTPUT_PATH_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_HOS_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_HOS_MAX_TESS_LEVEL.u32All);
+       R600_OUT_BATCH(r700->VGT_HOS_MIN_TESS_LEVEL.u32All);
+       R600_OUT_BATCH(r700->VGT_HOS_REUSE_DEPTH.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_PRIM_TYPE.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_FIRST_DECR.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_DECR.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_FMT_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_FMT_CNTL.u32All);
+       R600_OUT_BATCH(r700->VGT_GS_MODE.u32All);
+
+       R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, r700->VGT_PRIMITIVEID_EN.u32All);
+       R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, r700->VGT_MULTI_PRIM_IB_RESET_EN.u32All);
+       R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, r700->VGT_INSTANCE_STEP_RATE_0.u32All);
+       R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, r700->VGT_INSTANCE_STEP_RATE_1.u32All);
+
+       R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+       R600_OUT_BATCH(r700->VGT_STRMOUT_EN.u32All);
+       R600_OUT_BATCH(r700->VGT_REUSE_OFF.u32All);
+       R600_OUT_BATCH(r700->VGT_VTX_CNT_EN.u32All);
+
+       R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, r700->VGT_STRMOUT_BUFFER_EN.u32All);
+
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+        BEGIN_BATCH_NO_AUTOSTATE(9);
+       R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All);
+       R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, r700->SX_ALPHA_TEST_CONTROL.u32All);
+       R600_OUT_BATCH_REGVAL(SX_ALPHA_REF, r700->SX_ALPHA_REF.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(17);
+
+       R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
+       R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
+       R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All);
+
+       R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All);
+       R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All);
+
+       R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2);
+       R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
+       R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
+
+       R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
+
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendStencilState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+
+        BEGIN_BATCH_NO_AUTOSTATE(4);
+       R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2);
+       R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All);
+       R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+               BEGIN_BATCH_NO_AUTOSTATE(11);
+               R600_OUT_BATCH_REGSEQ(CB_CLEAR_RED, 4);
+               R600_OUT_BATCH(r700->CB_CLEAR_RED_R6XX.u32All);
+               R600_OUT_BATCH(r700->CB_CLEAR_GREEN_R6XX.u32All);
+               R600_OUT_BATCH(r700->CB_CLEAR_BLUE_R6XX.u32All);
+               R600_OUT_BATCH(r700->CB_CLEAR_ALPHA_R6XX.u32All);
+               R600_OUT_BATCH_REGSEQ(CB_FOG_RED, 3);
+               R600_OUT_BATCH(r700->CB_FOG_RED_R6XX.u32All);
+               R600_OUT_BATCH(r700->CB_FOG_GREEN_R6XX.u32All);
+               R600_OUT_BATCH(r700->CB_FOG_BLUE_R6XX.u32All);
+               END_BATCH();
+       }
+
+       BEGIN_BATCH_NO_AUTOSTATE(7);
+       R600_OUT_BATCH_REGSEQ(CB_TARGET_MASK, 2);
+       R600_OUT_BATCH(r700->CB_TARGET_MASK.u32All);
+       R600_OUT_BATCH(r700->CB_SHADER_MASK.u32All);
+       R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, r700->CB_SHADER_CONTROL.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendCBCLRCMPState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
 
-       return GL_TRUE;
+       BEGIN_BATCH_NO_AUTOSTATE(6);
+       R600_OUT_BATCH_REGSEQ(CB_CLRCMP_CONTROL, 4);
+       R600_OUT_BATCH(r700->CB_CLRCMP_CONTROL.u32All);
+       R600_OUT_BATCH(r700->CB_CLRCMP_SRC.u32All);
+       R600_OUT_BATCH(r700->CB_CLRCMP_DST.u32All);
+       R600_OUT_BATCH(r700->CB_CLRCMP_MSK.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
 }
 
+static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       unsigned int ui;
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+               BEGIN_BATCH_NO_AUTOSTATE(3);
+               R600_OUT_BATCH_REGVAL(CB_BLEND_CONTROL, r700->CB_BLEND_CONTROL.u32All);
+               END_BATCH();
+       }
+
+       BEGIN_BATCH_NO_AUTOSTATE(3);
+       R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, r700->CB_COLOR_CONTROL.u32All);
+       END_BATCH();
+
+       if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+               for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+                       if (r700->render_target[ui].enabled) {
+                               BEGIN_BATCH_NO_AUTOSTATE(3);
+                               R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui),
+                                                     r700->render_target[ui].CB_BLEND0_CONTROL.u32All);
+                               END_BATCH();
+                       }
+               }
+       }
+
+       COMMIT_BATCH();
+}
+
+static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(6);
+       R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4);
+       R600_OUT_BATCH(r700->CB_BLEND_RED.u32All);
+       R600_OUT_BATCH(r700->CB_BLEND_GREEN.u32All);
+       R600_OUT_BATCH(r700->CB_BLEND_BLUE.u32All);
+       R600_OUT_BATCH(r700->CB_BLEND_ALPHA.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendSUState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+
+       BEGIN_BATCH_NO_AUTOSTATE(9);
+       R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, r700->PA_SU_SC_MODE_CNTL.u32All);
+       R600_OUT_BATCH_REGSEQ(PA_SU_POINT_SIZE, 4);
+       R600_OUT_BATCH(r700->PA_SU_POINT_SIZE.u32All);
+       R600_OUT_BATCH(r700->PA_SU_POINT_MINMAX.u32All);
+       R600_OUT_BATCH(r700->PA_SU_LINE_CNTL.u32All);
+       R600_OUT_BATCH(r700->PA_SU_VTX_CNTL.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+
+}
+
+static void r700SendPolyState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+
+       BEGIN_BATCH_NO_AUTOSTATE(10);
+       R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_CLAMP.u32All);
+       R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_SCALE.u32All);
+       R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+
+}
+
+static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(12);
+       R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All);
+       R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, r700->PA_CL_VTE_CNTL.u32All);
+       R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, r700->PA_CL_VS_OUT_CNTL.u32All);
+       R600_OUT_BATCH_REGVAL(PA_CL_NANINF_CNTL, r700->PA_CL_NANINF_CNTL.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendGBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+
+       BEGIN_BATCH_NO_AUTOSTATE(6);
+       R600_OUT_BATCH_REGSEQ(PA_CL_GB_VERT_CLIP_ADJ, 4);
+       R600_OUT_BATCH(r700->PA_CL_GB_VERT_CLIP_ADJ.u32All);
+       R600_OUT_BATCH(r700->PA_CL_GB_VERT_DISC_ADJ.u32All);
+       R600_OUT_BATCH(r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All);
+       R600_OUT_BATCH(r700->PA_CL_GB_HORZ_DISC_ADJ.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(22);
+       R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+       R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All);
+
+       R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12);
+       R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All);
+       R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All);
+
+       R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+       R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All);
+       R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       BEGIN_BATCH_NO_AUTOSTATE(15);
+       R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_LINE_STIPPLE, r700->PA_SC_LINE_STIPPLE.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_MPASS_PS_CNTL, r700->PA_SC_MPASS_PS_CNTL.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_MODE_CNTL, r700->PA_SC_MODE_CNTL.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_LINE_CNTL, r700->PA_SC_LINE_CNTL.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendAAState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       BATCH_LOCALS(&context->radeon);
+
+       BEGIN_BATCH_NO_AUTOSTATE(12);
+       R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, r700->PA_SC_AA_CONFIG.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_MCTX.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX.u32All);
+       R600_OUT_BATCH_REGVAL(PA_SC_AA_MASK, r700->PA_SC_AA_MASK.u32All);
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendPSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       int i;
+       BATCH_LOCALS(&context->radeon);
+
+       if (r700->ps.num_consts == 0)
+               return;
+
+       BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->ps.num_consts * 4));
+       R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->ps.num_consts * 4)));
+       /* the assembler maps constants from the very beginning. */
+       R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
+       for (i = 0; i < r700->ps.num_consts; i++) {
+               R600_OUT_BATCH(r700->ps.consts[i][0].u32All);
+               R600_OUT_BATCH(r700->ps.consts[i][1].u32All);
+               R600_OUT_BATCH(r700->ps.consts[i][2].u32All);
+               R600_OUT_BATCH(r700->ps.consts[i][3].u32All);
+       }
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+       int i;
+       BATCH_LOCALS(&context->radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       if (r700->vs.num_consts == 0)
+               return;
+
+       BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->vs.num_consts * 4));
+       R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->vs.num_consts * 4)));
+       /* the assembler maps constants from the very beginning. */
+       R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4);
+       for (i = 0; i < r700->vs.num_consts; i++) {
+               R600_OUT_BATCH(r700->vs.consts[i][0].u32All);
+               R600_OUT_BATCH(r700->vs.consts[i][1].u32All);
+               R600_OUT_BATCH(r700->vs.consts[i][2].u32All);
+               R600_OUT_BATCH(r700->vs.consts[i][3].u32All);
+       }
+       END_BATCH();
+       COMMIT_BATCH();
+}
+
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+       struct radeon_query_object *query = radeon->query.current;
+       BATCH_LOCALS(radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       /* clear the buffer */
+       radeon_bo_map(query->bo, GL_FALSE);
+       memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+       radeon_bo_unmap(query->bo);
+
+       radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+                                     query->bo,
+                                     0, RADEON_GEM_DOMAIN_GTT);
+
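+       /* EVENT_WRITE(ZPASS_DONE) asks the DBs to dump their Z-pass counters
+        * into the query buffer at curr_offset: 4 packet dwords plus the
+        * relocation for the destination BO.
+        */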
+       BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+       R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+       R600_OUT_BATCH(ZPASS_DONE);
+       R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+       R600_OUT_BATCH(0x00000000);
+       R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+       END_BATCH();
+       query->emitted_begin = GL_TRUE;
+}
+
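+/* Each check callback returns the number of dwords its atom will add to the
+ * command stream; returning 0 lets the atom be skipped entirely.  Roughly,
+ * the common radeon code walks the atom list like this (simplified sketch,
+ * not the literal emit loop):
+ *
+ *     foreach(atom, &radeon->hw.atomlist) {
+ *         int dwords = atom->check(ctx, atom);
+ *         if (dwords)
+ *             atom->emit(ctx, atom);
+ *     }
+ */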
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       return atom->cmd_size;
+}
+
+static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       int count = 7;
+
+       if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+               count += 11;
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+       return count;
+}
+
+static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       unsigned int ui;
+       int count = 3;
+
+       if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+               count += 3;
+
+       if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+               for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+                       if (r700->render_target[ui].enabled)
+                               count += 3;
+               }
+       }
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+       return count;
+}
+
+static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       int i;
+       int count = 0;
+
+       for (i = 0; i < R700_MAX_UCP; i++) {
+               if (r700->ucp[i].enabled)
+                       count += 6;
+       }
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+       return count;
+}
+
+static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       int count = context->radeon.tcl.aos_count * 18;
+
+       if (count)
+               count += 6;
+
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+       return count;
+}
+
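+/* check_tx is shared by the tx, tx_smplr and tx_brdr_clr atoms; 31 dwords per
+ * active unit covers their combined per-unit cost (20 + 5 + 6), so the two
+ * smaller atoms simply over-reserve a little.
+ */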
+static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       unsigned int i, count = 0;
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+       for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+                       radeonTexObj *t = r700->textures[i];
+                       if (t)
+                               count++;
+               }
+       }
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+       return count * 31;
+}
+
+static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       int count = r700->ps.num_consts * 4;
+
+       if (count)
+               count += 2;
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+       return count;
+}
+
+static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       context_t *context = R700_CONTEXT(ctx);
+       R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+       int count = r700->vs.num_consts * 4;
+
+       if (count)
+               count += 2;
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+       return count;
+}
+
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+       radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+       struct radeon_query_object *query = radeon->query.current;
+       int count;
+
+       if (!query || query->emitted_begin)
+               count = 0;
+       else
+               count = atom->cmd_size;
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+       return count;
+}
+
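+/* Register a state atom: record its worst-case command size, hook up its
+ * check and emit callbacks, append it to the context's atom list, and add its
+ * size to radeon.hw.max_state_size for command-buffer space accounting.
+ */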
+#define ALLOC_STATE( ATOM, CHK, SZ, EMIT )                             \
+do {                                                                   \
+       context->atoms.ATOM.cmd_size = (SZ);                            \
+       context->atoms.ATOM.cmd = NULL;                                 \
+       context->atoms.ATOM.name = #ATOM;                               \
+       context->atoms.ATOM.idx = 0;                                    \
+       context->atoms.ATOM.check = check_##CHK;                        \
+       context->atoms.ATOM.dirty = GL_FALSE;                           \
+       context->atoms.ATOM.emit = (EMIT);                              \
+       context->radeon.hw.max_state_size += (SZ);                      \
+       insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
+} while (0)
+
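+/* The occlusion-query atom lives on the shared radeon context rather than in
+ * context->atoms, so it is set up by hand instead of via ALLOC_STATE.
+ */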
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+       radeon->query.queryobj.cmd_size = (SZ);
+       radeon->query.queryobj.cmd = NULL;
+       radeon->query.queryobj.name = "queryobj";
+       radeon->query.queryobj.idx = 0;
+       radeon->query.queryobj.check = check_queryobj;
+       radeon->query.queryobj.dirty = GL_FALSE;
+       radeon->query.queryobj.emit = r700SendQueryBegin;
+       radeon->hw.max_state_size += (SZ);
+       insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
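+/* Build the state-atom list.  insert_at_tail() preserves the order of the
+ * ALLOC_STATE calls, so atoms are emitted in the order they appear below.
+ */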
+void r600InitAtoms(context_t *context)
+{
+       radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
+       context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
+
+       /* Setup the atom linked list */
+       make_empty_list(&context->radeon.hw.atomlist);
+       context->radeon.hw.atomlist.name = "atom-list";
+
+       ALLOC_STATE(sq, always, 34, r700SendSQConfig);
+       ALLOC_STATE(db, always, 17, r700SendDBState);
+       ALLOC_STATE(stencil, always, 4, r700SendStencilState);
+       ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+       ALLOC_STATE(sc, always, 15, r700SendSCState);
+       ALLOC_STATE(scissor, always, 22, r700SendScissorState);
+       ALLOC_STATE(aa, always, 12, r700SendAAState);
+       ALLOC_STATE(cl, always, 12, r700SendCLState);
+       ALLOC_STATE(gb, always, 6, r700SendGBState);
+       ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
+       ALLOC_STATE(su, always, 9, r700SendSUState);
+       ALLOC_STATE(poly, always, 10, r700SendPolyState);
+       ALLOC_STATE(cb, cb, 18, r700SendCBState);
+       ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+       ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
+       ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
+       ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
+       ALLOC_STATE(sx, always, 9, r700SendSXState);
+       ALLOC_STATE(vgt, always, 41, r700SendVGTState);
+       ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
+       ALLOC_STATE(vpt, always, 16, r700SendViewportState);
+       ALLOC_STATE(fs, always, 18, r700SendFSState);
+       ALLOC_STATE(vs, always, 18, r700SendVSState);
+       ALLOC_STATE(ps, always, 21, r700SendPSState);
+       ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
+       ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
+       ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+       ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
+       ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
+       ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+       r600_init_query_stateobj(&context->radeon, 6 * 2);
+
+       context->radeon.hw.is_dirty = GL_TRUE;
+       context->radeon.hw.all_dirty = GL_TRUE;
+}