Revert "r600c: substract vbo offset to vbo size"
[mesa.git] / src / mesa / drivers / dri / r600 / r700_chip.c
index 3b7f6fffe0388790a045584487d42755ed750270..a742dbcf129c51351ad36d27951aeebc8d20a9af 100644 (file)
 #include "r600_context.h"
 #include "r600_cmdbuf.h"
 
-#include "r700_state.h"
 #include "r600_tex.h"
 #include "r700_oglprog.h"
 #include "r700_fragprog.h"
 #include "r700_vertprog.h"
-#include "r700_ioctl.h"
 
 #include "radeon_mipmap_tree.h"
 
@@ -45,6 +43,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
        context_t         *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+    struct r700_vertex_program *vp = context->selected_vp;
+
        struct radeon_bo *bo = NULL;
        unsigned int i;
        BATCH_LOCALS(&context->radeon);
@@ -52,13 +53,14 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
        radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {            
                        radeonTexObj *t = r700->textures[i];
                        if (t) {
-                               if (!t->image_override)
+                               if (!t->image_override) {
                                        bo = t->mt->bo;
-                               else
+                               } else {
                                        bo = t->bo;
+                               }
                                if (bo) {
 
                                        r700SyncSurf(context, bo,
@@ -67,7 +69,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
 
                                        BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
                                        R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-                                       R600_OUT_BATCH(i * 7);
+
+                    if( (1<<i) & vp->r700AsmCode.unVetTexBits )                    
+                    {   /* vs texture */                                     
+                        R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+                    }
+                    else
+                    {
+                                           R600_OUT_BATCH(i * 7);
+                    }
+
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
@@ -77,7 +88,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
                                        R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
                                        R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
                                                             bo,
-                                                            0,
+                                                            r700->textures[i]->SQ_TEX_RESOURCE2,
                                                             RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
                                        R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
                                                             bo,
@@ -91,21 +102,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
        }
 }
 
+#define SAMPLER_STRIDE                 3
+
 static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
        context_t         *context = R700_CONTEXT(ctx);
        R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
        unsigned int i;
+
+    struct r700_vertex_program *vp = context->selected_vp;
+
        BATCH_LOCALS(&context->radeon);
        radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
        for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-               if (ctx->Texture.Unit[i]._ReallyEnabled) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled) {            
                        radeonTexObj *t = r700->textures[i];
                        if (t) {
                                BEGIN_BATCH_NO_AUTOSTATE(5);
                                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
-                               R600_OUT_BATCH(i * 3);
+
+                if( (1<<i) & vp->r700AsmCode.unVetTexBits )                    
+                {   /* vs texture */
+                    R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1
+                }
+                else
+                {
+                                   R600_OUT_BATCH(i * 3);
+                }
+
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
                                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
@@ -141,77 +166,10 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom
        }
 }
 
+extern int getTypeSize(GLenum type);
 static void r700SetupVTXConstants(GLcontext  * ctx,
-                                 unsigned int nStreamID,
                                  void *       pAos,
-                                 unsigned int size,      /* number of elements in vector */
-                                 unsigned int stride,
-                                 unsigned int count)     /* number of vectors in stream */
-{
-    context_t *context = R700_CONTEXT(ctx);
-    struct radeon_aos * paos = (struct radeon_aos *)pAos;
-    BATCH_LOCALS(&context->radeon);
-    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
-
-    unsigned int uSQ_VTX_CONSTANT_WORD0_0;
-    unsigned int uSQ_VTX_CONSTANT_WORD1_0;
-    unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
-    unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
-    unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
-
-    if (!paos->bo)
-           return;
-
-    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
-       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
-       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
-       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
-       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
-           r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
-    else
-           r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
-
-    uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
-    uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
-
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
-            SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
-            SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
-            SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
-            SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
-    SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
-
-    SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
-    SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
-            SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
-
-    BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
-
-    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-    R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
-    R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
-    R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
-    R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
-    R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0);
-    R600_OUT_BATCH(0);
-    R600_OUT_BATCH(0);
-    R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0);
-    R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
-                         paos->bo,
-                         uSQ_VTX_CONSTANT_WORD0_0,
-                         RADEON_GEM_DOMAIN_GTT, 0, 0);
-    END_BATCH();
-    COMMIT_BATCH();
-
-}
-
-extern int getTypeSize(GLenum type);
-static void r700SetupVTXConstants2(GLcontext  * ctx,
-                                                  void *       pAos,
-                                   StreamDesc * pStreamDesc)     
+                                 StreamDesc * pStreamDesc)
 {
     context_t *context = R700_CONTEXT(ctx);
     struct radeon_aos * paos = (struct radeon_aos *)pAos;
@@ -242,7 +200,8 @@ static void r700SetupVTXConstants2(GLcontext  * ctx,
     }
     else
     {
-        nVBsize = paos->count * pStreamDesc->stride;
+        nVBsize = (paos->count - 1) * pStreamDesc->stride
+                  + pStreamDesc->size * getTypeSize(pStreamDesc->type);
     }
 
     uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
@@ -260,11 +219,11 @@ static void r700SetupVTXConstants2(GLcontext  * ctx,
         SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
                     SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
     }
-    //else
-    //{
-    //    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT,
-       //             SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
-    //}
+    else
+    {
+        SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
+                    SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+    }
 
     if(1 == pStreamDesc->_signed)
     {
@@ -295,31 +254,6 @@ static void r700SetupVTXConstants2(GLcontext  * ctx,
 
 }
 
-void r700SetupStreams(GLcontext *ctx)
-{
-    context_t         *context = R700_CONTEXT(ctx);
-    struct r700_vertex_program *vp = context->selected_vp;
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *vb = &tnl->vb;
-    unsigned int i, j = 0;
-       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
-
-    R600_STATECHANGE(context, vtx);
-
-    for(i=0; i<VERT_ATTRIB_MAX; i++) {
-           if(vp->mesa_program->Base.InputsRead & (1 << i)) {
-                   rcommon_emit_vector(ctx,
-                                       &context->radeon.tcl.aos[j],
-                                       vb->AttribPtr[i]->data,
-                                       vb->AttribPtr[i]->size,
-                                       vb->AttribPtr[i]->stride,
-                                       vb->Count);
-                   j++;
-           }
-    }
-    context->radeon.tcl.aos_count = j;
-}
-
 static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t         *context = R700_CONTEXT(ctx);
@@ -343,25 +277,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
     COMMIT_BATCH();
 
     for(i=0; i<VERT_ATTRIB_MAX; i++) {
-           if(vp->mesa_program->Base.InputsRead & (1 << i)) 
-        {
-                   if(1 == context->selected_vp->uiVersion)
-            {
-                       /* currently aos are packed */
-                       r700SetupVTXConstants(ctx,
-                                             i,
-                                             (void*)(&context->radeon.tcl.aos[j]),
-                                             (unsigned int)context->radeon.tcl.aos[j].components,
-                                             (unsigned int)context->radeon.tcl.aos[j].stride * 4,
-                                             (unsigned int)context->radeon.tcl.aos[j].count);
-            }
-            else
-            {   /* context->selected_vp->uiVersion == 2 : aos not always packed */                
-                r700SetupVTXConstants2(ctx,                                        
-                                             (void*)(&context->radeon.tcl.aos[j]),
-                                             &(context->stream_desc[j]));
-            }
-                   j++;
+           if(vp->mesa_program->Base.InputsRead & (1 << i))
+           {
+                r700SetupVTXConstants(ctx,
+                                     (void*)(&context->radeon.tcl.aos[j]),
+                                     &(context->stream_desc[j]));
+               j++;
            }
     }
 }
@@ -381,14 +302,13 @@ static void r700SetRenderTarget(context_t *context, int id)
     R600_STATECHANGE(context, cb_target);
 
     /* color buffer */
-    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset;
+    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset / 256;
 
     nPitchInPixel = rrb->pitch/rrb->cpp;
     SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
              PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
     SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
              SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
-    r700->render_target[id].CB_COLOR0_BASE.u32All = 0;
     SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
     SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
              CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
@@ -463,7 +383,6 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
 
        rrb = radeon_get_depthbuffer(&context->radeon);
        if (!rrb || !rrb->bo) {
-               fprintf(stderr, "no rrb\n");
                return;
        }
 
@@ -505,7 +424,6 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
 
        rrb = radeon_get_colorbuffer(&context->radeon);
        if (!rrb || !rrb->bo) {
-               fprintf(stderr, "no rrb\n");
                return;
        }
 
@@ -533,13 +451,31 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
                R600_OUT_BATCH((2 << id));
                END_BATCH();
        }
+       /* Set CMASK & TILE buffer to the offset of color buffer as
+        * we don't use those this shouldn't cause any issue and we
+        * then have a valid cmd stream
+        */
+       BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+       R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+       R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
+       R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+                            rrb->bo,
+                            r700->render_target[id].CB_COLOR0_BASE.u32All,
+                            0, RADEON_GEM_DOMAIN_VRAM, 0);
+       END_BATCH();
+       BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+       R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+       R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
+       R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+                            rrb->bo,
+                            r700->render_target[id].CB_COLOR0_BASE.u32All,
+                            0, RADEON_GEM_DOMAIN_VRAM, 0);
+        END_BATCH();
 
-        BEGIN_BATCH_NO_AUTOSTATE(18);
+        BEGIN_BATCH_NO_AUTOSTATE(12);
        R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
        R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
        R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
-       R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + (4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All);
-       R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All);
        R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
         END_BATCH();
 
@@ -549,68 +485,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
 
 static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
-       context_t *context = R700_CONTEXT(ctx);
-       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
-       struct radeon_bo * pbo;
-       BATCH_LOCALS(&context->radeon);
-       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+    struct radeon_bo * pbo;
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-       pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
+    pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); /* presumably the BO holding the
+                                                                             * active fragment program */
 
-       if (!pbo)
-               return;
+    if (!pbo)
+           return; /* no shader BO: nothing is emitted for this atom */
 
-       r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
-        BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
-       R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
-       R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
-       R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
-                            pbo,
-                            r700->ps.SQ_PGM_START_PS.u32All,
-                            RADEON_GEM_DOMAIN_GTT, 0, 0);
-       END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2); /* program start register write, followed by a
+                                      * relocation against pbo; the "+ 2" is presumably
+                                      * the relocation's share -- confirm */
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+    R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
+    R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+                        pbo,
+                        r700->ps.SQ_PGM_START_PS.u32All,
+                        RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
 
-        BEGIN_BATCH_NO_AUTOSTATE(9);
-       R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
-       R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
-       R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
-        END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(9); /* three single-register writes; the one-register
+                                  * batch further down reserves 3 for one such write */
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+    END_BATCH();
 
-       COMMIT_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); /* NOTE(review): hard-coded loop constant for
+                                                         * the pixel shader; the bit layout of
+                                                         * 0x01000FFF is not documented here --
+                                                         * confirm against the register reference */
+    END_BATCH();
+
+    COMMIT_BATCH();
 
 }
 
 static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
-       context_t *context = R700_CONTEXT(ctx);
-       R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
-       struct radeon_bo * pbo;
-       BATCH_LOCALS(&context->radeon);
-       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+    struct radeon_bo * pbo;
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-       pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+    pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); /* presumably the BO holding the
+                                                                             * active vertex program */
 
-       if (!pbo)
-               return;
+    if (!pbo)
+           return; /* no shader BO: nothing is emitted for this atom */
 
-       r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
-        BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
-       R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
-       R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
-       R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
-                            pbo,
-                            r700->vs.SQ_PGM_START_VS.u32All,
-                            RADEON_GEM_DOMAIN_GTT, 0, 0);
-       END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2); /* program start register write, followed by a
+                                      * relocation against pbo; the "+ 2" is presumably
+                                      * the relocation's share -- confirm */
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+    R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
+    R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+                        pbo,
+                        r700->vs.SQ_PGM_START_VS.u32All,
+                        RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
 
-        BEGIN_BATCH_NO_AUTOSTATE(6);
-       R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
-       R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
-        END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(6); /* two single-register writes; the one-register
+                                  * batch further down reserves 3 for one such write */
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+    END_BATCH();
 
-       COMMIT_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); /* NOTE(review): the register offset (32*4 past
+                                                                  * SQ_LOOP_CONST_0) and the value are hard-coded;
+                                                                  * the disabled line below hints at a symbolic
+                                                                  * SQ_LOOP_CONST_vs offset, but its "<2" looks
+                                                                  * like a typo for "<<2" -- confirm before
+                                                                  * re-enabling it */
+    //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F);
+    END_BATCH();
+
+    COMMIT_BATCH();
 }
 
 static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
@@ -891,8 +836,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
        BATCH_LOCALS(&context->radeon);
        radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-        BEGIN_BATCH_NO_AUTOSTATE(23);
-       R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+       BEGIN_BATCH_NO_AUTOSTATE(17);
 
        R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
        R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
@@ -905,7 +849,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
        R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
        R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
 
-       R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
        R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
 
        END_BATCH();
@@ -1205,6 +1148,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
        COMMIT_BATCH();
 }
 
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{      /* Emit hook that r600_init_query_stateobj() installs on the "queryobj"
+        * atom: zeroes the start of the current query's buffer object, queues
+        * a ZPASS_DONE EVENT_WRITE packet that carries query->curr_offset, and
+        * marks the query as begun. 'atom' is not used in the body. */
+       radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+       struct radeon_query_object *query = radeon->query.current;
+       BATCH_LOCALS(radeon);
+       radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+       /* Clear the buffer: map the query BO, zero the first
+        * 4 * 2 * sizeof(uint64_t) bytes at bo->ptr, and unmap it again.
+        * NOTE(review): query is dereferenced without a NULL check and the
+        * radeon_bo_map() result is ignored; check_queryobj() returns 0 when
+        * there is no current query, which presumably keeps this hook from
+        * running in that case -- confirm.
+        * NOTE(review): the memset always starts at bo->ptr, while the packet
+        * below uses query->curr_offset -- confirm this is intended. */
+       radeon_bo_map(query->bo, GL_FALSE);
+       memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+       radeon_bo_unmap(query->bo);
+
+       radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+                                     query->bo,
+                                     0, RADEON_GEM_DOMAIN_GTT); /* presumably validates that the command
+                                                                 * stream can reference query->bo before
+                                                                 * the packet is written -- confirm */
+
+       BEGIN_BATCH_NO_AUTOSTATE(4 + 2); /* 4 dwords written below; the "+ 2" is
+                                         * presumably the relocation's share */
+       R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+       R600_OUT_BATCH(ZPASS_DONE);
+       R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+       R600_OUT_BATCH(0x00000000);
+       R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+       END_BATCH();
+       query->emitted_begin = GL_TRUE; /* from now on check_queryobj() reports size 0 for this query */
+}
+
 static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
 {
        return atom->cmd_size;
@@ -1233,7 +1202,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
                count += 3;
 
        if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
-               for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+               /* targets are enabled in r700SetRenderTarget but state
+                  size is calculated before that. Until MRT's are done
+                  hardcode target0 as enabled. */
+               count += 3;
+               for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) {
                         if (r700->render_target[ui].enabled)
                                count += 3;
                }
@@ -1313,6 +1286,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
        return count;
 }
 
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{      /* Size check for the "queryobj" atom: returns atom->cmd_size while a
+        * current query exists whose begin has not been emitted yet, and 0
+        * otherwise. The chosen size is also logged at RADEON_TRACE level. */
+       radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+       struct radeon_query_object *query = radeon->query.current;
+       int count;
+
+       if (!query || query->emitted_begin) /* no active query, or its begin packet was already emitted */
+               count = 0;
+       else
+               count = atom->cmd_size;
+       radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+       return count;
+}
+
 #define ALLOC_STATE( ATOM, CHK, SZ, EMIT )                             \
 do {                                                                   \
        context->atoms.ATOM.cmd_size = (SZ);                            \
@@ -1326,6 +1313,19 @@ do {                                                                     \
        insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
 } while (0)
 
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{      /* Initialises radeon->query.queryobj as a state atom named "queryobj"
+        * with cmd_size SZ, check_queryobj as its check hook and
+        * r700SendQueryBegin as its emit hook, then adds SZ to
+        * hw.max_state_size and appends the atom to hw.atomlist.
+        * NOTE(review): this looks like a hand-written counterpart of the
+        * ALLOC_STATE macro for an atom that lives in radeon->query rather
+        * than context->atoms -- confirm the two stay in sync. */
+       radeon->query.queryobj.cmd_size = (SZ);
+       radeon->query.queryobj.cmd = NULL;
+       radeon->query.queryobj.name = "queryobj";
+       radeon->query.queryobj.idx = 0;
+       radeon->query.queryobj.check = check_queryobj;
+       radeon->query.queryobj.dirty = GL_FALSE; /* the atom starts out not dirty */
+       radeon->query.queryobj.emit = r700SendQueryBegin;
+       radeon->hw.max_state_size += (SZ);
+       insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
 void r600InitAtoms(context_t *context)
 {
        radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
@@ -1336,7 +1336,7 @@ void r600InitAtoms(context_t *context)
        context->radeon.hw.atomlist.name = "atom-list";
 
        ALLOC_STATE(sq, always, 34, r700SendSQConfig);
-       ALLOC_STATE(db, always, 23, r700SendDBState);
+       ALLOC_STATE(db, always, 17, r700SendDBState);
        ALLOC_STATE(stencil, always, 4, r700SendStencilState);
        ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
        ALLOC_STATE(sc, always, 15, r700SendSCState);
@@ -1349,22 +1349,23 @@ void r600InitAtoms(context_t *context)
        ALLOC_STATE(poly, always, 10, r700SendPolyState);
        ALLOC_STATE(cb, cb, 18, r700SendCBState);
        ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+       ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
        ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
        ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
-       ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
        ALLOC_STATE(sx, always, 9, r700SendSXState);
        ALLOC_STATE(vgt, always, 41, r700SendVGTState);
        ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
        ALLOC_STATE(vpt, always, 16, r700SendViewportState);
        ALLOC_STATE(fs, always, 18, r700SendFSState);
-       ALLOC_STATE(vs, always, 18, r700SendVSState);
-       ALLOC_STATE(ps, always, 21, r700SendPSState);
+       ALLOC_STATE(vs, always, 21, r700SendVSState);
+       ALLOC_STATE(ps, always, 24, r700SendPSState);
        ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
        ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
        ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
        ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
        ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
        ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+       r600_init_query_stateobj(&context->radeon, 6 * 2);
 
        context->radeon.hw.is_dirty = GL_TRUE;
        context->radeon.hw.all_dirty = GL_TRUE;