R6xx/R7xx: add fine grained syncing support
author Alex Deucher <alexdeucher@gmail.com>
Fri, 17 Jul 2009 23:04:19 +0000 (19:04 -0400)
committer Alex Deucher <alexdeucher@gmail.com>
Fri, 17 Jul 2009 23:05:30 +0000 (19:05 -0400)
src/mesa/drivers/dri/r600/r600_emit.c
src/mesa/drivers/dri/r600/r700_chip.c
src/mesa/drivers/dri/r600/r700_render.c

index ed51e428e9acc3dc8bcdb3ff70b3458fa7adc4f8..685f7fe4736a967a1ff223837abab90c3bf0ae26 100644 (file)
@@ -50,17 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 void r600EmitCacheFlush(context_t *rmesa)
 {
        BATCH_LOCALS(&rmesa->radeon);
-/*
-       BEGIN_BATCH_NO_AUTOSTATE(4);
-       OUT_BATCH_REGVAL(R600_RB3D_DSTCACHE_CTLSTAT,
-               R600_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-               R600_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-       OUT_BATCH_REGVAL(R600_ZB_ZCACHE_CTLSTAT,
-               R600_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-               R600_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-       END_BATCH();
-       COMMIT_BATCH();
-*/
 }
 
 GLboolean r600EmitShader(GLcontext * ctx, 
index 383d8bfb399ec9cc0a46988593ed04d689ce99a8..087d17312e92c9ed7408404e40b25562d3080cdc 100644 (file)
@@ -294,6 +294,14 @@ void r700SetupVTXConstants(GLcontext  * ctx,
     unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
     unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
 
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+       (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+           r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+    else
+           r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
     uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
     uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
 
@@ -433,7 +441,6 @@ GLboolean r700SendDepthTargetState(context_t *context, int id)
 {
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_renderbuffer *rrb;
-       struct radeon_bo * pbo;
        offset_modifiers offset_mod;
        BATCH_LOCALS(&context->radeon);
 
@@ -482,6 +489,9 @@ GLboolean r700SendDepthTargetState(context_t *context, int id)
 
        COMMIT_BATCH();
 
+       r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+                    DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+
        return GL_TRUE;
 }
 
@@ -489,7 +499,6 @@ GLboolean r700SendRenderTargetState(context_t *context, int id)
 {
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_renderbuffer *rrb;
-       struct radeon_bo * pbo;
        offset_modifiers offset_mod;
        BATCH_LOCALS(&context->radeon);
 
@@ -542,6 +551,9 @@ GLboolean r700SendRenderTargetState(context_t *context, int id)
 
        COMMIT_BATCH();
 
+       r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+                    CB_ACTION_ENA_bit | (1 << (id + 6)));
+
        return GL_TRUE;
 }
 
@@ -559,6 +571,8 @@ GLboolean r700SendPSState(context_t *context)
        offset_mod.shiftbits = 0;
        offset_mod.mask      = 0xFFFFFFFF;
 
+       r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
         BEGIN_BATCH_NO_AUTOSTATE(3);
        R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
        R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
@@ -592,6 +606,8 @@ GLboolean r700SendVSState(context_t *context)
        offset_mod.shiftbits = 0;
        offset_mod.mask      = 0xFFFFFFFF;
 
+       r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
         BEGIN_BATCH_NO_AUTOSTATE(3);
        R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
        R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
@@ -633,6 +649,8 @@ GLboolean r700SendFSState(context_t *context)
        offset_mod.shiftbits = 0;
        offset_mod.mask      = 0xFFFFFFFF;
 
+       r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
         BEGIN_BATCH_NO_AUTOSTATE(3);
        R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
        R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All,
@@ -655,7 +673,6 @@ GLboolean r700SendViewportState(context_t *context, int id)
 {
        R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
        struct radeon_renderbuffer *rrb;
-       struct radeon_bo * pbo;
        offset_modifiers offset_mod;
        BATCH_LOCALS(&context->radeon);
 
index 532a6e07b79f639fd6a1265209d63ae31f5e778f..f1e467a317f4f1e84e09c3d47405b1be6a0ea982 100644 (file)
@@ -58,9 +58,13 @@ void r700WaitForIdle(context_t *context);
 void r700WaitForIdleClean(context_t *context);
 void r700Start3D(context_t *context);
 GLboolean r700SendTextureState(context_t *context);
-GLboolean r700SyncSurf(context_t *context);
 unsigned int r700PrimitiveType(int prim);
 void r600UpdateTextureState(GLcontext * ctx);
+GLboolean r700SyncSurf(context_t *context,
+                      struct radeon_bo *pbo,
+                      uint32_t read_domain,
+                      uint32_t write_domain,
+                      uint32_t sync_type);
 
 void r700WaitForIdle(context_t *context)
 {
@@ -153,6 +157,11 @@ GLboolean r700SendTextureState(context_t *context)
                    else
                            bo = t->bo;
                    if (bo) {
+
+                           r700SyncSurf(context, bo,
+                                        RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+                                        0, TC_ACTION_ENA_bit);
+
                            BEGIN_BATCH_NO_AUTOSTATE(9);
                            R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
                            R600_OUT_BATCH(i * 7);
@@ -185,26 +194,33 @@ GLboolean r700SendTextureState(context_t *context)
     return GL_TRUE;
 }
 
-GLboolean r700SyncSurf(context_t *context)
+GLboolean r700SyncSurf(context_t *context,
+                      struct radeon_bo *pbo,
+                      uint32_t read_domain,
+                      uint32_t write_domain,
+                      uint32_t sync_type)
 {
     BATCH_LOCALS(&context->radeon);
+    uint32_t cp_coher_size;
+    offset_modifiers offset_mod;
 
-    /* TODO : too heavy? */
-    unsigned int CP_COHER_CNTL   = 0;
-
-    CP_COHER_CNTL |= (TC_ACTION_ENA_bit
-                     | VC_ACTION_ENA_bit
-                     | CB_ACTION_ENA_bit
-                     | DB_ACTION_ENA_bit
-                     | SH_ACTION_ENA_bit
-                     | SMX_ACTION_ENA_bit);
+    if (pbo->size == 0xffffffff)
+           cp_coher_size = 0xffffffff;
+    else
+           cp_coher_size = ((pbo->size + 255) >> 8);
 
+    offset_mod.shift     = NO_SHIFT;
+    offset_mod.shiftbits = 0;
+    offset_mod.mask      = 0xFFFFFFFF;
 
     BEGIN_BATCH_NO_AUTOSTATE(5);
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
-    R600_OUT_BATCH(CP_COHER_CNTL);
-    R600_OUT_BATCH(0xFFFFFFFF);
-    R600_OUT_BATCH(0x00000000);
+    R600_OUT_BATCH(sync_type);
+    R600_OUT_BATCH(cp_coher_size);
+    R600_OUT_BATCH_RELOC(0,
+                        pbo,
+                        0,
+                        read_domain, write_domain, 0, &offset_mod); // ???
     R600_OUT_BATCH(10);
 
     END_BATCH();
@@ -276,8 +292,6 @@ static GLboolean r700RunRender(GLcontext * ctx,
 
     r700Start3D(context); /* TODO : this is too much. */
 
-    r700SyncSurf(context); /* TODO : make it light. */
-
     r700SendSQConfig(context);
 
     r700UpdateShaders(ctx);
@@ -291,9 +305,6 @@ static GLboolean r700RunRender(GLcontext * ctx,
         return GL_TRUE;
     }
 
-    /* flush TX */
-    //r700SyncSurf(context); /*  */
-
     r600UpdateTextureState(ctx);
     r700SendTextureState(context);
 
@@ -305,19 +316,12 @@ static GLboolean r700RunRender(GLcontext * ctx,
         }
     }
 
-    /* flush SQ */
-    //r700SyncSurf(context); /*  */
-    //r700SyncSurf(context); /*  */
-
     r700SetupShaders(ctx);
 
     r700SendFSState(context); // FIXME just a place holder for now
     r700SendPSState(context);
     r700SendVSState(context);
 
-    /* flush vtx */
-    //r700SyncSurf(context); /*  */
-
     r700SendContextStates(context);
     r700SendViewportState(context, 0);
     r700SendRenderTargetState(context, 0);
@@ -375,20 +379,8 @@ static GLboolean r700RunRender(GLcontext * ctx,
     /* Flush render op cached for last several quads. */
     r700WaitForIdleClean(context);
 
-    /* flush dst */
-    //r700SyncSurf(context); /*  */
-
     radeonReleaseArrays(ctx, 0);
 
-    //richard test
-    /* test stamp, write a number to mmSCRATCH4 */
-#if 0
-    BEGIN_BATCH_NO_AUTOSTATE(3);
-    R600_OUT_BATCH_REGVAL((0x2144 << 2), 0x56785678);
-    END_BATCH();
-    COMMIT_BATCH();
-#endif
-
 #endif //0
     rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ );