diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index f050b3b965e4bafbc312d9da5023fc3a3e7fd1c7..c21a1013f93fcef57dbd9c1160efe0c51bfbe8e3 100644
@@ -23,7 +23,7 @@
  * Authors:
  *      Jerome Glisse
  */
-#include "r600_hw_context_priv.h"
+#include "../radeon/r600_cs.h"
 #include "radeonsi_pm4.h"
 #include "radeonsi_pipe.h"
 #include "sid.h"
 /* Get backends mask */
 void si_get_backend_mask(struct r600_context *ctx)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct si_resource *buffer;
+       struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+       struct r600_resource *buffer;
        uint32_t *results;
-       unsigned num_backends = ctx->screen->info.r600_num_backends;
+       unsigned num_backends = ctx->screen->b.info.r600_num_backends;
        unsigned i, mask = 0;
 
        /* if backend_map query is supported by the kernel */
-       if (ctx->screen->info.r600_backend_map_valid) {
-               unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
-               unsigned backend_map = ctx->screen->info.r600_backend_map;
+       if (ctx->screen->b.info.r600_backend_map_valid) {
+               unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes;
+               unsigned backend_map = ctx->screen->b.info.r600_backend_map;
                unsigned item_width = 4, item_mask = 0x7;
 
                while(num_tile_pipes--) {
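
The loop body falls past the hunk boundary, but the decoding it performs is simple: the kernel packs one render-backend index into each 4-bit field of backend_map, and each field, masked with item_mask (0x7), contributes one bit to the backend mask. A minimal standalone sketch of that decoding; the helper name and sample map value are illustrative, not from the driver:

#include <stdio.h>

/* Sketch of the decoding above: one backend index per 4-bit field,
 * item_mask (0x7) extracts the index, each index sets a mask bit. */
static unsigned decode_backend_mask(unsigned backend_map,
                                    unsigned num_tile_pipes)
{
        const unsigned item_width = 4, item_mask = 0x7;
        unsigned mask = 0;

        while (num_tile_pipes--) {
                mask |= 1u << (backend_map & item_mask);
                backend_map >>= item_width;
        }
        return mask;
}

int main(void)
{
        /* Hypothetical 4-pipe map 0x3210: the fields decode to
         * backends 0, 1, 2 and 3, so the mask comes out as 0xf. */
        printf("mask = 0x%x\n", decode_backend_mask(0x3210, 4));
        return 0;
}
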
@@ -61,43 +61,43 @@ void si_get_backend_mask(struct r600_context *ctx)
        /* otherwise backup path for older kernels */
 
        /* create buffer for event data */
-       buffer = si_resource_create_custom(&ctx->screen->screen,
+       buffer = r600_resource_create_custom(&ctx->screen->b.b,
                                           PIPE_USAGE_STAGING,
                                           ctx->max_db*16);
        if (!buffer)
                goto err;
 
        /* initialize buffer with zeroes */
-       results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+       results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
        if (results) {
                uint64_t va = 0;
 
                memset(results, 0, ctx->max_db * 4 * 4);
-               ctx->ws->buffer_unmap(buffer->cs_buf);
+               ctx->b.ws->buffer_unmap(buffer->cs_buf);
 
                /* emit EVENT_WRITE for ZPASS_DONE */
-               va = r600_resource_va(&ctx->screen->screen, (void *)buffer);
+               va = r600_resource_va(&ctx->screen->b.b, (void *)buffer);
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = va >> 32;
 
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-               cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
+               cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);
 
                /* analyze results */
-               results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ);
+               results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ);
                if (results) {
                        for(i = 0; i < ctx->max_db; i++) {
                                /* at least highest bit will be set if backend is used */
                                if (results[i*4 + 1])
                                        mask |= (1<<i);
                        }
-                       ctx->ws->buffer_unmap(buffer->cs_buf);
+                       ctx->b.ws->buffer_unmap(buffer->cs_buf);
                }
        }
 
-       si_resource_reference(&buffer, NULL);
+       r600_resource_reference(&buffer, NULL);
 
        if (mask != 0) {
                ctx->backend_mask = mask;
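
For reference, the fallback path's buffer holds four dwords per DB (hence ctx->max_db * 16 bytes): ZPASS_DONE makes every active backend write a 64-bit counter into its slot, so a nonzero high dword at results[i*4 + 1] marks a live backend. A standalone restatement of the scan above, with a made-up two-DB dump:

#include <stdint.h>
#include <stdio.h>

/* Each DB owns 4 dwords; an active backend leaves a nonzero high
 * counter dword at results[i*4 + 1] after ZPASS_DONE. */
static unsigned scan_zpass_results(const uint32_t *results, unsigned max_db)
{
        unsigned i, mask = 0;

        for (i = 0; i < max_db; i++) {
                if (results[i * 4 + 1])
                        mask |= 1u << i;
        }
        return mask;
}

int main(void)
{
        /* Hypothetical dump for 2 DBs: only DB 0 wrote its counter. */
        uint32_t results[8] = { 0x12, 0x80000000, 0, 0,  0, 0, 0, 0 };
        printf("backend mask = 0x%x\n", scan_zpass_results(results, 2));
        return 0;
}
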
@@ -129,7 +129,7 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        int i;
 
        /* The number of dwords we already used in the CS so far. */
-       num_dw += ctx->cs->cdw;
+       num_dw += ctx->b.rings.gfx.cs->cdw;
 
        for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
                if (ctx->atoms.array[i]->dirty) {
@@ -149,7 +149,9 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        num_dw += ctx->num_cs_dw_nontimer_queries_suspend;
 
        /* Count in streamout_end at the end of CS. */
-       num_dw += ctx->num_cs_dw_streamout_end;
+       if (ctx->b.streamout.begin_emitted) {
+               num_dw += ctx->b.streamout.num_dw_for_end;
+       }
 
        /* Count in render_condition(NULL) at the end of CS. */
        if (ctx->predicate_drawing) {
@@ -157,10 +159,7 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        }
 
        /* Count in framebuffer cache flushes at the end of CS. */
-       num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
-
-       /* Save 16 dwords for the fence mechanism. */
-       num_dw += 16;
+       num_dw += ctx->atoms.cache_flush->num_dw;
 
 #if R600_TRACE_CS
        if (ctx->screen->trace_bo) {
@@ -170,69 +169,39 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 
        /* Flush if there's not enough space. */
        if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
-               radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
+               radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC);
        }
 }
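
The contract here is the usual command-stream reservation pattern: a caller totals the worst-case dwords it is about to emit, calls si_need_cs_space(), and only then writes into cs->buf, trusting the function to flush first whenever the reservation would overflow RADEON_MAX_CMDBUF_DWORDS. A toy standalone model of that check-then-flush behaviour; the types, limit and sizes are stand-ins, not the driver's:

#include <stdio.h>

#define TOY_MAX_CMDBUF_DWORDS 16384 /* stand-in for RADEON_MAX_CMDBUF_DWORDS */

/* Toy command stream standing in for radeon_winsys_cs. */
struct toy_cs { unsigned cdw; };

static void toy_flush(struct toy_cs *cs)
{
        printf("flushing %u dwords\n", cs->cdw);
        cs->cdw = 0;
}

/* Same shape as si_need_cs_space(): add the request to the dwords
 * already used and flush first if the total would not fit. */
static void toy_need_space(struct toy_cs *cs, unsigned num_dw)
{
        if (cs->cdw + num_dw > TOY_MAX_CMDBUF_DWORDS)
                toy_flush(cs);
}

int main(void)
{
        struct toy_cs cs = { .cdw = 16380 };
        toy_need_space(&cs, 10);            /* 16390 > 16384: flushes */
        printf("cdw after = %u\n", cs.cdw); /* prints 0 */
        return 0;
}
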
 
-static void r600_flush_framebuffer(struct r600_context *ctx)
-{
-       struct si_pm4_state *pm4;
-
-       if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
-               return;
-
-       pm4 = si_pm4_alloc_state(ctx);
-
-       if (pm4 == NULL)
-               return;
-
-       si_cmd_surface_sync(pm4, S_0085F0_CB0_DEST_BASE_ENA(1) |
-                               S_0085F0_CB1_DEST_BASE_ENA(1) |
-                               S_0085F0_CB2_DEST_BASE_ENA(1) |
-                               S_0085F0_CB3_DEST_BASE_ENA(1) |
-                               S_0085F0_CB4_DEST_BASE_ENA(1) |
-                               S_0085F0_CB5_DEST_BASE_ENA(1) |
-                               S_0085F0_CB6_DEST_BASE_ENA(1) |
-                               S_0085F0_CB7_DEST_BASE_ENA(1) |
-                               S_0085F0_DB_ACTION_ENA(1) |
-                               S_0085F0_DB_DEST_BASE_ENA(1));
-       si_cmd_flush_and_inv_cb_meta(pm4);
-
-       si_pm4_emit(ctx, pm4);
-       si_pm4_free_state(ctx, pm4, ~0);
-
-       ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
-       ctx->flush_and_inv_cb_meta = false;
-}
-
 void si_context_flush(struct r600_context *ctx, unsigned flags)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
-       bool queries_suspended = false;
-
-#if 0
-       bool streamout_suspended = false;
-#endif
+       struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 
        if (!cs->cdw)
                return;
 
        /* suspend queries */
+       ctx->nontimer_queries_suspended = false;
        if (ctx->num_cs_dw_nontimer_queries_suspend) {
                r600_context_queries_suspend(ctx);
-               queries_suspended = true;
+               ctx->nontimer_queries_suspended = true;
        }
 
-#if 0
-       if (ctx->num_cs_dw_streamout_end) {
-               r600_context_streamout_end(ctx);
-               streamout_suspended = true;
+       ctx->b.streamout.suspended = false;
+
+       if (ctx->b.streamout.begin_emitted) {
+               r600_emit_streamout_end(&ctx->b);
+               ctx->b.streamout.suspended = true;
        }
-#endif
 
-       r600_flush_framebuffer(ctx);
+       ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+                       R600_CONTEXT_FLUSH_AND_INV_CB_META |
+                       R600_CONTEXT_FLUSH_AND_INV_DB |
+                       R600_CONTEXT_FLUSH_AND_INV_DB_META |
+                       R600_CONTEXT_INV_TEX_CACHE;
+       si_emit_cache_flush(&ctx->b, NULL);
 
-       /* partial flush is needed to avoid lockups on some chips with user fences */
+       /* this is probably not needed anymore */
        cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
        cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 
@@ -252,7 +221,7 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
 #endif
 
        /* Flush the CS. */
-       ctx->ws->cs_flush(ctx->cs, flags, 0);
+       ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0);
 
 #if R600_TRACE_CS
        if (ctx->screen->trace_bo) {
@@ -274,8 +243,17 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
        }
 #endif
 
+       si_begin_new_cs(ctx);
+}
+
+void si_begin_new_cs(struct r600_context *ctx)
+{
        ctx->pm4_dirty_cdwords = 0;
-       ctx->flags = 0;
+
+       /* Flush read caches at the beginning of CS. */
+       ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+                       R600_CONTEXT_INV_CONST_CACHE |
+                       R600_CONTEXT_INV_SHADER_CACHE;
 
        /* set all valid group as dirty so they get reemited on
         * next draw command
@@ -286,44 +264,19 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
        si_pm4_emit(ctx, ctx->queued.named.init);
        ctx->emitted.named.init = ctx->queued.named.init;
 
-#if 0
-       if (streamout_suspended) {
-               ctx->streamout_start = TRUE;
-               ctx->streamout_append_bitmask = ~0;
+       if (ctx->b.streamout.suspended) {
+               ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask;
+               r600_streamout_buffers_dirty(&ctx->b);
        }
-#endif
 
        /* resume queries */
-       if (queries_suspended) {
+       if (ctx->nontimer_queries_suspended) {
                r600_context_queries_resume(ctx);
        }
 
        si_all_descriptors_begin_new_cs(ctx);
 }
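
Taken together, the flush path now splits into two halves: si_context_flush() suspends queries and streamout, emits the end-of-CS cache flushes and submits, while si_begin_new_cs() re-arms the read-cache invalidations, re-emits the init state and resumes whatever was suspended. A hedged outline of that ordering, with the driver entry points reduced to printing stubs:

#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for the driver calls named in the hunks above. */
static void suspend_queries(void)   { puts("1. suspend nontimer queries"); }
static void end_streamout(void)     { puts("2. emit streamout end"); }
static void emit_cache_flush(void)  { puts("3. flush+inv CB/DB, inv TEX cache"); }
static void submit_cs(void)         { puts("4. ws->cs_flush()"); }
static void inv_read_caches(void)   { puts("5. inv TEX/CONST/SHADER caches"); }
static void reemit_init_state(void) { puts("6. re-emit init pm4 state"); }
static void resume_streamout(void)  { puts("7. restore streamout append mask"); }
static void resume_queries(void)    { puts("8. resume nontimer queries"); }

int main(void)
{
        bool queries_pending = true, streamout_active = true;

        /* si_context_flush() half */
        if (queries_pending)  suspend_queries();
        if (streamout_active) end_streamout();
        emit_cache_flush();
        submit_cs();

        /* si_begin_new_cs() half */
        inv_read_caches();
        reemit_init_state();
        if (streamout_active) resume_streamout();
        if (queries_pending)  resume_queries();
        return 0;
}
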
 
-void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       uint64_t va;
-
-       si_need_cs_space(ctx, 10, FALSE);
-
-       va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
-       va = va + (offset << 2);
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-       cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
-       /* DATA_SEL | INT_EN | ADDRESS_HI */
-       cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
-       cs->buf[cs->cdw++] = value;                   /* DATA_LO */
-       cs->buf[cs->cdw++] = 0;                       /* DATA_HI */
-       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
-}
-
 static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
                                       bool test_status_bit)
 {
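
The body of r600_query_read_result() sits outside the hunk; in this generation of the driver it reads a begin/end pair of 64-bit counters, each stored as two dwords, and when test_status_bit is set it only trusts the pair if both samples carry the GPU-written valid flag in bit 63. A standalone reconstruction of that logic (a sketch, not the committed body verbatim):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Combine two dwords into a 64-bit sample; optionally require the
 * "result written" flag (bit 63) on both start and end. With both
 * flags set, the bit cancels out in the subtraction. */
static uint64_t read_result(const uint32_t *map, unsigned start_index,
                            unsigned end_index, bool test_status_bit)
{
        uint64_t start = (uint64_t)map[start_index] |
                         (uint64_t)map[start_index + 1] << 32;
        uint64_t end   = (uint64_t)map[end_index] |
                         (uint64_t)map[end_index + 1] << 32;

        if (!test_status_bit ||
            ((start & (1ull << 63)) && (end & (1ull << 63))))
                return end - start;
        return 0;
}

int main(void)
{
        /* Hypothetical valid begin/end samples, 150 units apart. */
        uint64_t start = (1ull << 63) | 100, end = (1ull << 63) | 250;
        uint32_t map[4] = { (uint32_t)start, (uint32_t)(start >> 32),
                            (uint32_t)end,   (uint32_t)(end >> 32) };

        printf("delta = %llu\n",
               (unsigned long long)read_result(map, 0, 2, true));
        return 0;
}
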
@@ -347,7 +300,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
        unsigned results_base = query->results_start;
        char *map;
 
-       map = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs,
+       map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs,
                                  PIPE_TRANSFER_READ |
                                  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
        if (!map)
@@ -425,13 +378,13 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
        }
 
        query->results_start = query->results_end;
-       ctx->ws->buffer_unmap(query->buffer->cs_buf);
+       ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
        return TRUE;
 }
 
 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
+       struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        unsigned new_results_end, i;
        uint32_t *results;
        uint64_t va;
@@ -448,7 +401,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
-               results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+               results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
                if (results) {
                        results = (uint32_t*)((char*)results + query->results_end);
                        memset(results, 0, query->result_size);
@@ -460,7 +413,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
                                        results[(i * 4)+3] = 0x80000000;
                                }
                        }
-                       ctx->ws->buffer_unmap(query->buffer->cs_buf);
+                       ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
                }
                break;
        case PIPE_QUERY_TIME_ELAPSED:
@@ -469,17 +422,17 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-               results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
+               results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
                results = (uint32_t*)((char*)results + query->results_end);
                memset(results, 0, query->result_size);
-               ctx->ws->buffer_unmap(query->buffer->cs_buf);
+               ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
                break;
        default:
                assert(0);
        }
 
        /* emit begin query */
-       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+       va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
        va += query->results_end;
 
        switch (query->type) {
@@ -496,8 +449,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-               cs->buf[cs->cdw++] = query->results_end;
-               cs->buf[cs->cdw++] = 0;
+               cs->buf[cs->cdw++] = va;
+               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
@@ -511,7 +464,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
                assert(0);
        }
        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+       cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
 
        if (!si_is_timer_query(query->type)) {
                ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
@@ -520,7 +473,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
+       struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        uint64_t va;
        unsigned new_results_end;
 
@@ -536,7 +489,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
                }
        }
 
-       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+       va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
        /* emit end query */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -551,10 +504,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+               va += query->results_end + query->result_size/2;
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-               cs->buf[cs->cdw++] = query->results_end + query->result_size/2;
-               cs->buf[cs->cdw++] = 0;
+               cs->buf[cs->cdw++] = va;
+               cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                va += query->results_end + query->result_size/2;
@@ -571,7 +525,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
                assert(0);
        }
        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
+       cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
 
        query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
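
The line above advances the write cursor through the query buffer as a ring: results accumulate at results_end and the modulo wraps the cursor back to offset 0 once the buffer is exhausted. A quick standalone check of that cursor arithmetic; the buffer and slot sizes are made up:

#include <stdio.h>

int main(void)
{
        unsigned width0 = 4096;      /* hypothetical buffer size */
        unsigned result_size = 32;   /* hypothetical per-query slot */
        unsigned results_end = 4064; /* last slot before the end */

        results_end = (results_end + result_size) % width0;
        printf("cursor wrapped to %u\n", results_end); /* prints 0 */
        return 0;
}
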
 
@@ -583,7 +537,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
                            int flag_wait)
 {
-       struct radeon_winsys_cs *cs = ctx->cs;
+       struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        uint64_t va;
 
        if (operation == PREDICATION_OP_CLEAR) {
@@ -605,7 +559,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 
                op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
                                (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
-               va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+               va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
 
                /* emit predicate packets for all data blocks */
                while (results_base != query->results_end) {
@@ -613,8 +567,8 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
                        cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
                        cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer,
-                                                                            RADEON_USAGE_READ);
+                       cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+                                                                  query->buffer, RADEON_USAGE_READ);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
 
                        /* set CONTINUE bit for all packets except the first */
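
The code acting on that comment falls past the hunk: after the first SET_PREDICATION packet, the loop ORs the CONTINUE bit into op so the GPU chains the remaining result blocks into a single visibility verdict instead of restarting the predicate. A standalone sketch of that loop shape; the bit value and op are stand-ins for the real PREDICATION_* macros:

#include <stdio.h>

#define TOY_PREDICATION_CONTINUE (1u << 31) /* stand-in for the real bit */

int main(void)
{
        unsigned results_base = 0, results_end = 96;
        unsigned result_size = 32, buffer_size = 4096;
        unsigned op = 0x100; /* hypothetical PRED_OP(...) | wait hint */

        while (results_base != results_end) {
                printf("SET_PREDICATION @%u, op=0x%x\n", results_base, op);
                results_base = (results_base + result_size) % buffer_size;
                op |= TOY_PREDICATION_CONTINUE; /* every packet but the first */
        }
        return 0;
}
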
@@ -669,7 +623,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
         * being written by the gpu, hence staging is probably a good
         * usage pattern.
         */
-       query->buffer = si_resource_create_custom(&ctx->screen->screen,
+       query->buffer = r600_resource_create_custom(&ctx->screen->b.b,
                                                  PIPE_USAGE_STAGING,
                                                  buffer_size);
        if (!query->buffer) {
@@ -681,7 +635,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
 
 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
 {
-       si_resource_reference(&query->buffer, NULL);
+       r600_resource_reference(&query->buffer, NULL);
        free(query);
 }
 
@@ -709,7 +663,7 @@ boolean r600_context_query_result(struct r600_context *ctx,
                break;
        case PIPE_QUERY_TIMESTAMP:
        case PIPE_QUERY_TIME_ELAPSED:
-               *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
+               *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq;
                break;
        case PIPE_QUERY_SO_STATISTICS:
                *result_so = query->result.so;
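
The TIMESTAMP/TIME_ELAPSED conversion just above turns raw counter ticks into nanoseconds: r600_clock_crystal_freq is reported by the kernel in kHz, so ticks * 1000000 / freq_khz yields ns. A quick worked check, assuming the common 27000 kHz crystal and a made-up tick count:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t ticks = 2700, freq_khz = 27000;
        uint64_t ns = (1000000 * ticks) / freq_khz; /* 100000 ns = 100 us */

        printf("%llu ticks -> %llu ns\n",
               (unsigned long long)ticks, (unsigned long long)ns);
        return 0;
}
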
@@ -741,33 +695,6 @@ void r600_context_queries_resume(struct r600_context *ctx)
        }
 }
 
-void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       si_need_cs_space(ctx, 14 + 21, TRUE);
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-       cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
-       cs->buf[cs->cdw++] = 0;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-       cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - SI_CONTEXT_REG_OFFSET) >> 2;
-       cs->buf[cs->cdw++] = t->stride >> 2;
-
-#if 0
-       cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
-       cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
-       cs->buf[cs->cdw++] = 0; /* src address lo */
-       cs->buf[cs->cdw++] = 0; /* src address hi */
-       cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
-       cs->buf[cs->cdw++] = 0; /* unused */
-#endif
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
-
-}
-
 #if R600_TRACE_CS
 void r600_trace_emit(struct r600_context *rctx)
 {