radeonsi: Handle position input parameter for pixel shaders v2
[mesa.git] / src / gallium / drivers / radeonsi / r600_hw_context.c
index 2ba77b736a6b6e902360c347e711aed154e2c359..685f1951738a88f4442cb5f00286751197d5c6c0 100644 (file)
@@ -24,6 +24,7 @@
  *      Jerome Glisse
  */
 #include "r600_hw_context_priv.h"
+#include "radeonsi_pm4.h"
 #include "radeonsi_pipe.h"
 #include "sid.h"
 #include "util/u_memory.h"
 #define GROUP_FORCE_NEW_BLOCK  0
 
 /* Get backends mask */
-void r600_get_backend_mask(struct r600_context *ctx)
+void si_get_backend_mask(struct r600_context *ctx)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_resource *buffer;
+       struct si_resource *buffer;
        uint32_t *results;
        unsigned num_backends = ctx->screen->info.r600_num_backends;
        unsigned i, mask = 0;
@@ -65,9 +66,9 @@ void r600_get_backend_mask(struct r600_context *ctx)
        /* otherwise backup path for older kernels */
 
        /* create buffer for event data */
-       buffer = (struct r600_resource*)
-               pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
-                                  PIPE_USAGE_STAGING, ctx->max_db*16);
+       buffer = si_resource_create_custom(&ctx->screen->screen,
+                                          PIPE_USAGE_STAGING,
+                                          ctx->max_db*16);
        if (!buffer)
                goto err;
 
@@ -101,7 +102,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
                }
        }
 
-       pipe_resource_reference((struct pipe_resource**)&buffer, NULL);
+       si_resource_reference(&buffer, NULL);
 
        if (mask != 0) {
                ctx->backend_mask = mask;
@@ -114,210 +115,19 @@ err:
        return;
 }
 
-static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
-               return;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-
-       ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
-}
-
-void r600_init_cs(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       /* All asics require this one */
-       cs->buf[cs->cdw++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0);
-       cs->buf[cs->cdw++] = 0x80000000;
-       cs->buf[cs->cdw++] = 0x80000000;
-
-       ctx->init_dwords = cs->cdw;
-}
-
-static void r600_init_block(struct r600_context *ctx,
-                           struct r600_block *block,
-                           const struct r600_reg *reg, int index, int nreg,
-                           unsigned opcode, unsigned offset_base)
-{
-       int i = index;
-       int j, n = nreg;
-
-       /* initialize block */
-       block->flags = 0;
-       block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */
-       block->start_offset = reg[i].offset;
-       block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0);
-       block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2;
-       block->reg = &block->pm4[block->pm4_ndwords];
-       block->pm4_ndwords += n;
-       block->nreg = n;
-       block->nreg_dirty = n;
-       LIST_INITHEAD(&block->list);
-       LIST_INITHEAD(&block->enable_list);
-
-       for (j = 0; j < n; j++) {
-               if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) {
-                       block->flags |= REG_FLAG_DIRTY_ALWAYS;
-               }
-               if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) {
-                       if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
-                               block->status |= R600_BLOCK_STATUS_ENABLED;
-                               LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
-                               LIST_ADDTAIL(&block->list,&ctx->dirty);
-                       }
-               }
-               if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) {
-                       block->flags |= REG_FLAG_FLUSH_CHANGE;
-               }
-
-               if (reg[i+j].flags & REG_FLAG_NEED_BO) {
-                       block->nbo++;
-                       assert(block->nbo < R600_BLOCK_MAX_BO);
-                       block->pm4_bo_index[j] = block->nbo;
-                       block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0);
-                       block->pm4[block->pm4_ndwords++] = 0x00000000;
-                       block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1;
-               }
-       }
-       /* check that we stay in limit */
-       assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
-}
-
-int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg,
-                          unsigned opcode, unsigned offset_base)
-{
-       struct r600_block *block;
-       struct r600_range *range;
-       int offset;
-
-       for (unsigned i = 0, n = 0; i < nreg; i += n) {
-               /* ignore new block balise */
-               if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) {
-                       n = 1;
-                       continue;
-               }
-
-               /* register that need relocation are in their own group */
-               /* find number of consecutive registers */
-               n = 0;
-               offset = reg[i].offset;
-               while (reg[i + n].offset == offset) {
-                       n++;
-                       offset += 4;
-                       if ((n + i) >= nreg)
-                               break;
-                       if (n >= (R600_BLOCK_MAX_REG - 2))
-                               break;
-               }
-
-               /* allocate new block */
-               block = calloc(1, sizeof(struct r600_block));
-               if (block == NULL) {
-                       return -ENOMEM;
-               }
-               ctx->nblocks++;
-               for (int j = 0; j < n; j++) {
-                       range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)];
-                       /* create block table if it doesn't exist */
-                       if (!range->blocks)
-                               range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *));
-                       if (!range->blocks)
-                               return -1;
-
-                       range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block;
-               }
-
-               r600_init_block(ctx, block, reg, i, n, opcode, offset_base);
-
-       }
-       return 0;
-}
-
-
 /* initialize */
-void r600_context_fini(struct r600_context *ctx)
-{
-       struct r600_block *block;
-       struct r600_range *range;
-
-       for (int i = 0; i < NUM_RANGES; i++) {
-               if (!ctx->range[i].blocks)
-                       continue;
-               for (int j = 0; j < (1 << HASH_SHIFT); j++) {
-                       block = ctx->range[i].blocks[j];
-                       if (block) {
-                               for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) {
-                                       range = &ctx->range[CTX_RANGE_ID(offset)];
-                                       range->blocks[CTX_BLOCK_ID(offset)] = NULL;
-                               }
-                               for (int k = 1; k <= block->nbo; k++) {
-                                       pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL);
-                               }
-                               free(block);
-                       }
-               }
-               free(ctx->range[i].blocks);
-       }
-       free(ctx->range);
-       free(ctx->blocks);
-       ctx->ws->cs_destroy(ctx->cs);
-}
-
-int r600_setup_block_table(struct r600_context *ctx)
-{
-       /* setup block table */
-       int c = 0;
-       ctx->blocks = calloc(ctx->nblocks, sizeof(void*));
-       if (!ctx->blocks)
-               return -ENOMEM;
-       for (int i = 0; i < NUM_RANGES; i++) {
-               if (!ctx->range[i].blocks)
-                       continue;
-               for (int j = 0, add; j < (1 << HASH_SHIFT); j++) {
-                       if (!ctx->range[i].blocks[j])
-                               continue;
-
-                       add = 1;
-                       for (int k = 0; k < c; k++) {
-                               if (ctx->blocks[k] == ctx->range[i].blocks[j]) {
-                                       add = 0;
-                                       break;
-                               }
-                       }
-                       if (add) {
-                               assert(c < ctx->nblocks);
-                               ctx->blocks[c++] = ctx->range[i].blocks[j];
-                               j += (ctx->range[i].blocks[j]->nreg) - 1;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
+void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
                        boolean count_draw_in)
 {
-       struct r600_atom *state;
-
        /* The number of dwords we already used in the CS so far. */
        num_dw += ctx->cs->cdw;
 
        if (count_draw_in) {
                /* The number of dwords all the dirty states would take. */
-               LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) {
-                       num_dw += state->num_dw;
-               }
-
                num_dw += ctx->pm4_dirty_cdwords;
 
                /* The upper-bound of how much a draw command would take. */
-               num_dw += R600_MAX_DRAW_CS_DWORDS;
+               num_dw += SI_MAX_DRAW_CS_DWORDS;
        }
 
        /* Count in queries_suspend. */
@@ -343,175 +153,40 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        }
 }
 
-void r600_context_dirty_block(struct r600_context *ctx,
-                             struct r600_block *block,
-                             int dirty, int index)
+static void r600_flush_framebuffer(struct r600_context *ctx)
 {
-       if ((index + 1) > block->nreg_dirty)
-               block->nreg_dirty = index + 1;
-
-       if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
-               block->status |= R600_BLOCK_STATUS_DIRTY;
-               ctx->pm4_dirty_cdwords += block->pm4_ndwords;
-               if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
-                       block->status |= R600_BLOCK_STATUS_ENABLED;
-                       LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
-               }
-               LIST_ADDTAIL(&block->list,&ctx->dirty);
-
-               if (block->flags & REG_FLAG_FLUSH_CHANGE) {
-                       r600_context_ps_partial_flush(ctx);
-               }
-       }
-}
-
-void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
-{
-       struct r600_block *block;
-       int dirty;
-       for (int i = 0; i < state->nregs; i++) {
-               unsigned id, reloc_id;
-               struct r600_pipe_reg *reg = &state->regs[i];
-
-               block = reg->block;
-               id = reg->id;
-
-               dirty = block->status & R600_BLOCK_STATUS_DIRTY;
-
-               if (reg->value != block->reg[id]) {
-                       block->reg[id] = reg->value;
-                       dirty |= R600_BLOCK_STATUS_DIRTY;
-               }
-               if (block->flags & REG_FLAG_DIRTY_ALWAYS)
-                       dirty |= R600_BLOCK_STATUS_DIRTY;
-               if (block->pm4_bo_index[id]) {
-                       /* find relocation */
-                       reloc_id = block->pm4_bo_index[id];
-                       pipe_resource_reference((struct pipe_resource**)&block->reloc[reloc_id].bo, &reg->bo->b.b);
-                       block->reloc[reloc_id].bo_usage = reg->bo_usage;
-                       /* always force dirty for relocs for now */
-                       dirty |= R600_BLOCK_STATUS_DIRTY;
-               }
-
-               if (dirty)
-                       r600_context_dirty_block(ctx, block, dirty, id);
-       }
-}
+       struct si_pm4_state *pm4;
 
-struct r600_resource *r600_context_reg_bo(struct r600_context *ctx, unsigned offset)
-{
-       struct r600_range *range;
-       struct r600_block *block;
-       unsigned id;
-
-       range = &ctx->range[CTX_RANGE_ID(offset)];
-       block = range->blocks[CTX_BLOCK_ID(offset)];
-       offset -= block->start_offset;
-       id = block->pm4_bo_index[offset >> 2];
-       if (block->reloc[id].bo) {
-               return block->reloc[id].bo;
-       }
-       return NULL;
-}
-
-void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
-       int cp_dwords = block->pm4_ndwords, start_dword = 0;
-       int new_dwords = 0;
-       int nbo = block->nbo;
-
-       if (block->nreg_dirty == 0 && optional) {
-               goto out;
-       }
-
-       if (nbo) {
-               ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH;
-
-               for (int j = 0; j < block->nreg; j++) {
-                       if (block->pm4_bo_index[j]) {
-                               /* find relocation */
-                               struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
-                               block->pm4[reloc->bo_pm4_index] =
-                                       r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
-                               nbo--;
-                               if (nbo == 0)
-                                       break;
-                       }
-               }
-               ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
-       }
-
-       optional &= (block->nreg_dirty != block->nreg);
-       if (optional) {
-               new_dwords = block->nreg_dirty;
-               start_dword = cs->cdw;
-               cp_dwords = new_dwords + 2;
-       }
-       memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4);
-       cs->cdw += cp_dwords;
-
-       if (optional) {
-               uint32_t newword;
-
-               newword = cs->buf[start_dword];
-               newword &= PKT_COUNT_C;
-               newword |= PKT_COUNT_S(new_dwords);
-               cs->buf[start_dword] = newword;
-       }
-out:
-       block->status ^= R600_BLOCK_STATUS_DIRTY;
-       block->nreg_dirty = 0;
-       LIST_DELINIT(&block->list);
-}
-
-void r600_inval_shader_cache(struct r600_context *ctx)
-{
-       ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-       ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
-
-void r600_inval_texture_cache(struct r600_context *ctx)
-{
-       ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
-
-void r600_inval_vertex_cache(struct r600_context *ctx)
-{
-       /* Some GPUs don't have the vertex cache and must use the texture cache instead. */
-       ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-}
-
-void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
-{
        if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
                return;
 
-       ctx->atom_surface_sync.flush_flags |=
-               r600_get_cb_flush_flags(ctx) |
-               (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
-
-       if (flush_now) {
-               r600_emit_atom(ctx, &ctx->atom_surface_sync.atom);
-       } else {
-               r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-       }
+       pm4 = CALLOC_STRUCT(si_pm4_state);
+       if (pm4 == NULL)
+               return; /* allocation failed: emit nothing, dirty flag stays set */
+
+       /* One surface sync with every CB0-CB7/DB DEST_BASE flag and DB_ACTION_ENA. */
+       si_cmd_surface_sync(pm4,
+               S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
+               S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
+               S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
+               S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) |
+               S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1));
+       si_pm4_emit(ctx, pm4);
+       si_pm4_free_state(ctx, pm4, ~0);
 
        ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
 }
 
-void r600_context_flush(struct r600_context *ctx, unsigned flags)
+void si_context_flush(struct r600_context *ctx, unsigned flags)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_block *enable_block = NULL;
        bool queries_suspended = false;
+
+#if 0
        bool streamout_suspended = false;
+#endif
 
-       if (cs->cdw == ctx->init_dwords)
+       if (!cs->cdw)
                return;
 
        /* suspend queries */
@@ -520,29 +195,34 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
                queries_suspended = true;
        }
 
+#if 0
        if (ctx->num_cs_dw_streamout_end) {
                r600_context_streamout_end(ctx);
                streamout_suspended = true;
        }
+#endif
 
-       r600_flush_framebuffer(ctx, true);
+       r600_flush_framebuffer(ctx);
 
        /* partial flush is needed to avoid lockups on some chips with user fences */
        cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
        cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 
+       /* force to keep tiling flags */
+       flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
+
        /* Flush the CS. */
        ctx->ws->cs_flush(ctx->cs, flags);
 
        ctx->pm4_dirty_cdwords = 0;
        ctx->flags = 0;
 
-       r600_init_cs(ctx);
-
+#if 0
        if (streamout_suspended) {
                ctx->streamout_start = TRUE;
                ctx->streamout_append_bitmask = ~0;
        }
+#endif
 
        /* resume queries */
        if (queries_suspended) {
@@ -552,22 +232,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        /* set all valid group as dirty so they get reemited on
         * next draw command
         */
-       LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) {
-               if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) {
-                       LIST_ADDTAIL(&enable_block->list,&ctx->dirty);
-                       enable_block->status |= R600_BLOCK_STATUS_DIRTY;
-               }
-               ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords;
-               enable_block->nreg_dirty = enable_block->nreg;
-       }
+       si_pm4_reset_emitted(ctx);
 }
 
-void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
+void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
        uint64_t va;
 
-       r600_need_cs_space(ctx, 10, FALSE);
+       si_need_cs_space(ctx, 10, FALSE);
 
        va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
        va = va + (offset << 2);
@@ -691,7 +364,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        uint32_t *results;
        uint64_t va;
 
-       r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
+       si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
 
        new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
 
@@ -822,7 +495,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
        uint64_t va;
 
        if (operation == PREDICATION_OP_CLEAR) {
-               r600_need_cs_space(ctx, 3, FALSE);
+               si_need_cs_space(ctx, 3, FALSE);
 
                cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
                cs->buf[cs->cdw++] = 0;
@@ -836,7 +509,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
                count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
                count /= query->result_size;
 
-               r600_need_cs_space(ctx, 5 * count, TRUE);
+               si_need_cs_space(ctx, 5 * count, TRUE);
 
                op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
                                (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
@@ -900,8 +573,9 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
         * being written by the gpu, hence staging is probably a good
         * usage pattern.
         */
-       query->buffer = (struct r600_resource*)
-               pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buffer_size);
+       query->buffer = si_resource_create_custom(&ctx->screen->screen,
+                                                 PIPE_USAGE_STAGING,
+                                                 buffer_size);
        if (!query->buffer) {
                FREE(query);
                return NULL;
@@ -911,7 +585,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
 
 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
 {
-       pipe_resource_reference((struct pipe_resource**)&query->buffer, NULL);
+       si_resource_reference(&query->buffer, NULL);
        free(query);
 }
 
@@ -970,135 +644,10 @@ void r600_context_queries_resume(struct r600_context *ctx)
        }
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned *strides = ctx->vs_shader_so_strides;
-       unsigned buffer_en, i;
-
-       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-       ctx->num_cs_dw_streamout_end =
-               12 + /* flush_vgt_streamout */
-               util_bitcount(buffer_en) * 8 +
-               3;
-
-       r600_need_cs_space(ctx,
-                          12 + /* flush_vgt_streamout */
-                          6 + /* enables */
-                          util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
-                          util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
-                          ctx->num_cs_dw_streamout_end, TRUE);
-
-       if (ctx->chip_class >= CAYMAN) {
-               evergreen_flush_vgt_streamout(ctx);
-               evergreen_set_streamout_enable(ctx, buffer_en);
-       }
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       t[i]->stride = strides[i];
-                       t[i]->so_index = i;
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
-                       cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
-                                                       16*i - SI_CONTEXT_REG_OFFSET) >> 2;
-                       cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
-                                                       t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
-                       cs->buf[cs->cdw++] = strides[i] >> 2;              /* VTX_STRIDE (in DW) */
-                       cs->buf[cs->cdw++] = 0;                    /* BUFFER_BASE */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer),
-                                                     RADEON_USAGE_WRITE);
-
-                       if (ctx->streamout_append_bitmask & (1 << i)) {
-                               /* Append. */
-                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* src address lo */
-                               cs->buf[cs->cdw++] = 0; /* src address hi */
-
-                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                               cs->buf[cs->cdw++] =
-                                       r600_context_bo_reloc(ctx,  t[i]->filled_size,
-                                                             RADEON_USAGE_READ);
-                       } else {
-                               /* Start from the beginning. */
-                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                       }
-               }
-#endif
-       }
-}
-
-void r600_context_streamout_end(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned i, flush_flags = 0;
-
-       evergreen_flush_vgt_streamout(ctx);
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                       cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                      STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
-                                                      STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
-                       cs->buf[cs->cdw++] = 0; /* dst address lo */
-                       cs->buf[cs->cdw++] = 0; /* dst address hi */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx,  t[i]->filled_size,
-                                                     RADEON_USAGE_WRITE);
-
-                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
-               }
-#endif
-       }
-
-       evergreen_set_streamout_enable(ctx, 0);
-
-       ctx->atom_surface_sync.flush_flags |= flush_flags;
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-
-       ctx->num_cs_dw_streamout_end = 0;
-
-       /* XXX print some debug info */
-       for (i = 0; i < ctx->num_so_targets; i++) {
-               if (!t[i])
-                       continue;
-
-               uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
-               printf("FILLED_SIZE%i: %u\n", i, *ptr);
-               ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
-       }
-}
-
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
-       r600_need_cs_space(ctx, 14 + 21, TRUE);
+       si_need_cs_space(ctx, 14 + 21, TRUE);
 
        cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
        cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
@@ -1137,7 +686,7 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
        cs->buf[cs->cdw++] = t->b.buffer_offset >> 2;
 
        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, (struct r600_resource*)t->b.buffer,
+       cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, (struct si_resource*)t->b.buffer,
                                                             RADEON_USAGE_WRITE);
 
        cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);