radeonsi: separate and disable streamout for now
author Christian König <deathsimple@vodafone.de>
Thu, 2 Aug 2012 13:21:02 +0000 (15:21 +0200)
committer Christian König <deathsimple@vodafone.de>
Sat, 11 Aug 2012 07:58:26 +0000 (09:58 +0200)
I have my doubts that this code still works on SI.

Signed-off-by: Christian König <deathsimple@vodafone.de>
src/gallium/drivers/radeonsi/Makefile.sources
src/gallium/drivers/radeonsi/evergreen_hw_context.c
src/gallium/drivers/radeonsi/r600.h
src/gallium/drivers/radeonsi/r600_hw_context.c
src/gallium/drivers/radeonsi/r600_hw_context_priv.h
src/gallium/drivers/radeonsi/radeonsi_pipe.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_streamout.c [new file with mode: 0644]

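diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 8e27b6c70500e3090d5831598f1d0f6c78d382cc..630afb8db5314b8b92842a741d0187c3f3a75e58 100644 (file)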
@@ -12,4 +12,5 @@ C_SOURCES := \
        r600_state_common.c \
        radeonsi_pm4.c \
        si_state.c \
+       si_state_streamout.c \
        si_state_draw.c
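diff --git a/src/gallium/drivers/radeonsi/evergreen_hw_context.c b/src/gallium/drivers/radeonsi/evergreen_hw_context.c
index d07161758b5c995eeec43ed8c85019dcf7c56777..56b068fe0631fe5212939ec68af258315fbf01cd 100644 (file)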
@@ -97,42 +97,3 @@ void si_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
        }
        cs->cdw += ndwords;
 }
-
-void evergreen_flush_vgt_streamout(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
-       cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
-       cs->buf[cs->cdw++] = 0;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
-       cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
-       cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
-       cs->buf[cs->cdw++] = 0;
-       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
-       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
-       cs->buf[cs->cdw++] = 4; /* poll interval */
-}
-
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       if (buffer_enable_bit) {
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
-
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
-       } else {
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
-       }
-}
diff --git a/src/gallium/drivers/radeonsi/r600.h b/src/gallium/drivers/radeonsi/r600.h
index 610b9dad1cc34c66b06394b1e21787e3f35c6708..f34d1ff5737c7cf51298cb0afd529c41eb017d93 100644 (file)
@@ -126,8 +126,6 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 void r600_context_emit_fence(struct r600_context *ctx, struct si_resource *fence,
                              unsigned offset, unsigned value);
 
-void r600_context_streamout_begin(struct r600_context *ctx);
-void r600_context_streamout_end(struct r600_context *ctx);
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
 
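diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index 509a8bf7fe1acdb3857ffec0dc5b632066372553..6765ef82e8a502baf277bf6e30e966dfd5ef8914 100644 (file)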
@@ -182,7 +182,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        struct radeon_winsys_cs *cs = ctx->cs;
        struct r600_block *enable_block = NULL;
        bool queries_suspended = false;
+
+#if 0
        bool streamout_suspended = false;
+#endif
 
        if (!cs->cdw)
                return;
@@ -193,10 +196,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
                queries_suspended = true;
        }
 
+#if 0
        if (ctx->num_cs_dw_streamout_end) {
                r600_context_streamout_end(ctx);
                streamout_suspended = true;
        }
+#endif
 
        r600_flush_framebuffer(ctx, true);
 
@@ -213,10 +218,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        ctx->pm4_dirty_cdwords = 0;
        ctx->flags = 0;
 
+#if 0
        if (streamout_suspended) {
                ctx->streamout_start = TRUE;
                ctx->streamout_append_bitmask = ~0;
        }
+#endif
 
        /* resume queries */
        if (queries_suspended) {
@@ -638,131 +645,6 @@ void r600_context_queries_resume(struct r600_context *ctx)
        }
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned *strides = ctx->vs_shader_so_strides;
-       unsigned buffer_en, i;
-
-       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-       ctx->num_cs_dw_streamout_end =
-               12 + /* flush_vgt_streamout */
-               util_bitcount(buffer_en) * 8 +
-               3;
-
-       r600_need_cs_space(ctx,
-                          12 + /* flush_vgt_streamout */
-                          6 + /* enables */
-                          util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
-                          util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
-                          ctx->num_cs_dw_streamout_end, TRUE);
-
-       if (ctx->chip_class >= CAYMAN) {
-               evergreen_flush_vgt_streamout(ctx);
-               evergreen_set_streamout_enable(ctx, buffer_en);
-       }
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       t[i]->stride = strides[i];
-                       t[i]->so_index = i;
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
-                       cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
-                                                       16*i - SI_CONTEXT_REG_OFFSET) >> 2;
-                       cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
-                                                       t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
-                       cs->buf[cs->cdw++] = strides[i] >> 2;              /* VTX_STRIDE (in DW) */
-                       cs->buf[cs->cdw++] = 0;                    /* BUFFER_BASE */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
-                                                     RADEON_USAGE_WRITE);
-
-                       if (ctx->streamout_append_bitmask & (1 << i)) {
-                               /* Append. */
-                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* src address lo */
-                               cs->buf[cs->cdw++] = 0; /* src address hi */
-
-                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                               cs->buf[cs->cdw++] =
-                                       r600_context_bo_reloc(ctx,  t[i]->filled_size,
-                                                             RADEON_USAGE_READ);
-                       } else {
-                               /* Start from the beginning. */
-                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                       }
-               }
-#endif
-       }
-}
-
-void r600_context_streamout_end(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned i, flush_flags = 0;
-
-       evergreen_flush_vgt_streamout(ctx);
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                       cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                      STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
-                                                      STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
-                       cs->buf[cs->cdw++] = 0; /* dst address lo */
-                       cs->buf[cs->cdw++] = 0; /* dst address hi */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx,  t[i]->filled_size,
-                                                     RADEON_USAGE_WRITE);
-
-                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
-               }
-#endif
-       }
-
-       evergreen_set_streamout_enable(ctx, 0);
-
-       ctx->atom_surface_sync.flush_flags |= flush_flags;
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-
-       ctx->num_cs_dw_streamout_end = 0;
-
-       /* XXX print some debug info */
-       for (i = 0; i < ctx->num_so_targets; i++) {
-               if (!t[i])
-                       continue;
-
-               uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
-               printf("FILLED_SIZE%i: %u\n", i, *ptr);
-               ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
-       }
-}
-
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
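diff --git a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
index 6d458d48bdc2d306235177f64efe1fef253028d1..c2a15ebb966b61895b6c83c3a81c3f64d79924a1 100644 (file)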
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-/*
- * evergreen_hw_context.c
- */
-void evergreen_flush_vgt_streamout(struct r600_context *ctx);
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit);
-
-
 static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct si_resource *rbo,
                                             enum radeon_bo_usage usage)
 {
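diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 9ee96a062004c80c2f0eaa818e44f72c86a66762..8356fda7c4962ce814ba8da747f0960efaea37e0 100644 (file)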
@@ -350,6 +350,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 0;
 
        /* Stream output. */
+#if 0
        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
                return debug_get_bool_option("R600_STREAMOUT", FALSE) ? 4 : 0;
        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
@@ -357,6 +358,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
                return 16*4;
+#endif
+       case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+       case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+               return 0;
 
        /* Texturing. */
        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e8a7b77da7e1e9d5b5f79b3ff32eafaff590d998..1d6d2149cfd4045858097319ed1a13cbc1ddcc85 100644 (file)
@@ -2204,74 +2204,6 @@ static void si_set_index_buffer(struct pipe_context *ctx,
        }
 }
 
-/*
- * Stream out
- */
-
-static struct pipe_stream_output_target *
-si_create_so_target(struct pipe_context *ctx,
-                   struct pipe_resource *buffer,
-                   unsigned buffer_offset,
-                   unsigned buffer_size)
-{
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_so_target *t;
-       void *ptr;
-
-       t = CALLOC_STRUCT(r600_so_target);
-       if (!t) {
-               return NULL;
-       }
-
-       t->b.reference.count = 1;
-       t->b.context = ctx;
-       pipe_resource_reference(&t->b.buffer, buffer);
-       t->b.buffer_offset = buffer_offset;
-       t->b.buffer_size = buffer_size;
-
-       t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
-       ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-       memset(ptr, 0, t->filled_size->buf->size);
-       rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
-       return &t->b;
-}
-
-static void si_so_target_destroy(struct pipe_context *ctx,
-                                struct pipe_stream_output_target *target)
-{
-       struct r600_so_target *t = (struct r600_so_target*)target;
-       pipe_resource_reference(&t->b.buffer, NULL);
-       si_resource_reference(&t->filled_size, NULL);
-       FREE(t);
-}
-
-static void si_set_so_targets(struct pipe_context *ctx,
-                             unsigned num_targets,
-                             struct pipe_stream_output_target **targets,
-                             unsigned append_bitmask)
-{
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       unsigned i;
-
-       /* Stop streamout. */
-       if (rctx->num_so_targets) {
-               r600_context_streamout_end(rctx);
-       }
-
-       /* Set the new targets. */
-       for (i = 0; i < num_targets; i++) {
-               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
-       }
-       for (; i < rctx->num_so_targets; i++) {
-               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
-       }
-
-       rctx->num_so_targets = num_targets;
-       rctx->streamout_start = num_targets != 0;
-       rctx->streamout_append_bitmask = append_bitmask;
-}
-
 /*
  * Misc
  */
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6729fd4df558404745de3225308bf0b6966afafc..a69722c975ba60cfea46530f21871d61c93c5e7a 100644 (file)
@@ -132,6 +132,19 @@ bool si_is_format_supported(struct pipe_screen *screen,
 void si_init_state_functions(struct r600_context *rctx);
 void si_init_config(struct r600_context *rctx);
 
+/* si_state_streamout.c */
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+                   struct pipe_resource *buffer,
+                   unsigned buffer_offset,
+                   unsigned buffer_size);
+void si_so_target_destroy(struct pipe_context *ctx,
+                         struct pipe_stream_output_target *target);
+void si_set_so_targets(struct pipe_context *ctx,
+                      unsigned num_targets,
+                      struct pipe_stream_output_target **targets,
+                      unsigned append_bitmask);
+
 /* si_state_draw.c */
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 
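diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e974642f05b2057868e61a295ff02e91f6a420ca..40ca95751b90c8210b2765710676a4199b96d729 100644 (file)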
@@ -535,11 +535,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
        si_pm4_emit_dirty(rctx);
        rctx->pm4_dirty_cdwords = 0;
 
+#if 0
        /* Enable stream out if needed. */
        if (rctx->streamout_start) {
                r600_context_streamout_begin(rctx);
                rctx->streamout_start = FALSE;
        }
+#endif
 
        si_context_draw(rctx, &rdraw);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
new file mode 100644 (file)
index 0000000..3410eb6
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Christian König <christian.koenig@amd.com>
+ */
+
+#include "radeonsi_pipe.h"
+#include "si_state.h"
+
+/*
+ * Stream out
+ */
+
+#if 0
+void si_context_streamout_begin(struct r600_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+       struct si_so_target **t = ctx->so_targets;
+       unsigned *strides = ctx->vs_shader_so_strides;
+       unsigned buffer_en, i;
+
+       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
+                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
+                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
+                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
+
+       ctx->num_cs_dw_streamout_end =
+               12 + /* flush_vgt_streamout */
+               util_bitcount(buffer_en) * 8 +
+               3;
+
+       si_need_cs_space(ctx,
+                          12 + /* flush_vgt_streamout */
+                          6 + /* enables */
+                          util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
+                          util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
+                          ctx->num_cs_dw_streamout_end, TRUE);
+
+       if (ctx->chip_class >= CAYMAN) {
+               evergreen_flush_vgt_streamout(ctx);
+               evergreen_set_streamout_enable(ctx, buffer_en);
+       }
+
+       for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+               if (t[i]) {
+                       t[i]->stride = strides[i];
+                       t[i]->so_index = i;
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
+                       cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
+                                                       16*i - SI_CONTEXT_REG_OFFSET) >> 2;
+                       cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
+                                                       t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
+                       cs->buf[cs->cdw++] = strides[i] >> 2;              /* VTX_STRIDE (in DW) */
+                       cs->buf[cs->cdw++] = 0;                    /* BUFFER_BASE */
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                       cs->buf[cs->cdw++] =
+                               si_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
+                                                     RADEON_USAGE_WRITE);
+
+                       if (ctx->streamout_append_bitmask & (1 << i)) {
+                               /* Append. */
+                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* src address lo */
+                               cs->buf[cs->cdw++] = 0; /* src address hi */
+
+                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                               cs->buf[cs->cdw++] =
+                                       si_context_bo_reloc(ctx,  t[i]->filled_size,
+                                                             RADEON_USAGE_READ);
+                       } else {
+                               /* Start from the beginning. */
+                               cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                              STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                       }
+               }
+#endif
+       }
+}
+
+void si_context_streamout_end(struct r600_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+       struct si_so_target **t = ctx->so_targets;
+       unsigned i, flush_flags = 0;
+
+       evergreen_flush_vgt_streamout(ctx);
+
+       for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+               if (t[i]) {
+                       cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+                       cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                      STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+                                                      STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
+                       cs->buf[cs->cdw++] = 0; /* dst address lo */
+                       cs->buf[cs->cdw++] = 0; /* dst address hi */
+                       cs->buf[cs->cdw++] = 0; /* unused */
+                       cs->buf[cs->cdw++] = 0; /* unused */
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                       cs->buf[cs->cdw++] =
+                               si_context_bo_reloc(ctx,  t[i]->filled_size,
+                                                     RADEON_USAGE_WRITE);
+
+                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
+               }
+#endif
+       }
+
+       evergreen_set_streamout_enable(ctx, 0);
+
+       ctx->atom_surface_sync.flush_flags |= flush_flags;
+       si_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+
+       ctx->num_cs_dw_streamout_end = 0;
+
+       /* XXX print some debug info */
+       for (i = 0; i < ctx->num_so_targets; i++) {
+               if (!t[i])
+                       continue;
+
+               uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
+               printf("FILLED_SIZE%i: %u\n", i, *ptr);
+               ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
+       }
+}
+
+void evergreen_flush_vgt_streamout(struct si_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+       cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
+       cs->buf[cs->cdw++] = 0;
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
+       cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
+       cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
+       cs->buf[cs->cdw++] = 0;
+       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
+       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
+       cs->buf[cs->cdw++] = 4; /* poll interval */
+}
+
+void evergreen_set_streamout_enable(struct si_context *ctx, unsigned buffer_enable_bit)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+
+       if (buffer_enable_bit) {
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
+
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
+       } else {
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
+       }
+}
+
+#endif
+
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+                   struct pipe_resource *buffer,
+                   unsigned buffer_offset,
+                   unsigned buffer_size)
+{
+#if 0
+       struct si_context *rctx = (struct r600_context *)ctx;
+       struct si_so_target *t;
+       void *ptr;
+
+       t = CALLOC_STRUCT(si_so_target);
+       if (!t) {
+               return NULL;
+       }
+
+       t->b.reference.count = 1;
+       t->b.context = ctx;
+       pipe_resource_reference(&t->b.buffer, buffer);
+       t->b.buffer_offset = buffer_offset;
+       t->b.buffer_size = buffer_size;
+
+       t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
+       ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
+       memset(ptr, 0, t->filled_size->buf->size);
+       rctx->ws->buffer_unmap(t->filled_size->cs_buf);
+
+       return &t->b;
+#endif
+       return NULL;
+}
+
+void si_so_target_destroy(struct pipe_context *ctx,
+                         struct pipe_stream_output_target *target)
+{
+#if 0
+       struct si_so_target *t = (struct r600_so_target*)target;
+       pipe_resource_reference(&t->b.buffer, NULL);
+       si_resource_reference(&t->filled_size, NULL);
+       FREE(t);
+#endif
+}
+
+void si_set_so_targets(struct pipe_context *ctx,
+                      unsigned num_targets,
+                      struct pipe_stream_output_target **targets,
+                      unsigned append_bitmask)
+{
+       assert(num_targets == 0);
+#if 0
+       struct si_context *rctx = (struct r600_context *)ctx;
+       unsigned i;
+
+       /* Stop streamout. */
+       if (rctx->num_so_targets) {
+               si_context_streamout_end(rctx);
+       }
+
+       /* Set the new targets. */
+       for (i = 0; i < num_targets; i++) {
+               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+       }
+       for (; i < rctx->num_so_targets; i++) {
+               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+       }
+
+       rctx->num_so_targets = num_targets;
+       rctx->streamout_start = num_targets != 0;
+       rctx->streamout_append_bitmask = append_bitmask;
+#endif
+}