From: Christian König Date: Thu, 2 Aug 2012 13:21:02 +0000 (+0200) Subject: radeonsi: separate and disable streamout for now X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=303f4b7dcddee384d6f1dc1027cbdee840a38d7d;p=mesa.git radeonsi: separate and disable streamout for now I have my doubts that this code still works on SI. Signed-off-by: Christian König --- diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 8e27b6c7050..630afb8db53 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -12,4 +12,5 @@ C_SOURCES := \ r600_state_common.c \ radeonsi_pm4.c \ si_state.c \ + si_state_streamout.c \ si_state_draw.c diff --git a/src/gallium/drivers/radeonsi/evergreen_hw_context.c b/src/gallium/drivers/radeonsi/evergreen_hw_context.c index d07161758b5..56b068fe063 100644 --- a/src/gallium/drivers/radeonsi/evergreen_hw_context.c +++ b/src/gallium/drivers/radeonsi/evergreen_hw_context.c @@ -97,42 +97,3 @@ void si_context_draw(struct r600_context *ctx, const struct r600_draw *draw) } cs->cdw += ndwords; } - -void evergreen_flush_vgt_streamout(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); - cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = 0; - - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); - - cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); - cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ - cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2; /* register */ - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */ - cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */ - cs->buf[cs->cdw++] = 4; /* poll interval */ -} - -void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - if (buffer_enable_bit) { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1); - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit); - } else { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0); - } -} diff --git a/src/gallium/drivers/radeonsi/r600.h b/src/gallium/drivers/radeonsi/r600.h index 610b9dad1cc..f34d1ff5737 100644 --- a/src/gallium/drivers/radeonsi/r600.h +++ b/src/gallium/drivers/radeonsi/r600.h @@ -126,8 +126,6 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, void r600_context_emit_fence(struct r600_context *ctx, struct si_resource *fence, unsigned offset, unsigned value); -void r600_context_streamout_begin(struct r600_context *ctx); -void r600_context_streamout_end(struct r600_context *ctx); void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t); void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c index 509a8bf7fe1..6765ef82e8a 100644 --- a/src/gallium/drivers/radeonsi/r600_hw_context.c +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c @@ -182,7 +182,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) struct radeon_winsys_cs *cs = ctx->cs; struct r600_block *enable_block = NULL; bool queries_suspended = false; + +#if 0 bool streamout_suspended = false; +#endif if (!cs->cdw) return; @@ -193,10 +196,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) queries_suspended = true; } +#if 0 if (ctx->num_cs_dw_streamout_end) { r600_context_streamout_end(ctx); streamout_suspended = true; } +#endif r600_flush_framebuffer(ctx, true); @@ -213,10 +218,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) ctx->pm4_dirty_cdwords = 0; ctx->flags = 0; +#if 0 if (streamout_suspended) { ctx->streamout_start = TRUE; ctx->streamout_append_bitmask = ~0; } +#endif /* resume queries */ if (queries_suspended) { @@ -638,131 +645,6 @@ void r600_context_queries_resume(struct r600_context *ctx) } } -void r600_context_streamout_begin(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - struct r600_so_target **t = ctx->so_targets; - unsigned *strides = ctx->vs_shader_so_strides; - unsigned buffer_en, i; - - buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | - (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | - (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | - (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); - - ctx->num_cs_dw_streamout_end = - 12 + /* flush_vgt_streamout */ - util_bitcount(buffer_en) * 8 + - 3; - - r600_need_cs_space(ctx, - 12 + /* flush_vgt_streamout */ - 6 + /* enables */ - util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + - util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + - ctx->num_cs_dw_streamout_end, TRUE); - - if (ctx->chip_class >= CAYMAN) { - evergreen_flush_vgt_streamout(ctx); - evergreen_set_streamout_enable(ctx, buffer_en); - } - - for (i = 0; i < ctx->num_so_targets; i++) { -#if 0 - if (t[i]) { - t[i]->stride = strides[i]; - t[i]->so_index = i; - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0); - cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + - 16*i - SI_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = (t[i]->b.buffer_offset + - t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ - cs->buf[cs->cdw++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */ - cs->buf[cs->cdw++] = 0; /* BUFFER_BASE */ - - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, si_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE); - - if (ctx->streamout_append_bitmask & (1 << i)) { - /* Append. */ - cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); - cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */ - cs->buf[cs->cdw++] = 0; /* unused */ - cs->buf[cs->cdw++] = 0; /* unused */ - cs->buf[cs->cdw++] = 0; /* src address lo */ - cs->buf[cs->cdw++] = 0; /* src address hi */ - - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->filled_size, - RADEON_USAGE_READ); - } else { - /* Start from the beginning. */ - cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); - cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */ - cs->buf[cs->cdw++] = 0; /* unused */ - cs->buf[cs->cdw++] = 0; /* unused */ - cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */ - cs->buf[cs->cdw++] = 0; /* unused */ - } - } -#endif - } -} - -void r600_context_streamout_end(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - struct r600_so_target **t = ctx->so_targets; - unsigned i, flush_flags = 0; - - evergreen_flush_vgt_streamout(ctx); - - for (i = 0; i < ctx->num_so_targets; i++) { -#if 0 - if (t[i]) { - cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); - cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | - STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */ - cs->buf[cs->cdw++] = 0; /* dst address lo */ - cs->buf[cs->cdw++] = 0; /* dst address hi */ - cs->buf[cs->cdw++] = 0; /* unused */ - cs->buf[cs->cdw++] = 0; /* unused */ - - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->filled_size, - RADEON_USAGE_WRITE); - - flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; - } -#endif - } - - evergreen_set_streamout_enable(ctx, 0); - - ctx->atom_surface_sync.flush_flags |= flush_flags; - r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); - - ctx->num_cs_dw_streamout_end = 0; - - /* XXX print some debug info */ - for (i = 0; i < ctx->num_so_targets; i++) { - if (!t[i]) - continue; - - uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ); - printf("FILLED_SIZE%i: %u\n", i, *ptr); - ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf); - } -} - void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t) { struct radeon_winsys_cs *cs = ctx->cs; diff --git a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h index 6d458d48bdc..c2a15ebb966 100644 --- a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h +++ b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h @@ -35,13 +35,6 @@ #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -/* - * evergreen_hw_context.c - */ -void evergreen_flush_vgt_streamout(struct r600_context *ctx); -void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit); - - static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct si_resource *rbo, enum radeon_bo_usage usage) { diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index 9ee96a06200..8356fda7c49 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -350,6 +350,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 0; /* Stream output. */ +#if 0 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return debug_get_bool_option("R600_STREAMOUT", FALSE) ? 4 : 0; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: @@ -357,6 +358,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: return 16*4; +#endif + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 0; /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e8a7b77da7e..1d6d2149cfd 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2204,74 +2204,6 @@ static void si_set_index_buffer(struct pipe_context *ctx, } } -/* - * Stream out - */ - -static struct pipe_stream_output_target * -si_create_so_target(struct pipe_context *ctx, - struct pipe_resource *buffer, - unsigned buffer_offset, - unsigned buffer_size) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_so_target *t; - void *ptr; - - t = CALLOC_STRUCT(r600_so_target); - if (!t) { - return NULL; - } - - t->b.reference.count = 1; - t->b.context = ctx; - pipe_resource_reference(&t->b.buffer, buffer); - t->b.buffer_offset = buffer_offset; - t->b.buffer_size = buffer_size; - - t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4); - ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); - memset(ptr, 0, t->filled_size->buf->size); - rctx->ws->buffer_unmap(t->filled_size->cs_buf); - - return &t->b; -} - -static void si_so_target_destroy(struct pipe_context *ctx, - struct pipe_stream_output_target *target) -{ - struct r600_so_target *t = (struct r600_so_target*)target; - pipe_resource_reference(&t->b.buffer, NULL); - si_resource_reference(&t->filled_size, NULL); - FREE(t); -} - -static void si_set_so_targets(struct pipe_context *ctx, - unsigned num_targets, - struct pipe_stream_output_target **targets, - unsigned append_bitmask) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - unsigned i; - - /* Stop streamout. */ - if (rctx->num_so_targets) { - r600_context_streamout_end(rctx); - } - - /* Set the new targets. */ - for (i = 0; i < num_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); - } - for (; i < rctx->num_so_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); - } - - rctx->num_so_targets = num_targets; - rctx->streamout_start = num_targets != 0; - rctx->streamout_append_bitmask = append_bitmask; -} - /* * Misc */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6729fd4df55..a69722c975b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -132,6 +132,19 @@ bool si_is_format_supported(struct pipe_screen *screen, void si_init_state_functions(struct r600_context *rctx); void si_init_config(struct r600_context *rctx); +/* si_state_streamout.c */ +struct pipe_stream_output_target * +si_create_so_target(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size); +void si_so_target_destroy(struct pipe_context *ctx, + struct pipe_stream_output_target *target); +void si_set_so_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask); + /* si_state_draw.c */ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e974642f05b..40ca95751b9 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -535,11 +535,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) si_pm4_emit_dirty(rctx); rctx->pm4_dirty_cdwords = 0; +#if 0 /* Enable stream out if needed. */ if (rctx->streamout_start) { r600_context_streamout_begin(rctx); rctx->streamout_start = FALSE; } +#endif si_context_draw(rctx, &rdraw); diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c new file mode 100644 index 00000000000..3410eb668fd --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -0,0 +1,271 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Christian König + */ + +#include "radeonsi_pipe.h" +#include "si_state.h" + +/* + * Stream out + */ + +#if 0 +void si_context_streamout_begin(struct r600_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->cs; + struct si_so_target **t = ctx->so_targets; + unsigned *strides = ctx->vs_shader_so_strides; + unsigned buffer_en, i; + + buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | + (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | + (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | + (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); + + ctx->num_cs_dw_streamout_end = + 12 + /* flush_vgt_streamout */ + util_bitcount(buffer_en) * 8 + + 3; + + si_need_cs_space(ctx, + 12 + /* flush_vgt_streamout */ + 6 + /* enables */ + util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + + util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + + ctx->num_cs_dw_streamout_end, TRUE); + + if (ctx->chip_class >= CAYMAN) { + evergreen_flush_vgt_streamout(ctx); + evergreen_set_streamout_enable(ctx, buffer_en); + } + + for (i = 0; i < ctx->num_so_targets; i++) { +#if 0 + if (t[i]) { + t[i]->stride = strides[i]; + t[i]->so_index = i; + + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0); + cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + + 16*i - SI_CONTEXT_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = (t[i]->b.buffer_offset + + t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ + cs->buf[cs->cdw++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */ + cs->buf[cs->cdw++] = 0; /* BUFFER_BASE */ + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = + si_context_bo_reloc(ctx, si_resource(t[i]->b.buffer), + RADEON_USAGE_WRITE); + + if (ctx->streamout_append_bitmask & (1 << i)) { + /* Append. */ + cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); + cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */ + cs->buf[cs->cdw++] = 0; /* unused */ + cs->buf[cs->cdw++] = 0; /* unused */ + cs->buf[cs->cdw++] = 0; /* src address lo */ + cs->buf[cs->cdw++] = 0; /* src address hi */ + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = + si_context_bo_reloc(ctx, t[i]->filled_size, + RADEON_USAGE_READ); + } else { + /* Start from the beginning. */ + cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); + cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */ + cs->buf[cs->cdw++] = 0; /* unused */ + cs->buf[cs->cdw++] = 0; /* unused */ + cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */ + cs->buf[cs->cdw++] = 0; /* unused */ + } + } +#endif + } +} + +void si_context_streamout_end(struct r600_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->cs; + struct si_so_target **t = ctx->so_targets; + unsigned i, flush_flags = 0; + + evergreen_flush_vgt_streamout(ctx); + + for (i = 0; i < ctx->num_so_targets; i++) { +#if 0 + if (t[i]) { + cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); + cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | + STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */ + cs->buf[cs->cdw++] = 0; /* dst address lo */ + cs->buf[cs->cdw++] = 0; /* dst address hi */ + cs->buf[cs->cdw++] = 0; /* unused */ + cs->buf[cs->cdw++] = 0; /* unused */ + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = + si_context_bo_reloc(ctx, t[i]->filled_size, + RADEON_USAGE_WRITE); + + flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; + } +#endif + } + + evergreen_set_streamout_enable(ctx, 0); + + ctx->atom_surface_sync.flush_flags |= flush_flags; + si_atom_dirty(ctx, &ctx->atom_surface_sync.atom); + + ctx->num_cs_dw_streamout_end = 0; + + /* XXX print some debug info */ + for (i = 0; i < ctx->num_so_targets; i++) { + if (!t[i]) + continue; + + uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ); + printf("FILLED_SIZE%i: %u\n", i, *ptr); + ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf); + } +} + +void evergreen_flush_vgt_streamout(struct si_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->cs; + + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); + cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = 0; + + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); + + cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); + cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ + cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2; /* register */ + cs->buf[cs->cdw++] = 0; + cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */ + cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */ + cs->buf[cs->cdw++] = 4; /* poll interval */ +} + +void evergreen_set_streamout_enable(struct si_context *ctx, unsigned buffer_enable_bit) +{ + struct radeon_winsys_cs *cs = ctx->cs; + + if (buffer_enable_bit) { + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); + cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1); + + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); + cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit); + } else { + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); + cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0); + } +} + +#endif + +struct pipe_stream_output_target * +si_create_so_target(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ +#if 0 + struct si_context *rctx = (struct r600_context *)ctx; + struct si_so_target *t; + void *ptr; + + t = CALLOC_STRUCT(si_so_target); + if (!t) { + return NULL; + } + + t->b.reference.count = 1; + t->b.context = ctx; + pipe_resource_reference(&t->b.buffer, buffer); + t->b.buffer_offset = buffer_offset; + t->b.buffer_size = buffer_size; + + t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4); + ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); + memset(ptr, 0, t->filled_size->buf->size); + rctx->ws->buffer_unmap(t->filled_size->cs_buf); + + return &t->b; +#endif + return NULL; +} + +void si_so_target_destroy(struct pipe_context *ctx, + struct pipe_stream_output_target *target) +{ +#if 0 + struct si_so_target *t = (struct r600_so_target*)target; + pipe_resource_reference(&t->b.buffer, NULL); + si_resource_reference(&t->filled_size, NULL); + FREE(t); +#endif +} + +void si_set_so_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) +{ + assert(num_targets == 0); +#if 0 + struct si_context *rctx = (struct r600_context *)ctx; + unsigned i; + + /* Stop streamout. */ + if (rctx->num_so_targets) { + si_context_streamout_end(rctx); + } + + /* Set the new targets. */ + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); + } + for (; i < rctx->num_so_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); + } + + rctx->num_so_targets = num_targets; + rctx->streamout_start = num_targets != 0; + rctx->streamout_append_bitmask = append_bitmask; +#endif +}