From: Marek Olšák Date: Tue, 26 Feb 2013 16:20:25 +0000 (+0100) Subject: r600g: atomize streamout enabling X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6f25de6711491bbcfc906cd8804f30acc6534b2d;p=mesa.git r600g: atomize streamout enabling This doesn't fix any issue we know of, but there indeed is a week spot in draw_vbo where streamout can fail. After streamout is enabled, the need_cs_space call can flush the context, which causes the streamout to be disabled right after it was enabled and bad things happen. One way to fix it is to atomize the beginning part, so that no context flush can happen between streamout enabling and the first drawing. Tested-by: Andreas Boll --- diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 128464e101a..8b3104e40a1 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -438,10 +438,6 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, ctx->ws->buffer_wait(onebo->buf, 0); COMPUTE_DBG("...\n"); - - ctx->streamout_start = TRUE; - ctx->streamout_append_bitmask = ~0; - } diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 2e301bc86ec..b601f1430ce 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2643,6 +2643,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); + r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 11dbb3bad37..dfd18b81a92 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -168,8 +168,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen unsigned offset, unsigned value); void r600_flush_emit(struct r600_context *ctx); -void r600_context_streamout_begin(struct r600_context *ctx); -void r600_context_streamout_end(struct r600_context *ctx); void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw); void r600_dma_copy(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 3e0d89c06f0..c737aa25c43 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -58,8 +58,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets, - (struct pipe_stream_output_target**)rctx->so_targets); + util_blitter_save_so_targets(rctx->blitter, rctx->streamout.num_targets, + (struct pipe_stream_output_target**)rctx->streamout.targets); util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso); if (op & R600_SAVE_FRAGMENT_STATE) { diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 89740359be0..3267c37cc6d 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -126,11 +126,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, } } /* Streamout buffers. */ - for (i = 0; i < rctx->num_so_targets; i++) { - if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) { - r600_context_streamout_end(rctx); - rctx->streamout_start = TRUE; - rctx->streamout_append_bitmask = ~0; + for (i = 0; i < rctx->streamout.num_targets; i++) { + if (rctx->streamout.targets[i]->b.buffer == &rbuffer->b.b) { + if (rctx->streamout.begin_emitted) { + r600_emit_streamout_end(rctx); + } + rctx->streamout.append_bitmask = rctx->streamout.enabled_mask; + r600_streamout_buffers_dirty(rctx); } } /* Constant buffers. */ diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ec098c1f7cf..78b5b293060 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -404,7 +404,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += ctx->num_cs_dw_nontimer_queries_suspend; /* Count in streamout_end at the end of CS. */ - num_dw += ctx->num_cs_dw_streamout_end; + if (ctx->streamout.begin_emitted) { + num_dw += ctx->streamout.num_dw_for_end; + } /* Count in render_condition(NULL) at the end of CS. */ if (ctx->predicate_drawing) { @@ -727,7 +729,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) return; ctx->nontimer_queries_suspended = false; - ctx->streamout_suspended = false; + ctx->streamout.suspended = false; /* suspend queries */ if (ctx->num_cs_dw_nontimer_queries_suspend) { @@ -735,9 +737,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) ctx->nontimer_queries_suspended = true; } - if (ctx->num_cs_dw_streamout_end) { - r600_context_streamout_end(ctx); - ctx->streamout_suspended = true; + if (ctx->streamout.begin_emitted) { + r600_emit_streamout_end(ctx); + ctx->streamout.suspended = true; } /* flush is needed to avoid lockups on some chips with user fences @@ -854,9 +856,9 @@ void r600_begin_new_cs(struct r600_context *ctx) r600_sampler_states_dirty(ctx, &samplers->states); } - if (ctx->streamout_suspended) { - ctx->streamout_start = TRUE; - ctx->streamout_append_bitmask = ~0; + if (ctx->streamout.suspended) { + ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; + r600_streamout_buffers_dirty(ctx); } /* resume queries */ @@ -940,48 +942,23 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_ } } -void r600_context_streamout_begin(struct r600_context *ctx) +void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; - struct r600_so_target **t = ctx->so_targets; + struct r600_so_target **t = ctx->streamout.targets; unsigned *stride_in_dw = ctx->vs_shader->so.stride; - unsigned buffer_en, i, update_flags = 0; + unsigned i, update_flags = 0; uint64_t va; - unsigned num_cs_dw_streamout_end; - - buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | - (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | - (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | - (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); - - num_cs_dw_streamout_end = - 12 + /* flush_vgt_streamout */ - util_bitcount(buffer_en) * 8 + /* STRMOUT_BUFFER_UPDATE */ - 3 /* set_streamout_enable(0) */; - - r600_need_cs_space(ctx, - 12 + /* flush_vgt_streamout */ - 6 + /* set_streamout_enable */ - util_bitcount(buffer_en) * 7 + /* SET_CONTEXT_REG */ - (ctx->family >= CHIP_RS780 && - ctx->family <= CHIP_RV740 ? util_bitcount(buffer_en) * 5 : 0) + /* STRMOUT_BASE_UPDATE */ - util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */ - util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */ - (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */ - num_cs_dw_streamout_end, TRUE); - - /* This must be set after r600_need_cs_space. */ - ctx->num_cs_dw_streamout_end = num_cs_dw_streamout_end; if (ctx->chip_class >= EVERGREEN) { evergreen_flush_vgt_streamout(ctx); - evergreen_set_streamout_enable(ctx, buffer_en); + evergreen_set_streamout_enable(ctx, ctx->streamout.enabled_mask); } else { r600_flush_vgt_streamout(ctx); - r600_set_streamout_enable(ctx, buffer_en); + r600_set_streamout_enable(ctx, ctx->streamout.enabled_mask); } - for (i = 0; i < ctx->num_so_targets; i++) { + for (i = 0; i < ctx->streamout.num_targets; i++) { if (t[i]) { t[i]->stride_in_dw = stride_in_dw[i]; t[i]->so_index = i; @@ -1014,7 +991,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) RADEON_USAGE_WRITE); } - if (ctx->streamout_append_bitmask & (1 << i)) { + if (ctx->streamout.append_bitmask & (1 << i)) { va = r600_resource_va(&ctx->screen->screen, (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; /* Append. */ @@ -1043,16 +1020,17 @@ void r600_context_streamout_begin(struct r600_context *ctx) } } - if (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780) { + if (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770) { cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); cs->buf[cs->cdw++] = update_flags; } + ctx->streamout.begin_emitted = true; } -void r600_context_streamout_end(struct r600_context *ctx) +void r600_emit_streamout_end(struct r600_context *ctx) { struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; - struct r600_so_target **t = ctx->so_targets; + struct r600_so_target **t = ctx->streamout.targets; unsigned i; uint64_t va; @@ -1062,7 +1040,7 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_flush_vgt_streamout(ctx); } - for (i = 0; i < ctx->num_so_targets; i++) { + for (i = 0; i < ctx->streamout.num_targets; i++) { if (t[i]) { va = r600_resource_va(&ctx->screen->screen, (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; @@ -1079,7 +1057,6 @@ void r600_context_streamout_end(struct r600_context *ctx) cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, t[i]->buf_filled_size, RADEON_USAGE_WRITE); - } } @@ -1093,7 +1070,7 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_set_streamout_enable(ctx, 0); } ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; - ctx->num_cs_dw_streamout_end = 0; + ctx->streamout.begin_emitted = false; } /* The max number of bytes to copy per packet. */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 1d0ad793943..9a84c1f92a3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -35,7 +35,7 @@ #include "r600_resource.h" #include "evergreen_compute.h" -#define R600_NUM_ATOMS 37 +#define R600_NUM_ATOMS 38 #define R600_TRACE_CS 0 @@ -416,6 +416,19 @@ struct r600_fetch_shader { unsigned offset; }; +struct r600_streamout { + struct r600_atom begin_atom; + bool begin_emitted; + unsigned num_dw_for_end; + + unsigned enabled_mask; + unsigned num_targets; + struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS]; + + unsigned append_bitmask; + bool suspended; +}; + struct r600_ring { struct radeon_winsys_cs *cs; bool flushing; @@ -504,6 +517,7 @@ struct r600_context { struct r600_vertexbuf_state vertex_buffer_state; /** Vertex buffers for compute shaders */ struct r600_vertexbuf_state cs_vertex_buffer_state; + struct r600_streamout streamout; /* Additional context states. */ unsigned flags; @@ -539,14 +553,6 @@ struct r600_context { unsigned current_render_cond_mode; boolean predicate_drawing; - /* Streamout state. */ - unsigned num_cs_dw_streamout_end; - unsigned num_so_targets; - struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; - boolean streamout_start; - unsigned streamout_append_bitmask; - bool streamout_suspended; - /* Deprecated state management. */ struct r600_range *range; unsigned nblocks; @@ -718,6 +724,10 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean vtx); +/* r600_hw_context.c */ +void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom); +void r600_emit_streamout_end(struct r600_context *ctx); + /* r600_state_common.c */ void r600_init_common_state_functions(struct r600_context *rctx); void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom); @@ -737,6 +747,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, void r600_sampler_states_dirty(struct r600_context *rctx, struct r600_sampler_states *state); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); +void r600_streamout_buffers_dirty(struct r600_context *rctx); void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 2d3ec93b282..6f49eed4376 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2331,6 +2331,7 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); + r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index fae28bc9511..bc119787003 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1014,31 +1014,60 @@ static void r600_so_target_destroy(struct pipe_context *ctx, FREE(t); } -static void r600_set_so_targets(struct pipe_context *ctx, - unsigned num_targets, - struct pipe_stream_output_target **targets, - unsigned append_bitmask) +void r600_streamout_buffers_dirty(struct r600_context *rctx) +{ + rctx->streamout.num_dw_for_end = + 12 + /* flush_vgt_streamout */ + util_bitcount(rctx->streamout.enabled_mask) * 8 + /* STRMOUT_BUFFER_UPDATE */ + 3 /* set_streamout_enable(0) */; + + rctx->streamout.begin_atom.num_dw = + 12 + /* flush_vgt_streamout */ + 6 + /* set_streamout_enable */ + util_bitcount(rctx->streamout.enabled_mask) * 7 + /* SET_CONTEXT_REG */ + (rctx->family >= CHIP_RS780 && + rctx->family <= CHIP_RV740 ? util_bitcount(rctx->streamout.enabled_mask) * 5 : 0) + /* STRMOUT_BASE_UPDATE */ + util_bitcount(rctx->streamout.enabled_mask & rctx->streamout.append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */ + util_bitcount(rctx->streamout.enabled_mask & ~rctx->streamout.append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */ + (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */ + rctx->streamout.num_dw_for_end; + + rctx->streamout.begin_atom.dirty = true; +} + +static void r600_set_streamout_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) { struct r600_context *rctx = (struct r600_context *)ctx; unsigned i; /* Stop streamout. */ - if (rctx->num_so_targets && !rctx->streamout_start) { - r600_context_streamout_end(rctx); + if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) { + r600_emit_streamout_end(rctx); } /* Set the new targets. */ for (i = 0; i < num_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]); r600_context_add_resource_size(ctx, targets[i]->buffer); } - for (; i < rctx->num_so_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); + for (; i < rctx->streamout.num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL); } - rctx->num_so_targets = num_targets; - rctx->streamout_start = num_targets != 0; - rctx->streamout_append_bitmask = append_bitmask; + rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) | + (num_targets >= 2 && targets[1] ? 2 : 0) | + (num_targets >= 3 && targets[2] ? 4 : 0) | + (num_targets >= 4 && targets[3] ? 8 : 0); + + rctx->streamout.num_targets = num_targets; + rctx->streamout.append_bitmask = append_bitmask; + + if (num_targets) { + r600_streamout_buffers_dirty(rctx); + } } static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) @@ -1352,12 +1381,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info info.index_bias = info.start; } - /* Enable stream out if needed. */ - if (rctx->streamout_start) { - r600_context_streamout_begin(rctx); - rctx->streamout_start = FALSE; - } - /* Set the index offset and multi primitive */ if (rctx->vgt2_state.vgt_indx_offset != info.index_bias) { rctx->vgt2_state.vgt_indx_offset = info.index_bias; @@ -1764,7 +1787,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->context.texture_barrier = r600_texture_barrier; rctx->context.create_stream_output_target = r600_create_so_target; rctx->context.stream_output_target_destroy = r600_so_target_destroy; - rctx->context.set_stream_output_targets = r600_set_so_targets; + rctx->context.set_stream_output_targets = r600_set_streamout_targets; rctx->context.draw_vbo = r600_draw_vbo; }