r600g: atomize streamout enabling
authorMarek Olšák <maraeo@gmail.com>
Tue, 26 Feb 2013 16:20:25 +0000 (17:20 +0100)
committerMarek Olšák <maraeo@gmail.com>
Fri, 1 Mar 2013 12:46:32 +0000 (13:46 +0100)
This doesn't fix any issue we know of, but there indeed is a week spot
in draw_vbo where streamout can fail. After streamout is enabled,
the need_cs_space call can flush the context, which causes the streamout
to be disabled right after it was enabled and bad things happen.

One way to fix it is to atomize the beginning part, so that no context flush
can happen between streamout enabling and the first drawing.

Tested-by: Andreas Boll <andreas.boll.dev@gmail.com>
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600.h
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_buffer.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 128464e101ac1a936104557722360934ae1a8282..8b3104e40a1493016872856ca855725ef018697d 100644 (file)
@@ -438,10 +438,6 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
        ctx->ws->buffer_wait(onebo->buf, 0);
 
        COMPUTE_DBG("...\n");
-
-       ctx->streamout_start = TRUE;
-       ctx->streamout_append_bitmask = ~0;
-
 }
 
 
index 2e301bc86ec953f01d6f4afcf059d2292517ca11..b601f1430ce32f8c6784a29ab25cdee4d88de8a6 100644 (file)
@@ -2643,6 +2643,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
        r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
+       r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 
        rctx->context.create_blend_state = evergreen_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
index 11dbb3bad377dc0b82ae4125eca2669c8be99d5b..dfd18b81a921856b5538cc31cc40e003adf63ce9 100644 (file)
@@ -168,8 +168,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
                              unsigned offset, unsigned value);
 void r600_flush_emit(struct r600_context *ctx);
 
-void r600_context_streamout_begin(struct r600_context *ctx);
-void r600_context_streamout_end(struct r600_context *ctx);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
 void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
 void r600_dma_copy(struct r600_context *rctx,
index 3e0d89c06f00731d39660b19d6a08242f2d42b1a..c737aa25c43f1bb02dc6b36a525a4e729b9db5ff 100644 (file)
@@ -58,8 +58,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
        util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
        util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
        util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
-       util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
-                                    (struct pipe_stream_output_target**)rctx->so_targets);
+       util_blitter_save_so_targets(rctx->blitter, rctx->streamout.num_targets,
+                                    (struct pipe_stream_output_target**)rctx->streamout.targets);
        util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
 
        if (op & R600_SAVE_FRAGMENT_STATE) {
index 89740359be097474f043dbe3650f25fcf099909b..3267c37cc6d50f49cf42fb9e186fcf5bc586eae3 100644 (file)
@@ -126,11 +126,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                }
                        }
                        /* Streamout buffers. */
-                       for (i = 0; i < rctx->num_so_targets; i++) {
-                               if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
-                                       r600_context_streamout_end(rctx);
-                                       rctx->streamout_start = TRUE;
-                                       rctx->streamout_append_bitmask = ~0;
+                       for (i = 0; i < rctx->streamout.num_targets; i++) {
+                               if (rctx->streamout.targets[i]->b.buffer == &rbuffer->b.b) {
+                                       if (rctx->streamout.begin_emitted) {
+                                               r600_emit_streamout_end(rctx);
+                                       }
+                                       rctx->streamout.append_bitmask = rctx->streamout.enabled_mask;
+                                       r600_streamout_buffers_dirty(rctx);
                                }
                        }
                        /* Constant buffers. */
index ec098c1f7cfa5fdc44cf43e8ad235eea47ba6ac9..78b5b2930604d1d0ccc1d6a9a79779cb72a46295 100644 (file)
@@ -404,7 +404,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        num_dw += ctx->num_cs_dw_nontimer_queries_suspend;
 
        /* Count in streamout_end at the end of CS. */
-       num_dw += ctx->num_cs_dw_streamout_end;
+       if (ctx->streamout.begin_emitted) {
+               num_dw += ctx->streamout.num_dw_for_end;
+       }
 
        /* Count in render_condition(NULL) at the end of CS. */
        if (ctx->predicate_drawing) {
@@ -727,7 +729,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
                return;
 
        ctx->nontimer_queries_suspended = false;
-       ctx->streamout_suspended = false;
+       ctx->streamout.suspended = false;
 
        /* suspend queries */
        if (ctx->num_cs_dw_nontimer_queries_suspend) {
@@ -735,9 +737,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
                ctx->nontimer_queries_suspended = true;
        }
 
-       if (ctx->num_cs_dw_streamout_end) {
-               r600_context_streamout_end(ctx);
-               ctx->streamout_suspended = true;
+       if (ctx->streamout.begin_emitted) {
+               r600_emit_streamout_end(ctx);
+               ctx->streamout.suspended = true;
        }
 
        /* flush is needed to avoid lockups on some chips with user fences
@@ -854,9 +856,9 @@ void r600_begin_new_cs(struct r600_context *ctx)
                r600_sampler_states_dirty(ctx, &samplers->states);
        }
 
-       if (ctx->streamout_suspended) {
-               ctx->streamout_start = TRUE;
-               ctx->streamout_append_bitmask = ~0;
+       if (ctx->streamout.suspended) {
+               ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
+               r600_streamout_buffers_dirty(ctx);
        }
 
        /* resume queries */
@@ -940,48 +942,23 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_
        }
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
+void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
-       struct r600_so_target **t = ctx->so_targets;
+       struct r600_so_target **t = ctx->streamout.targets;
        unsigned *stride_in_dw = ctx->vs_shader->so.stride;
-       unsigned buffer_en, i, update_flags = 0;
+       unsigned i, update_flags = 0;
        uint64_t va;
-       unsigned num_cs_dw_streamout_end;
-
-       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-       num_cs_dw_streamout_end =
-               12 + /* flush_vgt_streamout */
-               util_bitcount(buffer_en) * 8 + /* STRMOUT_BUFFER_UPDATE */
-               3 /* set_streamout_enable(0) */;
-
-       r600_need_cs_space(ctx,
-                          12 + /* flush_vgt_streamout */
-                          6 + /* set_streamout_enable */
-                          util_bitcount(buffer_en) * 7 + /* SET_CONTEXT_REG */
-                          (ctx->family >= CHIP_RS780 &&
-                           ctx->family <= CHIP_RV740 ? util_bitcount(buffer_en) * 5 : 0) + /* STRMOUT_BASE_UPDATE */
-                          util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */
-                          util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */
-                          (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */
-                          num_cs_dw_streamout_end, TRUE);
-
-       /* This must be set after r600_need_cs_space. */
-       ctx->num_cs_dw_streamout_end = num_cs_dw_streamout_end;
 
        if (ctx->chip_class >= EVERGREEN) {
                evergreen_flush_vgt_streamout(ctx);
-               evergreen_set_streamout_enable(ctx, buffer_en);
+               evergreen_set_streamout_enable(ctx, ctx->streamout.enabled_mask);
        } else {
                r600_flush_vgt_streamout(ctx);
-               r600_set_streamout_enable(ctx, buffer_en);
+               r600_set_streamout_enable(ctx, ctx->streamout.enabled_mask);
        }
 
-       for (i = 0; i < ctx->num_so_targets; i++) {
+       for (i = 0; i < ctx->streamout.num_targets; i++) {
                if (t[i]) {
                        t[i]->stride_in_dw = stride_in_dw[i];
                        t[i]->so_index = i;
@@ -1014,7 +991,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
                                                              RADEON_USAGE_WRITE);
                        }
 
-                       if (ctx->streamout_append_bitmask & (1 << i)) {
+                       if (ctx->streamout.append_bitmask & (1 << i)) {
                                va = r600_resource_va(&ctx->screen->screen,
                                                      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
                                /* Append. */
@@ -1043,16 +1020,17 @@ void r600_context_streamout_begin(struct r600_context *ctx)
                }
        }
 
-       if (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780) {
+       if (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770) {
                cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0);
                cs->buf[cs->cdw++] = update_flags;
        }
+       ctx->streamout.begin_emitted = true;
 }
 
-void r600_context_streamout_end(struct r600_context *ctx)
+void r600_emit_streamout_end(struct r600_context *ctx)
 {
        struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
-       struct r600_so_target **t = ctx->so_targets;
+       struct r600_so_target **t = ctx->streamout.targets;
        unsigned i;
        uint64_t va;
 
@@ -1062,7 +1040,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
                r600_flush_vgt_streamout(ctx);
        }
 
-       for (i = 0; i < ctx->num_so_targets; i++) {
+       for (i = 0; i < ctx->streamout.num_targets; i++) {
                if (t[i]) {
                        va = r600_resource_va(&ctx->screen->screen,
                                              (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
@@ -1079,7 +1057,6 @@ void r600_context_streamout_end(struct r600_context *ctx)
                        cs->buf[cs->cdw++] =
                                r600_context_bo_reloc(ctx,  &ctx->rings.gfx, t[i]->buf_filled_size,
                                                      RADEON_USAGE_WRITE);
-
                }
        }
 
@@ -1093,7 +1070,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
                r600_set_streamout_enable(ctx, 0);
        }
        ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
-       ctx->num_cs_dw_streamout_end = 0;
+       ctx->streamout.begin_emitted = false;
 }
 
 /* The max number of bytes to copy per packet. */
index 1d0ad7939430d5eef5a982ef5150300ff1e58cd2..9a84c1f92a3a8889d3f1d6e9a47b8f8c25311b62 100644 (file)
@@ -35,7 +35,7 @@
 #include "r600_resource.h"
 #include "evergreen_compute.h"
 
-#define R600_NUM_ATOMS 37
+#define R600_NUM_ATOMS 38
 
 #define R600_TRACE_CS 0
 
@@ -416,6 +416,19 @@ struct r600_fetch_shader {
        unsigned                        offset;
 };
 
+struct r600_streamout {
+       struct r600_atom                begin_atom;
+       bool                            begin_emitted;
+       unsigned                        num_dw_for_end;
+
+       unsigned                        enabled_mask;
+       unsigned                        num_targets;
+       struct r600_so_target           *targets[PIPE_MAX_SO_BUFFERS];
+
+       unsigned                        append_bitmask;
+       bool                            suspended;
+};
+
 struct r600_ring {
        struct radeon_winsys_cs         *cs;
        bool                            flushing;
@@ -504,6 +517,7 @@ struct r600_context {
        struct r600_vertexbuf_state     vertex_buffer_state;
        /** Vertex buffers for compute shaders */
        struct r600_vertexbuf_state     cs_vertex_buffer_state;
+       struct r600_streamout           streamout;
 
        /* Additional context states. */
        unsigned                        flags;
@@ -539,14 +553,6 @@ struct r600_context {
        unsigned                        current_render_cond_mode;
        boolean                         predicate_drawing;
 
-       /* Streamout state. */
-       unsigned                        num_cs_dw_streamout_end;
-       unsigned                        num_so_targets;
-       struct r600_so_target           *so_targets[PIPE_MAX_SO_BUFFERS];
-       boolean                         streamout_start;
-       unsigned                        streamout_append_bitmask;
-       bool                            streamout_suspended;
-
        /* Deprecated state management. */
        struct r600_range               *range;
        unsigned                        nblocks;
@@ -718,6 +724,10 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
                                   const unsigned char *swizzle_view,
                                   boolean vtx);
 
+/* r600_hw_context.c */
+void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
+void r600_emit_streamout_end(struct r600_context *ctx);
+
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
@@ -737,6 +747,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
                               struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
+void r600_streamout_buffers_dirty(struct r600_context *rctx);
 void r600_draw_rectangle(struct blitter_context *blitter,
                         int x1, int y1, int x2, int y2, float depth,
                         enum blitter_attrib_type type, const union pipe_color_union *attrib);
index 2d3ec93b2828cf567ce2b5299adfa7a89fac2e77..6f49eed43764f1af80e4aef1df437bf48bfeb9e3 100644 (file)
@@ -2331,6 +2331,7 @@ void r600_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
        r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8);
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
+       r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
 
        rctx->context.create_blend_state = r600_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
index fae28bc9511bff0bddd0ef4e9718744525cf9f03..bc119787003c05f45eb347831fe34150ee4043b2 100644 (file)
@@ -1014,31 +1014,60 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
        FREE(t);
 }
 
-static void r600_set_so_targets(struct pipe_context *ctx,
-                               unsigned num_targets,
-                               struct pipe_stream_output_target **targets,
-                               unsigned append_bitmask)
+void r600_streamout_buffers_dirty(struct r600_context *rctx)
+{
+       rctx->streamout.num_dw_for_end =
+               12 + /* flush_vgt_streamout */
+               util_bitcount(rctx->streamout.enabled_mask) * 8 + /* STRMOUT_BUFFER_UPDATE */
+               3 /* set_streamout_enable(0) */;
+
+       rctx->streamout.begin_atom.num_dw =
+               12 + /* flush_vgt_streamout */
+               6 + /* set_streamout_enable */
+               util_bitcount(rctx->streamout.enabled_mask) * 7 + /* SET_CONTEXT_REG */
+               (rctx->family >= CHIP_RS780 &&
+                rctx->family <= CHIP_RV740 ? util_bitcount(rctx->streamout.enabled_mask) * 5 : 0) + /* STRMOUT_BASE_UPDATE */
+               util_bitcount(rctx->streamout.enabled_mask & rctx->streamout.append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */
+               util_bitcount(rctx->streamout.enabled_mask & ~rctx->streamout.append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */
+               (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */
+               rctx->streamout.num_dw_for_end;
+
+       rctx->streamout.begin_atom.dirty = true;
+}
+
+static void r600_set_streamout_targets(struct pipe_context *ctx,
+                                      unsigned num_targets,
+                                      struct pipe_stream_output_target **targets,
+                                      unsigned append_bitmask)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        unsigned i;
 
        /* Stop streamout. */
-       if (rctx->num_so_targets && !rctx->streamout_start) {
-               r600_context_streamout_end(rctx);
+       if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
+               r600_emit_streamout_end(rctx);
        }
 
        /* Set the new targets. */
        for (i = 0; i < num_targets; i++) {
-               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
                r600_context_add_resource_size(ctx, targets[i]->buffer);
        }
-       for (; i < rctx->num_so_targets; i++) {
-               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+       for (; i < rctx->streamout.num_targets; i++) {
+               pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
        }
 
-       rctx->num_so_targets = num_targets;
-       rctx->streamout_start = num_targets != 0;
-       rctx->streamout_append_bitmask = append_bitmask;
+       rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
+                                      (num_targets >= 2 && targets[1] ? 2 : 0) |
+                                      (num_targets >= 3 && targets[2] ? 4 : 0) |
+                                      (num_targets >= 4 && targets[3] ? 8 : 0);
+
+       rctx->streamout.num_targets = num_targets;
+       rctx->streamout.append_bitmask = append_bitmask;
+
+       if (num_targets) {
+               r600_streamout_buffers_dirty(rctx);
+       }
 }
 
 static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
@@ -1352,12 +1381,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                info.index_bias = info.start;
        }
 
-       /* Enable stream out if needed. */
-       if (rctx->streamout_start) {
-               r600_context_streamout_begin(rctx);
-               rctx->streamout_start = FALSE;
-       }
-
        /* Set the index offset and multi primitive */
        if (rctx->vgt2_state.vgt_indx_offset != info.index_bias) {
                rctx->vgt2_state.vgt_indx_offset = info.index_bias;
@@ -1764,7 +1787,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
        rctx->context.texture_barrier = r600_texture_barrier;
        rctx->context.create_stream_output_target = r600_create_so_target;
        rctx->context.stream_output_target_destroy = r600_so_target_destroy;
-       rctx->context.set_stream_output_targets = r600_set_so_targets;
+       rctx->context.set_stream_output_targets = r600_set_streamout_targets;
        rctx->context.draw_vbo = r600_draw_vbo;
 }