- struct si_context *sctx = (struct si_context *)ctx;
- unsigned old_num_targets = sctx->streamout.num_targets;
- unsigned i;
- bool wait_now = false;
-
- /* We are going to unbind the buffers. Mark which caches need to be flushed. */
- if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
- /* Since streamout uses vector writes which go through TC L2
- * and most other clients can use TC L2 as well, we don't need
- * to flush it.
- *
- * The only cases which requires flushing it is VGT DMA index
- * fetching (on <= GFX7) and indirect draw data, which are rare
- * cases. Thus, flag the TC L2 dirtiness in the resource and
- * handle it at draw call time.
- */
- for (i = 0; i < sctx->streamout.num_targets; i++)
- if (sctx->streamout.targets[i])
- si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
-
- /* Invalidate the scalar cache in case a streamout buffer is
- * going to be used as a constant buffer.
- *
- * Invalidate vL1, because streamout bypasses it (done by
- * setting GLC=1 in the store instruction), but vL1 in other
- * CUs can contain outdated data of streamout buffers.
- *
- * VS_PARTIAL_FLUSH is required if the buffers are going to be
- * used as an input immediately.
- */
- sctx->flags |= SI_CONTEXT_INV_SCACHE |
- SI_CONTEXT_INV_VCACHE;
-
- /* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
- if (sctx->screen->use_ngg_streamout) {
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
-
- /* Wait now. This is needed to make sure that GDS is not
- * busy at the end of IBs.
- *
- * Also, the next streamout operation will overwrite GDS,
- * so we need to make sure that it's idle.
- */
- wait_now = true;
- } else {
- sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
- }
- }
-
- /* All readers of the streamout targets need to be finished before we can
- * start writing to the targets.
- */
- if (num_targets) {
- if (sctx->screen->use_ngg_streamout)
- si_allocate_gds(sctx);
-
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
- }
-
- /* Streamout buffers must be bound in 2 places:
- * 1) in VGT by setting the VGT_STRMOUT registers
- * 2) as shader resources
- */
-
- /* Stop streamout. */
- if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
- si_emit_streamout_end(sctx);
-
- /* Set the new targets. */
- unsigned enabled_mask = 0, append_bitmask = 0;
- for (i = 0; i < num_targets; i++) {
- si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
- if (!targets[i])
- continue;
-
- si_context_add_resource_size(sctx, targets[i]->buffer);
- enabled_mask |= 1 << i;
-
- if (offsets[i] == ((unsigned)-1))
- append_bitmask |= 1 << i;
- }
-
- for (; i < sctx->streamout.num_targets; i++)
- si_so_target_reference(&sctx->streamout.targets[i], NULL);
-
- sctx->streamout.enabled_mask = enabled_mask;
- sctx->streamout.num_targets = num_targets;
- sctx->streamout.append_bitmask = append_bitmask;
-
- /* Update dirty state bits. */
- if (num_targets) {
- si_streamout_buffers_dirty(sctx);
- } else {
- si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
- si_set_streamout_enable(sctx, false);
- }
-
- /* Set the shader resources.*/
- for (i = 0; i < num_targets; i++) {
- if (targets[i]) {
- struct pipe_shader_buffer sbuf;
- sbuf.buffer = targets[i]->buffer;
-
- if (sctx->screen->use_ngg_streamout) {
- sbuf.buffer_offset = targets[i]->buffer_offset;
- sbuf.buffer_size = targets[i]->buffer_size;
- } else {
- sbuf.buffer_offset = 0;
- sbuf.buffer_size = targets[i]->buffer_offset +
- targets[i]->buffer_size;
- }
-
- si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
- si_resource(targets[i]->buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
- } else {
- si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
- }
- }
- for (; i < old_num_targets; i++)
- si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
-
- if (wait_now)
- sctx->emit_cache_flush(sctx);
+ struct si_context *sctx = (struct si_context *)ctx;
+ unsigned old_num_targets = sctx->streamout.num_targets;
+ unsigned i;
+ bool wait_now = false;
+
+ /* We are going to unbind the buffers. Mark which caches need to be flushed. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
+ /* Since streamout uses vector writes which go through TC L2
+ * and most other clients can use TC L2 as well, we don't need
+ * to flush it.
+ *
+ * The only cases that require flushing it are VGT DMA index
+ * fetching (on <= GFX7) and indirect draw data, which are rare
+ * cases. Thus, flag the TC L2 dirtiness in the resource and
+ * handle it at draw call time.
+ */
+ for (i = 0; i < sctx->streamout.num_targets; i++)
+ if (sctx->streamout.targets[i])
+ si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+ /* Invalidate the scalar cache in case a streamout buffer is
+ * going to be used as a constant buffer.
+ *
+ * Invalidate vL1, because streamout bypasses it (done by
+ * setting GLC=1 in the store instruction), but vL1 in other
+ * CUs can contain outdated data of streamout buffers.
+ *
+ * VS_PARTIAL_FLUSH is required if the buffers are going to be
+ * used as an input immediately.
+ */
+ sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
+
+ /* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
+ if (sctx->screen->use_ngg_streamout) {
+ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
+ /* Wait now. This is needed to make sure that GDS is not
+ * busy at the end of IBs.
+ *
+ * Also, the next streamout operation will overwrite GDS,
+ * so we need to make sure that it's idle.
+ */
+ wait_now = true;
+ } else {
+ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
+ }
+ }
+
+ /* All readers of the streamout targets need to be finished before we can
+ * start writing to the targets.
+ */
+ if (num_targets) {
+ if (sctx->screen->use_ngg_streamout)
+ si_allocate_gds(sctx);
+
+ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
+ }
+
+ /* Streamout buffers must be bound in 2 places:
+ * 1) in VGT by setting the VGT_STRMOUT registers
+ * 2) as shader resources
+ */
+
+ /* Stop streamout. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
+ si_emit_streamout_end(sctx);
+
+ /* Set the new targets. */
+ unsigned enabled_mask = 0, append_bitmask = 0;
+ for (i = 0; i < num_targets; i++) {
+ si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
+ if (!targets[i])
+ continue;
+
+ si_context_add_resource_size(sctx, targets[i]->buffer);
+ enabled_mask |= 1 << i;
+
+ if (offsets[i] == ((unsigned)-1))
+ append_bitmask |= 1 << i;
+ }
+
+ for (; i < sctx->streamout.num_targets; i++)
+ si_so_target_reference(&sctx->streamout.targets[i], NULL);
+
+ sctx->streamout.enabled_mask = enabled_mask;
+ sctx->streamout.num_targets = num_targets;
+ sctx->streamout.append_bitmask = append_bitmask;
+
+ /* Update dirty state bits. */
+ if (num_targets) {
+ si_streamout_buffers_dirty(sctx);
+ } else {
+ si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
+ si_set_streamout_enable(sctx, false);
+ }
+
+ /* Set the shader resources. */
+ for (i = 0; i < num_targets; i++) {
+ if (targets[i]) {
+ struct pipe_shader_buffer sbuf;
+ sbuf.buffer = targets[i]->buffer;
+
+ if (sctx->screen->use_ngg_streamout) {
+ sbuf.buffer_offset = targets[i]->buffer_offset;
+ sbuf.buffer_size = targets[i]->buffer_size;
+ } else {
+ sbuf.buffer_offset = 0;
+ sbuf.buffer_size = targets[i]->buffer_offset + targets[i]->buffer_size;
+ }
+
+ si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
+ si_resource(targets[i]->buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
+ } else {
+ si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+ }
+ }
+ for (; i < old_num_targets; i++)
+ si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+
+ if (wait_now)
+ sctx->emit_cache_flush(sctx);