radeonsi: use CP DMA for the null const buffer clear on CIK
[mesa.git] / src / gallium / drivers / radeonsi / si_gfx_cs.c
index 7d190aa22b8b612ada4ed157e958e1e4803a8613..922fc560d74225d2be8abf394252b022e772e477 100644 (file)
@@ -26,6 +26,7 @@
 #include "si_pipe.h"
 
 #include "util/os_time.h"
+#include "util/u_upload_mgr.h"
 
 /* initialize */
 void si_need_gfx_cs_space(struct si_context *ctx)
@@ -64,6 +65,15 @@ void si_need_gfx_cs_space(struct si_context *ctx)
                si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
 }
 
+void si_unref_sdma_uploads(struct si_context *sctx)
+{
+       for (unsigned i = 0; i < sctx->num_sdma_uploads; i++) {
+               si_resource_reference(&sctx->sdma_uploads[i].dst, NULL);
+               si_resource_reference(&sctx->sdma_uploads[i].src, NULL);
+       }
+       sctx->num_sdma_uploads = 0;
+}
+
 void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
                     struct pipe_fence_handle **fence)
 {
@@ -98,25 +108,47 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
        if (ctx->screen->debug_flags & DBG(CHECK_VM))
                flags &= ~PIPE_FLUSH_ASYNC;
 
+       ctx->gfx_flush_in_progress = true;
+
        /* If the state tracker is flushing the GFX IB, si_flush_from_st is
         * responsible for flushing the DMA IB and merging the fences from both.
-        * This code is only needed when the driver flushes the GFX IB
-        * internally, and it never asks for a fence handle.
+        * If the driver flushes the GFX IB internally, and it should never ask
+        * for a fence handle.
         */
-       if (radeon_emitted(ctx->dma_cs, 0)) {
-               assert(fence == NULL); /* internal flushes only */
-               si_flush_dma_cs(ctx, flags, NULL);
+       assert(!radeon_emitted(ctx->dma_cs, 0) || fence == NULL);
+
+       /* Update the sdma_uploads list by flushing the uploader. */
+       u_upload_unmap(ctx->b.const_uploader);
+
+       /* Execute SDMA uploads. */
+       ctx->sdma_uploads_in_progress = true;
+       for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
+               struct si_sdma_upload *up = &ctx->sdma_uploads[i];
+               struct pipe_box box;
+
+               assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
+                      up->size % 4 == 0);
+
+               u_box_1d(up->src_offset, up->size, &box);
+               ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
+                             &up->src->b.b, 0, &box);
        }
+       ctx->sdma_uploads_in_progress = false;
+       si_unref_sdma_uploads(ctx);
 
-       ctx->gfx_flush_in_progress = true;
+       /* Flush SDMA (preamble IB). */
+       if (radeon_emitted(ctx->dma_cs, 0))
+               si_flush_dma_cs(ctx, flags, NULL);
 
-       if (!LIST_IS_EMPTY(&ctx->active_queries))
-               si_suspend_queries(ctx);
+       if (ctx->has_graphics) {
+               if (!LIST_IS_EMPTY(&ctx->active_queries))
+                       si_suspend_queries(ctx);
 
-       ctx->streamout.suspended = false;
-       if (ctx->streamout.begin_emitted) {
-               si_emit_streamout_end(ctx);
-               ctx->streamout.suspended = true;
+               ctx->streamout.suspended = false;
+               if (ctx->streamout.begin_emitted) {
+                       si_emit_streamout_end(ctx);
+                       ctx->streamout.suspended = true;
+               }
        }
 
        /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
@@ -177,7 +209,7 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx)
 
        pipe_reference_init(&ctx->current_saved_cs->reference, 1);
 
-       ctx->current_saved_cs->trace_buf = r600_resource(
+       ctx->current_saved_cs->trace_buf = si_resource(
                pipe_buffer_create(ctx->b.screen, 0, PIPE_USAGE_STAGING, 8));
        if (!ctx->current_saved_cs->trace_buf) {
                free(ctx->current_saved_cs);
@@ -216,6 +248,15 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                      SI_CONTEXT_INV_GLOBAL_L2 |
                      SI_CONTEXT_START_PIPELINE_STATS;
 
+       ctx->cs_shader_state.initialized = false;
+       si_all_descriptors_begin_new_cs(ctx);
+       si_all_resident_buffers_begin_new_cs(ctx);
+
+       if (!ctx->has_graphics) {
+               ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
+               return;
+       }
+
        /* set all valid group as dirty so they get reemited on
         * next draw command
         */
@@ -280,8 +321,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        /* CLEAR_STATE disables all window rectangles. */
        if (!has_clear_state || ctx->num_window_rectangles > 0)
                si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
-       si_all_descriptors_begin_new_cs(ctx);
-       si_all_resident_buffers_begin_new_cs(ctx);
 
        ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
        ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
@@ -323,8 +362,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        ctx->last_num_tcs_input_cp = -1;
        ctx->last_ls_hs_config = -1; /* impossible value */
 
-       ctx->cs_shader_state.initialized = false;
-
        if (has_clear_state) {
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;