gallium: Redefine the max texture 2d cap from _LEVELS to _SIZE.
[mesa.git] / src / gallium / drivers / radeonsi / si_gfx_cs.c
index ec74c1bc70368363cdf87a85a0b696ce0277f527..d0d405c473f17994aaa5906d1302321e0d40d373 100644 (file)
 #include "si_pipe.h"
 
 #include "util/os_time.h"
+#include "util/u_upload_mgr.h"
 
 /* initialize */
 void si_need_gfx_cs_space(struct si_context *ctx)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx_cs;
+       struct radeon_cmdbuf *cs = ctx->gfx_cs;
 
        /* There is no need to flush the DMA IB here, because
-        * r600_need_dma_space always flushes the GFX IB if there is
+        * si_need_dma_space always flushes the GFX IB if there is
         * a conflict, which means any unflushed DMA commands automatically
         * precede the GFX IB (= they had no dependency on the GFX IB when
         * they were submitted).
@@ -53,21 +54,24 @@ void si_need_gfx_cs_space(struct si_context *ctx)
        ctx->gtt = 0;
        ctx->vram = 0;
 
-       /* If the IB is sufficiently large, don't count the space needed
-        * and just flush if there is not enough space left.
-        *
-        * Also reserve space for stopping queries at the end of IB, because
-        * the number of active queries is mostly unlimited.
-        */
-       unsigned need_dwords = 2048 + ctx->num_cs_dw_queries_suspend;
+       unsigned need_dwords = si_get_minimum_num_gfx_cs_dwords(ctx);
        if (!ctx->ws->cs_check_space(cs, need_dwords))
                si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
 }
 
+/* Clear the context's SDMA upload list.
+ *
+ * For each of the first num_sdma_uploads entries of sctx->sdma_uploads,
+ * both the dst and the src resource pointers are handed to
+ * si_resource_reference() with a NULL replacement (presumably this drops
+ * the reference held by the entry -- confirm against that helper).
+ * The entry count is then reset to zero.
+ */
+void si_unref_sdma_uploads(struct si_context *sctx)
+{
+       for (unsigned i = 0; i < sctx->num_sdma_uploads; i++) {
+               si_resource_reference(&sctx->sdma_uploads[i].dst, NULL);
+               si_resource_reference(&sctx->sdma_uploads[i].src, NULL);
+       }
+       /* The list is treated as empty from here on. */
+       sctx->num_sdma_uploads = 0;
+}
+
 void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
                     struct pipe_fence_handle **fence)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx_cs;
+       struct radeon_cmdbuf *cs = ctx->gfx_cs;
        struct radeon_winsys *ws = ctx->ws;
        unsigned wait_flags = 0;
 
@@ -98,25 +102,47 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
        if (ctx->screen->debug_flags & DBG(CHECK_VM))
                flags &= ~PIPE_FLUSH_ASYNC;
 
+       ctx->gfx_flush_in_progress = true;
+
        /* If the state tracker is flushing the GFX IB, si_flush_from_st is
         * responsible for flushing the DMA IB and merging the fences from both.
-        * This code is only needed when the driver flushes the GFX IB
-        * internally, and it never asks for a fence handle.
+        * If the driver flushes the GFX IB internally, it should never ask
+        * for a fence handle.
         */
-       if (radeon_emitted(ctx->dma_cs, 0)) {
-               assert(fence == NULL); /* internal flushes only */
-               si_flush_dma_cs(ctx, flags, NULL);
+       assert(!radeon_emitted(ctx->dma_cs, 0) || fence == NULL);
+
+       /* Update the sdma_uploads list by flushing the uploader. */
+       u_upload_unmap(ctx->b.const_uploader);
+
+       /* Execute SDMA uploads. */
+       ctx->sdma_uploads_in_progress = true;
+       for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
+               struct si_sdma_upload *up = &ctx->sdma_uploads[i];
+               struct pipe_box box;
+
+               assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
+                      up->size % 4 == 0);
+
+               u_box_1d(up->src_offset, up->size, &box);
+               ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
+                             &up->src->b.b, 0, &box);
        }
+       ctx->sdma_uploads_in_progress = false;
+       si_unref_sdma_uploads(ctx);
 
-       ctx->gfx_flush_in_progress = true;
+       /* Flush SDMA (preamble IB). */
+       if (radeon_emitted(ctx->dma_cs, 0))
+               si_flush_dma_cs(ctx, flags, NULL);
 
-       if (!LIST_IS_EMPTY(&ctx->active_queries))
-               si_suspend_queries(ctx);
+       if (ctx->has_graphics) {
+               if (!LIST_IS_EMPTY(&ctx->active_queries))
+                       si_suspend_queries(ctx);
 
-       ctx->streamout.suspended = false;
-       if (ctx->streamout.begin_emitted) {
-               si_emit_streamout_end(ctx);
-               ctx->streamout.suspended = true;
+               ctx->streamout.suspended = false;
+               if (ctx->streamout.begin_emitted) {
+                       si_emit_streamout_end(ctx);
+                       ctx->streamout.suspended = true;
+               }
        }
 
        /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
@@ -133,12 +159,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 
        if (ctx->current_saved_cs) {
                si_trace_emit(ctx);
-               si_log_hw_flush(ctx);
 
                /* Save the IB for debug contexts. */
                si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
                ctx->current_saved_cs->flushed = true;
                ctx->current_saved_cs->time_flush = os_time_get_nano();
+
+               si_log_hw_flush(ctx);
        }
 
        /* Flush the CS. */
@@ -146,8 +173,6 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
        if (fence)
                ws->fence_reference(fence, ctx->last_gfx_fence);
 
-       /* This must be after cs_flush returns, since the context's API
-        * thread can concurrently read this value in si_fence_finish. */
        ctx->num_gfx_cs_flushes++;
 
        /* Check VM faults if needed. */
@@ -178,7 +203,7 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx)
 
        pipe_reference_init(&ctx->current_saved_cs->reference, 1);
 
-       ctx->current_saved_cs->trace_buf = r600_resource(
+       ctx->current_saved_cs->trace_buf = si_resource(
                pipe_buffer_create(ctx->b.screen, 0, PIPE_USAGE_STAGING, 8));
        if (!ctx->current_saved_cs->trace_buf) {
                free(ctx->current_saved_cs);
@@ -217,6 +242,14 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                      SI_CONTEXT_INV_GLOBAL_L2 |
                      SI_CONTEXT_START_PIPELINE_STATS;
 
+       ctx->cs_shader_state.initialized = false;
+       si_all_descriptors_begin_new_cs(ctx);
+
+       if (!ctx->has_graphics) {
+               ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
+               return;
+       }
+
        /* Mark all valid groups as dirty so they get re-emitted on
         * the next draw command
         */
@@ -278,12 +311,11 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
-       si_all_descriptors_begin_new_cs(ctx);
-       si_all_resident_buffers_begin_new_cs(ctx);
+       /* CLEAR_STATE disables all window rectangles. */
+       if (!has_clear_state || ctx->num_window_rectangles > 0)
+               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
 
-       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
        si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
@@ -309,7 +341,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        ctx->last_index_size = -1;
        ctx->last_primitive_restart_en = -1;
        ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
-       ctx->last_gs_out_prim = -1;
        ctx->last_prim = -1;
        ctx->last_multi_vgt_param = -1;
        ctx->last_rast_prim = -1;
@@ -319,6 +350,72 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
        ctx->last_tcs = NULL;
        ctx->last_tes_sh_base = -1;
        ctx->last_num_tcs_input_cp = -1;
+       ctx->last_ls_hs_config = -1; /* impossible value */
 
-       ctx->cs_shader_state.initialized = false;
+       if (has_clear_state) {
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
+               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x00001000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
+               ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ]  = 0x3f800000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ]  = 0x3f800000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]  = 0x3f800000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]  = 0x3f800000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x00000005;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     = 0xffff;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE]    = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MODE]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_REUSE_OFF]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL]  = 0x00000002;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM]  = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL]  = 0x0000001e; /* From VI */
+
+               /* Set all saved registers state to saved. */
+               ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
+       } else {
+               /* Set all saved registers state to unknown. */
+               ctx->tracked_regs.reg_saved = 0;
+       }
+
+       /* 0xffffffff is an impossible value for register SPI_PS_INPUT_CNTL_n */
+       memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
 }