[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
index b074214bbd31a55313f3bcc48443e91575972971..5bdfd4f6ac11561c4b5d044378e5d49048c7483b 100644 (file)
@@ -337,10 +337,10 @@ void si_destroy_shader_cache(struct si_screen *sscreen)
 /* SHADER STATES */
 
 static void si_set_tesseval_regs(struct si_screen *sscreen,
-                                struct si_shader_selector *tes,
+                                const struct si_shader_selector *tes,
                                 struct si_pm4_state *pm4)
 {
-       struct tgsi_shader_info *info = &tes->info;
+       const struct tgsi_shader_info *info = &tes->info;
        unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
        unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
        bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
@@ -396,11 +396,11 @@ static void si_set_tesseval_regs(struct si_screen *sscreen,
        } else
                distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
 
-       si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
-                      S_028B6C_TYPE(type) |
-                      S_028B6C_PARTITIONING(partitioning) |
-                      S_028B6C_TOPOLOGY(topology) |
-                      S_028B6C_DISTRIBUTION_MODE(distribution_mode));
+       assert(pm4->shader);
+       pm4->shader->vgt_tf_param = S_028B6C_TYPE(type) |
+                                   S_028B6C_PARTITIONING(partitioning) |
+                                   S_028B6C_TOPOLOGY(topology) |
+                                   S_028B6C_DISTRIBUTION_MODE(distribution_mode);
 }
 
 /* Polaris needs different VTX_REUSE_DEPTH settings depending on
@@ -440,8 +440,8 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
                    PIPE_TESS_SPACING_FRACTIONAL_ODD)
                        vtx_reuse_depth = 14;
 
-               si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
-                              vtx_reuse_depth);
+               assert(pm4->shader);
+               pm4->shader->vgt_vertex_reuse_block_cntl = vtx_reuse_depth;
        }
 }
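
The two hunks above stop baking VGT_TF_PARAM and VGT_VERTEX_REUSE_BLOCK_CNTL into the immutable pm4 register list; the computed values are instead stored on the shader and emitted later as tracked context registers by the si_emit_shader_* callbacks further down. A minimal sketch of the per-shader state this assumes (a hypothetical standalone struct for illustration only; the real fields are members of struct si_shader in si_shader.h, whose full layout is not shown in this patch):

        /* Hypothetical illustration: the two register values computed at
         * shader-state build time and consumed at draw time by the emit
         * callbacks below. */
        struct si_shader_vgt_regs {
                unsigned vgt_tf_param;                /* R_028B6C, set by si_set_tesseval_regs() */
                unsigned vgt_vertex_reuse_block_cntl; /* R_028C58, set by polaris_set_vgt_vertex_reuse() */
        };
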
 
@@ -464,12 +464,7 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
 static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
 {
        /* Add the pointer to VBO descriptors. */
-       if (HAVE_32BIT_POINTERS) {
-               return num_always_on_user_sgprs + 1;
-       } else {
-               assert(num_always_on_user_sgprs % 2 == 0);
-               return num_always_on_user_sgprs + 2;
-       }
+       return num_always_on_user_sgprs + 1;
 }
 
 static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
@@ -561,6 +556,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 static void si_emit_shader_es(struct si_context *sctx)
 {
        struct si_shader *shader = sctx->queued.named.es->shader;
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 
        if (!shader)
                return;
@@ -568,6 +564,19 @@ static void si_emit_shader_es(struct si_context *sctx)
        radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                                   SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
                                   shader->selector->esgs_itemsize / 4);
+
+       if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+               radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
+                                          SI_TRACKED_VGT_TF_PARAM,
+                                          shader->vgt_tf_param);
+
+       if (shader->vgt_vertex_reuse_block_cntl)
+               radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          shader->vgt_vertex_reuse_block_cntl);
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
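
The emit callbacks touched by this patch (ES above, plus GS, VS, PS and the SPI map below) all share the same context-roll bookkeeping: radeon_opt_set_context_reg() skips the packet when the tracked register value is unchanged, so comparing the command-stream dword count before and after the calls reveals whether any register was actually written this draw. A minimal sketch of that pattern, assuming only the cdw field of the command stream; emit_tracked_regs() is a hypothetical stand-in for the series of radeon_opt_set_context_reg() calls:

        /* Sketch only, not the driver's code. */
        static void si_emit_shader_example(struct si_context *sctx)
        {
                unsigned initial_cdw = sctx->gfx_cs->current.cdw;

                /* Writes only the context registers whose tracked value changed. */
                emit_tracked_regs(sctx);

                /* Any dwords added imply a context register write, i.e. a context roll. */
                if (initial_cdw != sctx->gfx_cs->current.cdw)
                        sctx->context_roll_counter++;
        }
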
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -752,6 +761,8 @@ static void gfx9_get_gs_info(struct si_shader_selector *es,
 static void si_emit_shader_gs(struct si_context *sctx)
 {
        struct si_shader *shader = sctx->queued.named.gs->shader;
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
        if (!shader)
                return;
 
@@ -802,7 +813,19 @@ static void si_emit_shader_gs(struct si_context *sctx)
                radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                                           SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
                                           shader->ctx_reg.gs.vgt_esgs_ring_itemsize);
+
+               if (shader->key.part.gs.es->type == PIPE_SHADER_TESS_EVAL)
+                       radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
+                                                  SI_TRACKED_VGT_TF_PARAM,
+                                                  shader->vgt_tf_param);
+               if (shader->vgt_vertex_reuse_block_cntl)
+                       radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                                  SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                                  shader->vgt_vertex_reuse_block_cntl);
        }
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -938,6 +961,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 static void si_emit_shader_vs(struct si_context *sctx)
 {
        struct si_shader *shader = sctx->queued.named.vs->shader;
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
        if (!shader)
                return;
 
@@ -965,6 +990,19 @@ static void si_emit_shader_vs(struct si_context *sctx)
        radeon_opt_set_context_reg(sctx, R_028818_PA_CL_VTE_CNTL,
                                   SI_TRACKED_PA_CL_VTE_CNTL,
                                   shader->ctx_reg.vs.pa_cl_vte_cntl);
+
+       if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+               radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
+                                          SI_TRACKED_VGT_TF_PARAM,
+                                          shader->vgt_tf_param);
+
+       if (shader->vgt_vertex_reuse_block_cntl)
+               radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+                                          shader->vgt_vertex_reuse_block_cntl);
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
 
 /**
@@ -1127,6 +1165,8 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
 static void si_emit_shader_ps(struct si_context *sctx)
 {
        struct si_shader *shader = sctx->queued.named.ps->shader;
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
        if (!shader)
                return;
 
@@ -1152,6 +1192,9 @@ static void si_emit_shader_ps(struct si_context *sctx)
        radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
                                   SI_TRACKED_CB_SHADER_MASK,
                                   shader->ctx_reg.ps.cb_shader_mask);
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
 
 static void si_shader_ps(struct si_shader *shader)
@@ -1614,7 +1657,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
 
                /* ps_uses_fbfetch is true only if the color buffer is bound. */
-               if (sctx->ps_uses_fbfetch) {
+               if (sctx->ps_uses_fbfetch && !sctx->blitter->running) {
                        struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
                        struct pipe_resource *tex = cb0->texture;
 
@@ -2000,6 +2043,9 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        assert(thread_index < ARRAY_SIZE(sscreen->compiler));
        compiler = &sscreen->compiler[thread_index];
 
+       if (sel->nir)
+               si_lower_nir(sel);
+
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
         * on demand.
@@ -2195,9 +2241,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                sel->nir = state->ir.nir;
 
                si_nir_scan_shader(sel->nir, &sel->info);
-               si_nir_scan_tess_ctrl(sel->nir, &sel->info, &sel->tcs_info);
-
-               si_lower_nir(sel);
+               si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
        }
 
        sel->type = sel->info.processor;
@@ -2820,9 +2864,13 @@ static void si_emit_spi_map(struct si_context *sctx)
        /* R_028644_SPI_PS_INPUT_CNTL_0 */
        /* Dota 2: Only ~16% of SPI map updates set different values. */
        /* Talos: Only ~9% of SPI map updates set different values. */
+       unsigned initial_cdw = sctx->gfx_cs->current.cdw;
        radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
                                    spi_ps_input_cntl,
                                    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll_counter++;
 }
 
 /**
@@ -3042,7 +3090,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
        /* Update the shader state to use the new shader bo. */
        si_shader_init_pm4_state(sctx->screen, shader);
 
-       r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
+       si_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
 
        si_shader_unlock(shader);
        return 1;
@@ -3152,7 +3200,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
        if (scratch_needed_size > 0) {
                if (scratch_needed_size > current_scratch_buffer_size) {
                        /* Create a bigger scratch buffer */
-                       r600_resource_reference(&sctx->scratch_buffer, NULL);
+                       si_resource_reference(&sctx->scratch_buffer, NULL);
 
                        sctx->scratch_buffer =
                                si_aligned_buffer_create(&sctx->screen->b,
@@ -3202,10 +3250,10 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
        si_init_config_add_vgt_flush(sctx);
 
-       si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_rings),
+       si_pm4_add_bo(sctx->init_config, si_resource(sctx->tess_rings),
                      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
 
-       uint64_t factor_va = r600_resource(sctx->tess_rings)->gpu_address +
+       uint64_t factor_va = si_resource(sctx->tess_rings)->gpu_address +
                             sctx->screen->tess_offchip_ring_size;
 
        /* Append these registers to the init config state. */