radeonsi: fix point sprites
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
index 3a5b0ae5fee4098a0dde6cbff630bf157e748951..3827c7b662d6609282a59473e5c61f75c2c24b26 100644 (file)
@@ -64,9 +64,11 @@ static void si_shader_es(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
                       S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
                       S_00B328_SGPRS((num_sgprs - 1) / 8) |
-                      S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt));
+                      S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
+                      S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
-                      S_00B32C_USER_SGPR(num_user_sgprs));
+                      S_00B32C_USER_SGPR(num_user_sgprs) |
+                      S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_gs(struct si_shader *shader)
@@ -132,9 +134,11 @@ static void si_shader_gs(struct si_shader *shader)
 
        si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
                       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
-                      S_00B228_SGPRS((num_sgprs - 1) / 8));
+                      S_00B228_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
-                      S_00B22C_USER_SGPR(num_user_sgprs));
+                      S_00B22C_USER_SGPR(num_user_sgprs) |
+                      S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_vs(struct si_shader *shader)
@@ -206,14 +210,16 @@ static void si_shader_vs(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
                       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
                       S_00B128_SGPRS((num_sgprs - 1) / 8) |
-                      S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt));
+                      S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
+                      S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
                       S_00B12C_USER_SGPR(num_user_sgprs) |
                       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
                       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
                       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
                       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
-                      S_00B12C_SO_EN(!!shader->selector->so.num_outputs));
+                      S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
+                      S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
        if (window_space)
                si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
                               S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
@@ -304,14 +310,20 @@ static void si_shader_ps(struct si_shader *shader)
 
        si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
                       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
-                      S_00B028_SGPRS((num_sgprs - 1) / 8));
+                      S_00B028_SGPRS((num_sgprs - 1) / 8) |
+                      S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
                       S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
-                      S_00B02C_USER_SGPR(num_user_sgprs));
+                      S_00B02C_USER_SGPR(num_user_sgprs) |
+                      S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_init_pm4_state(struct si_shader *shader)
 {
+
+       if (shader->pm4)
+               si_pm4_free_state_simple(shader->pm4);
+
        switch (shader->selector->type) {
        case PIPE_SHADER_VERTEX:
                if (shader->key.vs.as_es)
@@ -358,13 +370,17 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
 
                if (sctx->queued.named.rasterizer) {
                        key->ps.color_two_side = sctx->queued.named.rasterizer->two_side;
-                       key->ps.flatshade = sctx->queued.named.rasterizer->flatshade;
 
                        if (sctx->queued.named.blend) {
                                key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
                                                       sctx->queued.named.rasterizer->multisample_enable &&
                                                       !sctx->framebuffer.cb0_is_integer;
                        }
+
+                       key->ps.poly_stipple = sctx->queued.named.rasterizer->poly_stipple_enable &&
+                                              ((sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
+                                                sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
+                                               sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY);
                }
                if (sctx->queued.named.dsa) {
                        key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
@@ -620,7 +636,8 @@ static void si_update_spi_map(struct si_context *sctx)
                unsigned interpolate = psinfo->input_interpolate[i];
                unsigned param_offset = ps->ps_input_param_offset[i];
 
-               if (name == TGSI_SEMANTIC_POSITION)
+               if (name == TGSI_SEMANTIC_POSITION ||
+                   name == TGSI_SEMANTIC_FACE)
                        /* Read from preloaded VGPRs, not parameters */
                        continue;
 
@@ -628,10 +645,8 @@ bcolor:
                tmp = 0;
 
                if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
-                   (interpolate == TGSI_INTERPOLATE_COLOR &&
-                    ps->key.ps.flatshade)) {
+                   (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
                        tmp |= S_028644_FLAT_SHADE(1);
-               }
 
                if (name == TGSI_SEMANTIC_GENERIC &&
                    sctx->sprite_coord_enable & (1 << index)) {
@@ -646,9 +661,11 @@ bcolor:
                        }
                }
 
-               if (j == vsinfo->num_outputs) {
-                       /* No corresponding output found, load defaults into input */
-                       tmp |= S_028644_OFFSET(0x20);
+               if (j == vsinfo->num_outputs & !G_028644_PT_SPRITE_TEX(tmp)) {
+                       /* No corresponding output found, load defaults into input.
+                        * Don't set any other bits.
+                        * (FLAT_SHADE=1 completely changes behavior) */
+                       tmp = S_028644_OFFSET(0x20);
                }
 
                si_pm4_set_reg(pm4,
@@ -704,9 +721,123 @@ static void si_init_gs_rings(struct si_context *sctx)
                           false, false, 0, 0);
 }
 
+/**
+ * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
+ *          otherwise.
+ */
+static unsigned si_update_scratch_buffer(struct si_context *sctx,
+                                   struct si_shader_selector *sel)
+{
+       struct si_shader *shader;
+       uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+       unsigned char *ptr;
+
+       if (!sel)
+               return 0;
+
+       shader = sel->current;
+
+       /* This shader doesn't need a scratch buffer */
+       if (shader->scratch_bytes_per_wave == 0)
+               return 0;
+
+       /* This shader is already configured to use the current
+        * scratch buffer. */
+       if (shader->scratch_bo == sctx->scratch_buffer)
+               return 0;
+
+       assert(sctx->scratch_buffer);
+
+       si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
+
+       /* Replace the shader bo with a new bo that has the relocs applied. */
+       r600_resource_reference(&shader->bo, NULL);
+       shader->bo = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_IMMUTABLE,
+                                              shader->binary.code_size);
+       ptr = sctx->screen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+       util_memcpy_cpu_to_le32(ptr, shader->binary.code, shader->binary.code_size);
+       sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+       /* Update the shader state to use the new shader bo. */
+       si_shader_init_pm4_state(shader);
+
+       r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
+
+       return 1;
+}
+
+static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
+{
+       if (!sctx->scratch_buffer)
+               return 0;
+
+       return sctx->scratch_buffer->b.b.width0;
+}
+
+static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
+                                       struct si_shader_selector *sel)
+{
+       if (!sel)
+               return 0;
+
+       return sel->current->scratch_bytes_per_wave;
+}
+
+static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
+{
+
+       return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
+                       si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
+                       si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+}
+
+static void si_update_spi_tmpring_size(struct si_context *sctx)
+{
+       unsigned current_scratch_buffer_size =
+               si_get_current_scratch_buffer_size(sctx);
+       unsigned scratch_bytes_per_wave =
+               si_get_max_scratch_bytes_per_wave(sctx);
+       unsigned scratch_needed_size = scratch_bytes_per_wave *
+               sctx->scratch_waves;
+
+       if (scratch_needed_size > 0) {
+
+               if (scratch_needed_size > current_scratch_buffer_size) {
+                       /* Create a bigger scratch buffer */
+                       pipe_resource_reference(
+                                       (struct pipe_resource**)&sctx->scratch_buffer,
+                                       NULL);
+
+                       sctx->scratch_buffer =
+                                       si_resource_create_custom(&sctx->screen->b.b,
+                                       PIPE_USAGE_DEFAULT, scratch_needed_size);
+               }
+
+               /* Update the shaders, so they are using the latest scratch.  The
+                * scratch buffer may have been changed since these shaders were
+                * last used, so we still need to try to update them, even if
+                * they require scratch buffers smaller than the current size.
+                */
+               if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+                       si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+               if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+                       si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+               if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+                       si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+       }
+
+       /* The LLVM shader backend should be reporting aligned scratch_sizes. */
+       assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
+               "scratch size should already be aligned correctly.");
+
+       sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
+                               S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+}
+
 void si_update_shaders(struct si_context *sctx)
 {
        struct pipe_context *ctx = (struct pipe_context*)sctx;
+       struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
        if (sctx->gs_shader) {
                si_shader_select(ctx, sctx->gs_shader);
@@ -721,7 +852,7 @@ void si_update_shaders(struct si_context *sctx)
                if (!sctx->gs_rings)
                        si_init_gs_rings(sctx);
                if (sctx->emitted.named.gs_rings != sctx->gs_rings)
-                       sctx->b.flags |= R600_CONTEXT_VGT_FLUSH;
+                       sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
                si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
 
                si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
@@ -772,11 +903,18 @@ void si_update_shaders(struct si_context *sctx)
        si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
 
        if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-           sctx->sprite_coord_enable != sctx->queued.named.rasterizer->sprite_coord_enable) {
-               sctx->sprite_coord_enable = sctx->queued.named.rasterizer->sprite_coord_enable;
+           sctx->sprite_coord_enable != rs->sprite_coord_enable ||
+           sctx->flatshade != rs->flatshade) {
+               sctx->sprite_coord_enable = rs->sprite_coord_enable;
+               sctx->flatshade = rs->flatshade;
                si_update_spi_map(sctx);
        }
 
+       if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
+           si_pm4_state_changed(sctx, gs)) {
+               si_update_spi_tmpring_size(sctx);
+       }
+
        if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
                sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
                sctx->db_render_state.dirty = true;