radeonsi: kill point size VS output if it's not used by the rasterizer
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index 30f46c66258961cfaf30a02b77c583dbb1cedd12..75507a30cc4e37c068b944fb6cc2165007c559c2 100644 (file)
@@ -32,6 +32,7 @@
 #include "util/u_memory.h"
 #include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
+#include "util/u_blend.h"
 
 #include "gfx10_format_table.h"
 
@@ -426,13 +427,6 @@ static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *
    }
 }
 
-static bool si_blend_factor_uses_dst(unsigned factor)
-{
-   return factor == PIPE_BLENDFACTOR_DST_COLOR || factor == PIPE_BLENDFACTOR_DST_ALPHA ||
-          factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
-          factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
-}
-
 static void *si_create_blend_state_mode(struct pipe_context *ctx,
                                         const struct pipe_blend_state *state, unsigned mode)
 {
@@ -551,9 +545,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
       dstA_opt = si_translate_blend_opt_factor(dstA, true);
 
       /* Handle interdependencies. */
-      if (si_blend_factor_uses_dst(srcRGB))
+      if (util_blend_factor_uses_dest(srcRGB, false))
          dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
-      if (si_blend_factor_uses_dst(srcA))
+      if (util_blend_factor_uses_dest(srcA, false))
          dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
 
       if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
@@ -738,14 +732,15 @@ static void si_emit_clip_regs(struct si_context *sctx)
    struct si_shader_selector *vs_sel = vs->selector;
    struct si_shader_info *info = &vs_sel->info;
    struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-   unsigned window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+   bool window_space = info->stage == MESA_SHADER_VERTEX ?
+                          info->base.vs.window_space_position : 0;
    unsigned clipdist_mask = vs_sel->clipdist_mask;
    unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
    unsigned culldist_mask = vs_sel->culldist_mask;
    unsigned total_mask;
 
    if (vs->key.opt.clip_disable) {
-      assert(!info->culldist_writemask);
+      assert(!info->base.cull_distance_array_size);
       clipdist_mask = 0;
       culldist_mask = 0;
    }
@@ -763,7 +758,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
                          S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
-                         S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+                         S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
+                         S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
                          clipdist_mask | (culldist_mask << 8);
 
    if (sctx->chip_class >= GFX10) {
@@ -875,6 +871,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
    rs->polygon_mode_is_lines =
       (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
       (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
+   rs->polygon_mode_is_points =
+      (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
+      (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
    rs->pa_sc_line_stipple = state->line_stipple_enable
                                ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                                     S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
@@ -1024,7 +1023,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
        old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
        old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
-       old_rs->force_persample_interp != rs->force_persample_interp)
+       old_rs->force_persample_interp != rs->force_persample_interp ||
+       old_rs->polygon_mode_is_points != rs->polygon_mode_is_points)
       sctx->do_update_shaders = true;
 }
 
@@ -1392,7 +1392,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
       S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
       S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
-      S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
+      S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 2 : 0));
 
    db_shader_control = sctx->ps_db_shader_control;
 
@@ -2888,8 +2888,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
       meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
       meta_read_policy =  V_02807C_CACHE_LRU_RD; /* cache reads */
    } else {
-      meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */
-      meta_read_policy =  V_02807C_CACHE_NOA_RD;    /* don't cache reads */
+      meta_write_policy = V_02807C_CACHE_STREAM;    /* write combine */
+      meta_read_policy =  V_02807C_CACHE_NOA;       /* don't cache reads */
    }
 
    /* Colorbuffers. */
@@ -3157,12 +3157,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
          radeon_emit(cs, zb->db_stencil_base >> 32);    /* DB_STENCIL_WRITE_BASE_HI */
          radeon_emit(cs, zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */
          radeon_emit(cs, /* DB_RMI_L2_CACHE_CONTROL */
-                     S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
-                     S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+                     S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
+                     S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
                      S_02807C_HTILE_WR_POLICY(meta_write_policy) |
-                     S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
-                     S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) |
-                     S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) |
+                     S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
+                     S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |
+                     S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
                      S_02807C_HTILE_RD_POLICY(meta_read_policy) |
                      S_02807C_Z_BIG_PAGE(zs_big_page) |
                      S_02807C_S_BIG_PAGE(zs_big_page));
@@ -3259,11 +3259,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
                              S_028410_CMASK_WR_POLICY(meta_write_policy) |
                              S_028410_FMASK_WR_POLICY(meta_write_policy) |
                              S_028410_DCC_WR_POLICY(meta_write_policy) |
-                             S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+                             S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
                              S_028410_CMASK_RD_POLICY(meta_read_policy) |
                              S_028410_FMASK_RD_POLICY(meta_read_policy) |
                              S_028410_DCC_RD_POLICY(meta_read_policy) |
-                             S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD) |
+                             S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA) |
                              S_028410_FMASK_BIG_PAGE(color_big_page) |
                              S_028410_COLOR_BIG_PAGE(color_big_page));
    }
@@ -3357,8 +3357,8 @@ static bool si_out_of_order_rasterization(struct si_context *sctx)
 
       /* The set of PS invocations is always order invariant,
        * except when early Z/S tests are requested. */
-      if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.writes_memory &&
-          sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
+      if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.base.writes_memory &&
+          sctx->ps_shader.cso->info.base.fs.early_fragment_tests &&
           !dsa_order_invariant.pass_set)
          return false;
 
@@ -3494,7 +3494,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
       sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                      S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
                      S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
-                     S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
+                     S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3);
 
       if (sctx->framebuffer.nr_samples > 1) {
          db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
@@ -3747,26 +3747,12 @@ static void gfx10_make_texture_descriptor(
       S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
       S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
       S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
-
-   if (res->target == PIPE_TEXTURE_1D ||
-       res->target == PIPE_TEXTURE_2D) {
-      /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources
-       * starting with gfx10.3 (ignored if pitch <= width). Other texture
-       * targets can't. CB and DB can't set a custom pitch for any target.
-       */
-      if (screen->info.chip_class >= GFX10_3)
-         state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1);
-      else
-         state[4] = 0;
-   } else {
-      /* Depth is the last accessible layer on gfx9+. The hw doesn't need
-       * to know the total number of layers.
-       */
-      state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ?
-                                   depth - 1 : last_layer) |
-                 S_00A010_BASE_ARRAY(first_layer);
-   }
-
+   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+    * to know the total number of layers.
+    */
+   state[4] =
+      S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
+      S_00A010_BASE_ARRAY(first_layer);
    state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
               S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
                                                    : tex->buffer.b.b.last_level) |
@@ -4062,43 +4048,43 @@ static void si_make_texture_descriptor(struct si_screen *screen, struct si_textu
          data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
          switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
          case FMASK(2, 1):
-            num_format = V_008F14_IMG_FMASK_8_2_1;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
             break;
          case FMASK(2, 2):
-            num_format = V_008F14_IMG_FMASK_8_2_2;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
             break;
          case FMASK(4, 1):
-            num_format = V_008F14_IMG_FMASK_8_4_1;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
             break;
          case FMASK(4, 2):
-            num_format = V_008F14_IMG_FMASK_8_4_2;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
             break;
          case FMASK(4, 4):
-            num_format = V_008F14_IMG_FMASK_8_4_4;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
             break;
          case FMASK(8, 1):
-            num_format = V_008F14_IMG_FMASK_8_8_1;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
             break;
          case FMASK(8, 2):
-            num_format = V_008F14_IMG_FMASK_16_8_2;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
             break;
          case FMASK(8, 4):
-            num_format = V_008F14_IMG_FMASK_32_8_4;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
             break;
          case FMASK(8, 8):
-            num_format = V_008F14_IMG_FMASK_32_8_8;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
             break;
          case FMASK(16, 1):
-            num_format = V_008F14_IMG_FMASK_16_16_1;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
             break;
          case FMASK(16, 2):
-            num_format = V_008F14_IMG_FMASK_32_16_2;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
             break;
          case FMASK(16, 4):
-            num_format = V_008F14_IMG_FMASK_64_16_4;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
             break;
          case FMASK(16, 8):
-            num_format = V_008F14_IMG_FMASK_64_16_8;
+            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
             break;
          default:
             unreachable("invalid nr_samples");
@@ -4480,7 +4466,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
    } else {
       rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) |
                         S_008F38_FILTER_PREC_FIX(1) |
-                        S_008F38_ANISO_OVERRIDE_GFX6(sctx->chip_class >= GFX8);
+                        S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8);
    }
 
    /* Create sampler resource for integer textures. */
@@ -5366,7 +5352,8 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
    }
 
    if (sctx->chip_class >= GFX10_3) {
-      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
+      si_pm4_set_reg(pm4, 0x28848, 1 << 9); /* This fixes sample shading. */
    }
 
    sctx->cs_preamble_state = pm4;