Revert "radeonsi: decrease the number of texture slots to 24"

[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c

index b3299a95b785a3d3d86617b42bfc6cd6cd33cdf6..de300764e3e4a72e6a7b60991642cba9bae8d89c 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -40,7 +40,7 @@ static void
  si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
                       struct r600_atom **list_elem)
  {
-       atom->id = list_elem - sctx->atoms.array + 1;
+       atom->id = list_elem - sctx->atoms.array;
         *list_elem = atom;
  }
  
@@ -50,7 +50,7 @@ void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
                   void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
  {
         atom->emit = (void*)emit_func;
-       atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
+       atom->id = list_elem - sctx->atoms.array;
         *list_elem = atom;
  }
  
@@ -94,13 +94,13 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
  {
         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
         struct si_state_blend *blend = sctx->queued.named.blend;
-       uint32_t cb_target_mask, i;
+       /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
+        * but mask them out of cb_target_mask as well, to be safe. */
+       uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
+       unsigned i;
  
-       /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */
         if (blend)
-               cb_target_mask = blend->cb_target_mask;
-       else
-               cb_target_mask = 0xffffffff;
+               cb_target_mask &= blend->cb_target_mask;
  
         /* Avoid a hang that happens when dual source blending is enabled
          * but there is not enough color outputs. This is undefined behavior,
@@ -453,8 +453,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                         S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
  
                 /* Only set dual source blending for MRT0 to avoid a hang. */
-               if (i >= 1 && blend->dual_src_blend)
+               if (i >= 1 && blend->dual_src_blend) {
+                       /* Vulkan does this for dual source blending. */
+                       if (i == 1)
+                               blend_cntl |= S_028780_ENABLE(1);
+
+                       si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
                         continue;
+               }
  
                 /* Only addition and subtraction equations are supported with
                  * dual source blending.
@@ -463,16 +469,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                     (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
                      eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
                         assert(!"Unsupported equation for dual source blending");
+                       si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
                         continue;
                 }
  
-               if (!state->rt[j].colormask)
-                       continue;
-
                 /* cb_render_state will disable unused ones */
                 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
  
-               if (!state->rt[j].blend_enable) {
+               if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
                         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
                         continue;
                 }
@@ -553,6 +557,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
         }
  
         if (sctx->b.family == CHIP_STONEY) {
+               /* Disable RB+ blend optimizations for dual source blending.
+                * Vulkan does this.
+                */
+               if (blend->dual_src_blend) {
+                       for (int i = 0; i < 8; i++) {
+                               sx_mrt_blend_opt[i] =
+                                       S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+                                       S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+                       }
+               }
+
                 for (int i = 0; i < 8; i++)
                         si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
                                        sx_mrt_blend_opt[i]);
@@ -654,6 +669,7 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
         unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
         unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
         unsigned total_mask;
+       bool misc_vec_ena;
  
         if (vs->key.opt.hw_vs.clip_disable) {
                 assert(!info->culldist_writemask);
@@ -662,6 +678,18 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
         }
         total_mask = clipdist_mask | culldist_mask;
  
+       /* Clip distances on points have no effect, so they need to be
+        * implemented as cull distances. This applies to the clipvertex case as well.
+        *
+        * Setting this for primitives other than points should have no adverse
+        * effects.
+        */
+       clipdist_mask &= rs->clip_plane_enable;
+       culldist_mask |= clipdist_mask;
+
+       misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
+                      info->writes_layer || info->writes_viewport_index;
+
         radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
                 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
                 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
@@ -669,13 +697,9 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
                 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
                 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
                 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
-               S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
-                                           info->writes_edgeflag ||
-                                           info->writes_layer ||
-                                            info->writes_viewport_index) |
-               S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
-               (rs->clip_plane_enable &
-                clipdist_mask) | (culldist_mask << 8));
+               S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+               S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+               clipdist_mask | (culldist_mask << 8));
         radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
                 rs->pa_cl_clip_cntl |
                 ucp_mask |
@@ -693,8 +717,10 @@ static void si_update_poly_offset_state(struct si_context *sctx)
  {
         struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
  
-       if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
+       if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
+               si_pm4_bind_state(sctx, poly_offset, NULL);
                 return;
+       }
  
         /* Use the user format, not db_render_format, so that the polygon
          * offset behaves as expected by applications.
@@ -892,6 +918,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
         si_update_poly_offset_state(sctx);
  
         si_mark_atom_dirty(sctx, &sctx->clip_regs);
+       sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
+               rs->line_stipple_enable;
         sctx->do_update_shaders = true;
  }
  
@@ -1339,11 +1367,17 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
                 case PIPE_FORMAT_Z16_UNORM:
                         return V_008F14_IMG_DATA_FORMAT_16;
                 case PIPE_FORMAT_X24S8_UINT:
+               case PIPE_FORMAT_S8X24_UINT:
+                       /*
+                        * Implemented as an 8_8_8_8 data format to fix texture
+                        * gathers in stencil sampling. This affects at least
+                        * GL45-CTS.texture_cube_map_array.sampling on VI.
+                        */
+                       return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
                 case PIPE_FORMAT_Z24X8_UNORM:
                 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
                         return V_008F14_IMG_DATA_FORMAT_8_24;
                 case PIPE_FORMAT_X8Z24_UNORM:
-               case PIPE_FORMAT_S8X24_UINT:
                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
                         return V_008F14_IMG_DATA_FORMAT_24_8;
                 case PIPE_FORMAT_S8_UINT:
@@ -1673,17 +1707,12 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
                                                const struct util_format_description *desc,
                                                int first_non_void)
  {
-       unsigned type;
         int i;
  
         if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
                 return V_008F0C_BUF_DATA_FORMAT_10_11_11;
  
         assert(first_non_void >= 0);
-       type = desc->channel[first_non_void].type;
-
-       if (type == UTIL_FORMAT_TYPE_FIXED)
-               return V_008F0C_BUF_DATA_FORMAT_INVALID;
  
         if (desc->nr_channels == 4 &&
             desc->channel[0].size == 10 &&
@@ -1722,14 +1751,6 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
                 }
                 break;
         case 32:
-               /* From the Southern Islands ISA documentation about MTBUF:
-                * 'Memory reads of data in memory that is 32 or 64 bits do not
-                * undergo any format conversion.'
-                */
-               if (type != UTIL_FORMAT_TYPE_FLOAT &&
-                   !desc->channel[first_non_void].pure_integer)
-                       return V_008F0C_BUF_DATA_FORMAT_INVALID;
-
                 switch (desc->nr_channels) {
                 case 1:
                         return V_008F0C_BUF_DATA_FORMAT_32;
@@ -1757,18 +1778,21 @@ static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
  
         switch (desc->channel[first_non_void].type) {
         case UTIL_FORMAT_TYPE_SIGNED:
-               if (desc->channel[first_non_void].normalized)
-                       return V_008F0C_BUF_NUM_FORMAT_SNORM;
-               else if (desc->channel[first_non_void].pure_integer)
+       case UTIL_FORMAT_TYPE_FIXED:
+               if (desc->channel[first_non_void].size >= 32 ||
+                   desc->channel[first_non_void].pure_integer)
                         return V_008F0C_BUF_NUM_FORMAT_SINT;
+               else if (desc->channel[first_non_void].normalized)
+                       return V_008F0C_BUF_NUM_FORMAT_SNORM;
                 else
                         return V_008F0C_BUF_NUM_FORMAT_SSCALED;
                 break;
         case UTIL_FORMAT_TYPE_UNSIGNED:
-               if (desc->channel[first_non_void].normalized)
-                       return V_008F0C_BUF_NUM_FORMAT_UNORM;
-               else if (desc->channel[first_non_void].pure_integer)
+               if (desc->channel[first_non_void].size >= 32 ||
+                   desc->channel[first_non_void].pure_integer)
                         return V_008F0C_BUF_NUM_FORMAT_UINT;
+               else if (desc->channel[first_non_void].normalized)
+                       return V_008F0C_BUF_NUM_FORMAT_UNORM;
                 else
                         return V_008F0C_BUF_NUM_FORMAT_USCALED;
                 break;
@@ -2360,6 +2384,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
         si_dec_framebuffer_counters(&sctx->framebuffer.state);
         util_copy_framebuffer_state(&sctx->framebuffer.state, state);
  
+       sctx->framebuffer.colorbuf_enabled_4bit = 0;
         sctx->framebuffer.spi_shader_col_format = 0;
         sctx->framebuffer.spi_shader_col_format_alpha = 0;
         sctx->framebuffer.spi_shader_col_format_blend = 0;
@@ -2382,6 +2407,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                         si_initialize_color_surface(sctx, surf);
                 }
  
+               sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
                 sctx->framebuffer.spi_shader_col_format |=
                         surf->spi_shader_col_format << (i * 4);
                 sctx->framebuffer.spi_shader_col_format_alpha |=
@@ -2463,6 +2489,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
  
         sctx->need_check_render_feedback = true;
         sctx->do_update_shaders = true;
+       sctx->framebuffer.do_update_surf_dirtiness = true;
  }
  
  static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
@@ -2778,14 +2805,22 @@ si_make_texture_descriptor(struct si_screen *screen,
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
                 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
                 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
+               const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
  
                 switch (pipe_format) {
                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-               case PIPE_FORMAT_X24S8_UINT:
                 case PIPE_FORMAT_X32_S8X24_UINT:
                 case PIPE_FORMAT_X8Z24_UNORM:
                         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
                         break;
+               case PIPE_FORMAT_X24S8_UINT:
+                       /*
+                        * X24S8 is implemented as an 8_8_8_8 data format, to
+                        * fix texture gathers. This affects at least
+                        * GL45-CTS.texture_cube_map_array.sampling on VI.
+                        */
+                       util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
+                       break;
                 default:
                         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
                 }
@@ -3230,6 +3265,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                 }
         }
  
+#ifdef DEBUG
+       rstate->magic = SI_SAMPLER_STATE_MAGIC;
+#endif
         rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
                           S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
                           S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
@@ -3286,6 +3324,12 @@ static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
  
  static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
  {
+#ifdef DEBUG
+       struct si_sampler_state *s = state;
+
+       assert(s->magic == SI_SAMPLER_STATE_MAGIC);
+       s->magic = 0;
+#endif
         free(state);
  }
  
@@ -3298,6 +3342,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                                        const struct pipe_vertex_element *elements)
  {
         struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
+       bool used[SI_NUM_VERTEX_BUFFERS] = {};
         int i;
  
         assert(count <= SI_MAX_ATTRIBS);
@@ -3307,13 +3352,26 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
         v->count = count;
         for (i = 0; i < count; ++i) {
                 const struct util_format_description *desc;
+               const struct util_format_channel_description *channel;
                 unsigned data_format, num_format;
                 int first_non_void;
+               unsigned vbo_index = elements[i].vertex_buffer_index;
+
+               if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
+                       FREE(v);
+                       return NULL;
+               }
+
+               if (!used[vbo_index]) {
+                       v->first_vb_use_mask |= 1 << i;
+                       used[vbo_index] = true;
+               }
  
                 desc = util_format_description(elements[i].src_format);
                 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
                 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
                 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+               channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
  
                 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
                                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
@@ -3328,12 +3386,37 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                  */
                 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
                         if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
-                               v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
                         } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
-                               v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
                         } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
                                 /* This isn't actually used in OpenGL. */
-                               v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
+                       }
+               } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
+                       if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
+                       else
+                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
+               } else if (channel && channel->size == 32 && !channel->pure_integer) {
+                       if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
+                               if (channel->normalized) {
+                                       if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+                                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
+                                       else
+                                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
+                               } else {
+                                       v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
+                               }
+                       } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
+                               if (channel->normalized) {
+                                       if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+                                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
+                                       else
+                                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
+                               } else {
+                                       v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
+                               }
                         }
                 }
  
@@ -3446,14 +3529,14 @@ static void si_set_tess_state(struct pipe_context *ctx,
         pipe_resource_reference(&cb.buffer, NULL);
  }
  
-static void si_texture_barrier(struct pipe_context *ctx)
+static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
  {
         struct si_context *sctx = (struct si_context *)ctx;
  
         sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
                          SI_CONTEXT_INV_GLOBAL_L2 |
-                        SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_CS_PARTIAL_FLUSH;
+                        SI_CONTEXT_FLUSH_AND_INV_CB;
+       sctx->framebuffer.do_update_surf_dirtiness = true;
  }
  
  /* This only ensures coherency for shader image/buffer stores. */
@@ -3910,6 +3993,7 @@ static void si_init_config(struct si_context *sctx)
                 raster_config_1 = 0x0000002a;
                 break;
         case CHIP_POLARIS11:
+       case CHIP_POLARIS12:
                 raster_config = 0x16000012;
                 raster_config_1 = 0x00000000;
                 break;