radeonsi: extract TGSI memory/texture opcode handling into its own file

[mesa.git] / src / gallium / drivers / radeonsi / si_descriptors.c
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c

index fb82f8f3ee968703a3f60c14b6464a55aa7e05c2..c92a6575ede4b613fe2b2fe58305b3441d4f503d 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -404,26 +404,28 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
                 va += base_level_info->offset;
         }
  
-       if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) {
-               meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
-                         tex->dcc_offset;
-
-               if (sscreen->b.chip_class <= VI)
-                       meta_va += base_level_info->dcc_offset;
-       } else if (tex->tc_compatible_htile && !is_stencil) {
-               meta_va = tex->htile_buffer->gpu_address;
-       }
-
         state[0] = va >> 8;
         state[1] &= C_008F14_BASE_ADDRESS_HI;
         state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
  
-       state[6] &= C_008F28_COMPRESSION_EN;
-       state[7] = 0;
+       if (sscreen->b.chip_class >= VI) {
+               state[6] &= C_008F28_COMPRESSION_EN;
+               state[7] = 0;
+
+               if (vi_dcc_enabled(tex, first_level)) {
+                       meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
+                                 tex->dcc_offset;
+
+                       if (sscreen->b.chip_class <= VI)
+                               meta_va += base_level_info->dcc_offset;
+               } else if (tex->tc_compatible_htile && !is_stencil) {
+                       meta_va = tex->htile_buffer->gpu_address;
+               }
  
-       if (meta_va) {
-               state[6] |= S_008F28_COMPRESSION_EN(1);
-               state[7] = meta_va >> 8;
+               if (meta_va) {
+                       state[6] |= S_008F28_COMPRESSION_EN(1);
+                       state[7] = meta_va >> 8;
+               }
         }
  
         if (sscreen->b.chip_class >= GFX9) {
@@ -480,12 +482,21 @@ static void si_set_sampler_view(struct si_context *sctx,
  
         if (view) {
                 struct r600_texture *rtex = (struct r600_texture *)view->texture;
+               bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
+
+               if (unlikely(!is_buffer && rview->dcc_incompatible)) {
+                       if (vi_dcc_enabled(rtex, view->u.tex.first_level))
+                               if (!r600_texture_disable_dcc(&sctx->b, rtex))
+                                       sctx->b.decompress_dcc(&sctx->b.b, rtex);
+
+                       rview->dcc_incompatible = false;
+               }
  
                 assert(rtex); /* views with texture == NULL aren't supported */
                 pipe_sampler_view_reference(&views->views[slot], view);
                 memcpy(desc, rview->state, 8*4);
  
-               if (rtex->resource.b.b.target == PIPE_BUFFER) {
+               if (is_buffer) {
                         rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
  
                         si_set_buf_desc_address(&rtex->resource,
@@ -505,8 +516,7 @@ static void si_set_sampler_view(struct si_context *sctx,
                                                        desc);
                 }
  
-               if (rtex->resource.b.b.target != PIPE_BUFFER &&
-                   rtex->fmask.size) {
+               if (!is_buffer && rtex->fmask.size) {
                         memcpy(desc + 8,
                                rview->fmask_state, 8*4);
                 } else {
@@ -545,8 +555,16 @@ static void si_set_sampler_view(struct si_context *sctx,
  
  static bool is_compressed_colortex(struct r600_texture *rtex)
  {
-       return rtex->cmask.size || rtex->fmask.size ||
-              (rtex->dcc_offset && rtex->dirty_level_mask);
+       return rtex->fmask.size ||
+              (rtex->dirty_level_mask &&
+               (rtex->cmask.size || rtex->dcc_offset));
+}
+
+static bool depth_needs_decompression(struct r600_texture *rtex,
+                                     struct si_sampler_view *sview)
+{
+       return rtex->db_compatible &&
+              (!rtex->tc_compatible_htile || sview->is_stencil_sampler);
  }
  
  static void si_update_compressed_tex_shader_mask(struct si_context *sctx,
@@ -592,8 +610,7 @@ static void si_set_sampler_views(struct pipe_context *ctx,
                                 (struct r600_texture*)views[i]->texture;
                         struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
  
-                       if (rtex->db_compatible &&
-                           (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
+                       if (depth_needs_decompression(rtex, rview)) {
                                 samplers->depth_texture_mask |= 1u << slot;
                         } else {
                                 samplers->depth_texture_mask &= ~(1u << slot);
@@ -739,8 +756,7 @@ static void si_set_shader_image(struct si_context *ctx,
                 si_make_buffer_descriptor(screen, res,
                                           view->format,
                                           view->u.buf.offset,
-                                         view->u.buf.size,
-                                         descs->list + slot * 8);
+                                         view->u.buf.size, desc);
                 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
  
                 images->compressed_colortex_mask &= ~(1 << slot);
@@ -749,9 +765,8 @@ static void si_set_shader_image(struct si_context *ctx,
                 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
                 struct r600_texture *tex = (struct r600_texture *)res;
                 unsigned level = view->u.tex.level;
-               unsigned width, height, depth;
-               bool uses_dcc = tex->dcc_offset &&
-                               level < tex->surface.num_dcc_levels;
+               unsigned width, height, depth, hw_level;
+               bool uses_dcc = vi_dcc_enabled(tex, level);
  
                 assert(!tex->is_depth);
                 assert(tex->fmask.size == 0);
@@ -779,20 +794,31 @@ static void si_set_shader_image(struct si_context *ctx,
                     p_atomic_read(&tex->framebuffers_bound))
                         ctx->need_check_render_feedback = true;
  
-               /* Always force the base level to the selected level.
-                *
-                * This is required for 3D textures, where otherwise
-                * selecting a single slice for non-layered bindings
-                * fails. It doesn't hurt the other targets.
-                */
-               width = u_minify(res->b.b.width0, level);
-               height = u_minify(res->b.b.height0, level);
-               depth = u_minify(res->b.b.depth0, level);
+               if (ctx->b.chip_class >= GFX9) {
+                       /* Always set the base address. The swizzle modes don't
+                        * allow setting mipmap level offsets as the base.
+                        */
+                       width = res->b.b.width0;
+                       height = res->b.b.height0;
+                       depth = res->b.b.depth0;
+                       hw_level = level;
+               } else {
+                       /* Always force the base level to the selected level.
+                        *
+                        * This is required for 3D textures, where otherwise
+                        * selecting a single slice for non-layered bindings
+                        * fails. It doesn't hurt the other targets.
+                        */
+                       width = u_minify(res->b.b.width0, level);
+                       height = u_minify(res->b.b.height0, level);
+                       depth = u_minify(res->b.b.depth0, level);
+                       hw_level = 0;
+               }
  
                 si_make_texture_descriptor(screen, tex,
                                            false, res->b.b.target,
                                            view->format, swizzle,
-                                          0, 0,
+                                          hw_level, hw_level,
                                            view->u.tex.first_layer,
                                            view->u.tex.last_layer,
                                            width, height, depth,
@@ -982,11 +1008,11 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
  
                 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
                         continue;
-               if (!sctx->vertex_buffer[vb].buffer)
+               if (!sctx->vertex_buffer[vb].buffer.resource)
                         continue;
  
                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                                     (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
+                                     (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
                                       RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
         }
  
@@ -1045,7 +1071,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
                 uint32_t *desc = &ptr[i*4];
  
                 vb = &sctx->vertex_buffer[vbo_index];
-               rbuffer = (struct r600_resource*)vb->buffer;
+               rbuffer = (struct r600_resource*)vb->buffer.resource;
                 if (!rbuffer) {
                         memset(desc, 0, 16);
                         continue;
@@ -1061,18 +1087,18 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
  
                 if (sctx->b.chip_class != VI && vb->stride) {
                         /* Round up by rounding down and adding 1 */
-                       desc[2] = (vb->buffer->width0 - offset -
+                       desc[2] = (vb->buffer.resource->width0 - offset -
                                    velems->format_size[i]) /
                                   vb->stride + 1;
                 } else {
-                       desc[2] = vb->buffer->width0 - offset;
+                       desc[2] = vb->buffer.resource->width0 - offset;
                 }
  
                 desc[3] = velems->rsrc_word3[i];
  
                 if (first_vb_use_mask & (1 << i)) {
                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                                             (struct r600_resource*)vb->buffer,
+                                             (struct r600_resource*)vb->buffer.resource,
                                               RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
                 }
         }
@@ -1600,25 +1626,15 @@ static void si_reset_buffer_resources(struct si_context *sctx,
         }
  }
  
-/* Reallocate a buffer a update all resource bindings where the buffer is
- * bound.
- *
- * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
- * idle by discarding its contents. Apps usually tell us when to do this using
- * map_buffer flags, for example.
- */
-static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
+static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
+                            uint64_t old_va)
  {
         struct si_context *sctx = (struct si_context*)ctx;
         struct r600_resource *rbuffer = r600_resource(buf);
         unsigned i, shader;
-       uint64_t old_va = rbuffer->gpu_address;
         unsigned num_elems = sctx->vertex_elements ?
                                        sctx->vertex_elements->count : 0;
  
-       /* Reallocate the buffer in the same pipe_resource. */
-       r600_alloc_resource(&sctx->screen->b, rbuffer);
-
         /* We changed the buffer, now we need to bind it where the old one
          * was bound. This consists of 2 things:
          *   1) Updating the resource descriptor and dirtying it.
@@ -1632,10 +1648,10 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
  
                         if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
                                 continue;
-                       if (!sctx->vertex_buffer[vb].buffer)
+                       if (!sctx->vertex_buffer[vb].buffer.resource)
                                 continue;
  
-                       if (sctx->vertex_buffer[vb].buffer == buf) {
+                       if (sctx->vertex_buffer[vb].buffer.resource == buf) {
                                 sctx->vertex_buffers_dirty = true;
                                 break;
                         }
@@ -1746,6 +1762,25 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
         }
  }
  
+/* Reallocate a buffer and update all resource bindings where the buffer is
+ * bound.
+ *
+ * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
+ * idle by discarding its contents. Apps usually tell us when to do this using
+ * map_buffer flags, for example.
+ */
+static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
+{
+       struct si_context *sctx = (struct si_context*)ctx;
+       struct r600_resource *rbuffer = r600_resource(buf);
+       uint64_t old_va = rbuffer->gpu_address;
+
+       /* Reallocate the buffer in the same pipe_resource. */
+       r600_alloc_resource(&sctx->screen->b, rbuffer);
+
+       si_rebind_buffer(ctx, buf, old_va);
+}
+
  /* Update mutable image descriptor fields of all bound textures. */
  void si_update_all_texture_descriptors(struct si_context *sctx)
  {
@@ -1821,8 +1856,12 @@ static void si_set_user_data_base(struct si_context *sctx,
         if (*base != new_base) {
                 *base = new_base;
  
-               if (new_base)
+               if (new_base) {
                         si_mark_shader_pointers_dirty(sctx, shader);
+
+                       if (shader == PIPE_SHADER_VERTEX)
+                               sctx->last_vs_state = ~0;
+               }
         }
  }
  
@@ -1835,15 +1874,21 @@ static void si_set_user_data_base(struct si_context *sctx,
  void si_shader_change_notify(struct si_context *sctx)
  {
         /* VS can be bound as VS, ES, or LS. */
-       if (sctx->tes_shader.cso)
-               si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
-                                     R_00B530_SPI_SHADER_USER_DATA_LS_0);
-       else if (sctx->gs_shader.cso)
+       if (sctx->tes_shader.cso) {
+               if (sctx->b.chip_class >= GFX9) {
+                       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+                                             R_00B430_SPI_SHADER_USER_DATA_LS_0);
+               } else {
+                       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+                                             R_00B530_SPI_SHADER_USER_DATA_LS_0);
+               }
+       } else if (sctx->gs_shader.cso) {
                 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                                       R_00B330_SPI_SHADER_USER_DATA_ES_0);
-       else
+       } else {
                 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                                       R_00B130_SPI_SHADER_USER_DATA_VS_0);
+       }
  
         /* TES can be bound as ES, VS, or not bound. */
         if (sctx->tes_shader.cso) {
@@ -1890,12 +1935,24 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx,
                                        R_00B030_SPI_SHADER_USER_DATA_PS_0);
                 si_emit_shader_pointer(sctx, descs,
                                        R_00B130_SPI_SHADER_USER_DATA_VS_0);
-               si_emit_shader_pointer(sctx, descs,
-                                      R_00B230_SPI_SHADER_USER_DATA_GS_0);
-               si_emit_shader_pointer(sctx, descs,
-                                      R_00B330_SPI_SHADER_USER_DATA_ES_0);
-               si_emit_shader_pointer(sctx, descs,
-                                      R_00B430_SPI_SHADER_USER_DATA_HS_0);
+
+               if (sctx->b.chip_class >= GFX9) {
+                       /* GFX9 merged LS-HS and ES-GS.
+                        * Set RW_BUFFERS in the special registers, so that
+                        * it's preloaded into s[0:1] instead of s[8:9].
+                        */
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
+               } else {
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B230_SPI_SHADER_USER_DATA_GS_0);
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B330_SPI_SHADER_USER_DATA_ES_0);
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B430_SPI_SHADER_USER_DATA_HS_0);
+               }
         }
  
         mask = sctx->shader_pointers_dirty &
@@ -1943,7 +2000,14 @@ void si_init_all_descriptors(struct si_context *sctx)
         int i;
         unsigned ce_offset = 0;
  
+       STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
+       STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
+
         for (i = 0; i < SI_NUM_SHADERS; i++) {
+               bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
+                               i == PIPE_SHADER_TESS_CTRL;
+               bool gfx9_gs = sctx->b.chip_class == GFX9 &&
+                              i == PIPE_SHADER_GEOMETRY;
                 /* GFX9 has only 4KB of CE, while previous chips had 32KB.
                  * Rarely used descriptors don't use CE RAM.
                  */
@@ -1956,22 +2020,34 @@ void si_init_all_descriptors(struct si_context *sctx)
  
                 si_init_buffer_resources(&sctx->const_buffers[i],
                                          si_const_buffer_descriptors(sctx, i),
-                                        SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
+                                        SI_NUM_CONST_BUFFERS,
+                                        gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
+                                        gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
+                                                  SI_SGPR_CONST_BUFFERS,
                                          RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
                                          &ce_offset);
                 si_init_buffer_resources(&sctx->shader_buffers[i],
                                          si_shader_buffer_descriptors(sctx, i),
-                                        SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+                                        SI_NUM_SHADER_BUFFERS,
+                                        gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
+                                        gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
+                                                  SI_SGPR_SHADER_BUFFERS,
                                          RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
                                          shaderbufs_use_ce ? &ce_offset : NULL);
  
                 si_init_descriptors(si_sampler_descriptors(sctx, i),
-                                   SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+                                   gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
+                                   gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
+                                             SI_SGPR_SAMPLERS,
+                                   16, SI_NUM_SAMPLERS,
                                     null_texture_descriptor,
                                     samplers_use_ce ? &ce_offset : NULL);
  
                 si_init_descriptors(si_image_descriptors(sctx, i),
-                                   SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+                                   gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
+                                   gfx9_gs ? GFX9_SGPR_GS_IMAGES :
+                                             SI_SGPR_IMAGES,
+                                   8, SI_NUM_IMAGES,
                                     null_image_descriptor,
                                     images_use_ce ? &ce_offset : NULL);
         }
@@ -2000,6 +2076,7 @@ void si_init_all_descriptors(struct si_context *sctx)
         sctx->b.b.set_sampler_views = si_set_sampler_views;
         sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
         sctx->b.invalidate_buffer = si_invalidate_buffer;
+       sctx->b.rebind_buffer = si_rebind_buffer;
  
         /* Shader user data. */
         si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
@@ -2007,8 +2084,18 @@ void si_init_all_descriptors(struct si_context *sctx)
  
         /* Set default and immutable mappings. */
         si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
-       si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
-       si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+
+       if (sctx->b.chip_class >= GFX9) {
+               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+                                     R_00B430_SPI_SHADER_USER_DATA_LS_0);
+               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+                                     R_00B330_SPI_SHADER_USER_DATA_ES_0);
+       } else {
+               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+                                     R_00B430_SPI_SHADER_USER_DATA_HS_0);
+               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+                                     R_00B230_SPI_SHADER_USER_DATA_GS_0);
+       }
         si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
  }