radeonsi: move VS_STATE.LS_OUT_PATCH_SIZE a few bits higher to make space there
[mesa.git] / src / gallium / drivers / radeonsi / si_blit.c
index 610de289a20e4f8c64200653713b4b67d1842214..5ac68406f7fb467f717a2d322f5de3e69b523866 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "si_pipe.h"
 #include "si_compute.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_log.h"
 #include "util/u_surface.h"
 
@@ -58,7 +58,7 @@ void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
                util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
                util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
                util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask);
-               util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+               util_blitter_save_scissor(sctx->blitter, &sctx->scissors[0]);
                util_blitter_save_window_rectangles(sctx->blitter,
                                                    sctx->window_rectangles_include,
                                                    sctx->num_window_rectangles,
@@ -99,6 +99,7 @@ void si_blitter_end(struct si_context *sctx)
         * non-global VS user SGPRs. */
        sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
        sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
+       sctx->vertex_buffer_user_sgprs_dirty = sctx->num_vertex_elements > 0;
        si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 }
 
@@ -180,31 +181,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
        return fully_copied_levels;
 }
 
-void si_blit_decompress_depth(struct pipe_context *ctx,
-                             struct si_texture *texture,
-                             struct si_texture *staging,
-                             unsigned first_level, unsigned last_level,
-                             unsigned first_layer, unsigned last_layer,
-                             unsigned first_sample, unsigned last_sample)
-{
-       const struct util_format_description *desc;
-       unsigned planes = 0;
-
-       assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");
-
-       desc = util_format_description(staging->buffer.b.b.format);
-
-       if (util_format_has_depth(desc))
-               planes |= PIPE_MASK_Z;
-       if (util_format_has_stencil(desc))
-               planes |= PIPE_MASK_S;
-
-       si_blit_dbcb_copy(
-               (struct si_context *)ctx, texture, staging, planes,
-               u_bit_consecutive(first_level, last_level - first_level + 1),
-               first_layer, last_layer, first_sample, last_sample);
-}
-
 /* Helper function for si_blit_decompress_zs_in_place.
  */
 static void
@@ -355,7 +331,7 @@ si_decompress_depth(struct si_context *sctx,
         */
        if (copy_planes &&
            (tex->flushed_depth_texture ||
-            si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b, NULL))) {
+            si_init_flushed_depth_texture(&sctx->b, &tex->buffer.b.b))) {
                struct si_texture *dst = tex->flushed_depth_texture;
                unsigned fully_copied_levels;
                unsigned levels = 0;
@@ -386,8 +362,9 @@ si_decompress_depth(struct si_context *sctx,
        }
 
        if (inplace_planes) {
-               bool has_htile = si_htile_enabled(tex, first_level);
-               bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level);
+               bool has_htile = si_htile_enabled(tex, first_level, inplace_planes);
+               bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level,
+                                                                 inplace_planes);
 
                /* Don't decompress if there is no HTILE or when HTILE is
                 * TC-compatible. */
@@ -456,7 +433,8 @@ static void si_blit_decompress_color(struct si_context *sctx,
                                     struct si_texture *tex,
                                     unsigned first_level, unsigned last_level,
                                     unsigned first_layer, unsigned last_layer,
-                                    bool need_dcc_decompress)
+                                    bool need_dcc_decompress,
+                                    bool need_fmask_expand)
 {
        void* custom_blend;
        unsigned layer, checked_last_layer, max_layer;
@@ -466,7 +444,7 @@ static void si_blit_decompress_color(struct si_context *sctx,
        if (!need_dcc_decompress)
                level_mask &= tex->dirty_level_mask;
        if (!level_mask)
-               return;
+               goto expand_fmask;
 
        if (unlikely(sctx->log))
                u_log_printf(sctx->log,
@@ -477,7 +455,7 @@ static void si_blit_decompress_color(struct si_context *sctx,
        if (need_dcc_decompress) {
                custom_blend = sctx->custom_blend_dcc_decompress;
 
-               assert(tex->dcc_offset);
+               assert(tex->surface.dcc_offset);
 
                /* disable levels without DCC */
                for (int i = first_level; i <= last_level; i++) {
@@ -536,19 +514,26 @@ static void si_blit_decompress_color(struct si_context *sctx,
        si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples,
                                   vi_dcc_enabled(tex, first_level),
                                   tex->surface.u.gfx9.dcc.pipe_aligned);
+
+expand_fmask:
+       if (need_fmask_expand && tex->surface.fmask_offset && !tex->fmask_is_identity) {
+               si_compute_expand_fmask(&sctx->b, &tex->buffer.b.b);
+               tex->fmask_is_identity = true;
+       }
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex,
-                           unsigned first_level, unsigned last_level)
+                           unsigned first_level, unsigned last_level,
+                           bool need_fmask_expand)
 {
        /* CMASK or DCC can be discarded and we can still end up here. */
-       if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset)
+       if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->surface.dcc_offset)
                return;
 
        si_blit_decompress_color(sctx, tex, first_level, last_level, 0,
                                 util_max_layer(&tex->buffer.b.b, first_level),
-                                false);
+                                false, need_fmask_expand);
 }
 
 static void
@@ -570,7 +555,7 @@ si_decompress_sampler_color_textures(struct si_context *sctx,
                tex = (struct si_texture *)view->texture;
 
                si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
-                                           view->u.tex.last_level);
+                                           view->u.tex.last_level, false);
        }
 }
 
@@ -593,7 +578,8 @@ si_decompress_image_color_textures(struct si_context *sctx,
                tex = (struct si_texture *)view->resource;
 
                si_decompress_color_texture(sctx, tex, view->u.tex.level,
-                                           view->u.tex.level);
+                                           view->u.tex.level,
+                                           view->access & PIPE_IMAGE_ACCESS_WRITE);
        }
 }
 
@@ -606,7 +592,7 @@ static void si_check_render_feedback_texture(struct si_context *sctx,
 {
        bool render_feedback = false;
 
-       if (!tex->dcc_offset)
+       if (!tex->surface.dcc_offset)
                return;
 
        for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
@@ -753,7 +739,7 @@ static void si_decompress_resident_textures(struct si_context *sctx)
                struct si_texture *tex = (struct si_texture *)view->texture;
 
                si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
-                                           view->u.tex.last_level);
+                                           view->u.tex.last_level, false);
        }
 
        util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress,
@@ -777,7 +763,8 @@ static void si_decompress_resident_images(struct si_context *sctx)
                struct si_texture *tex = (struct si_texture *)view->resource;
 
                si_decompress_color_texture(sctx, tex, view->u.tex.level,
-                                           view->u.tex.level);
+                                           view->u.tex.level,
+                                           view->access & PIPE_IMAGE_ACCESS_WRITE);
        }
 }
 
@@ -822,14 +809,14 @@ void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
                        si_decompress_color_texture(sctx,
                                                    (struct si_texture*)cb0->texture,
                                                    cb0->u.tex.first_layer,
-                                                   cb0->u.tex.last_layer);
+                                                   cb0->u.tex.last_layer, false);
                }
 
                si_check_render_feedback(sctx);
        } else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
-               if (sctx->cs_shader_state.program->uses_bindless_samplers)
+               if (sctx->cs_shader_state.program->sel.info.uses_bindless_samplers)
                        si_decompress_resident_textures(sctx);
-               if (sctx->cs_shader_state.program->uses_bindless_images)
+               if (sctx->cs_shader_state.program->sel.info.uses_bindless_images)
                        si_decompress_resident_images(sctx);
        }
 }
@@ -864,7 +851,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
                si_decompress_depth(sctx, stex, planes,
                                    level, level,
                                    first_layer, last_layer);
-       } else if (stex->surface.fmask_size || stex->cmask_buffer || stex->dcc_offset) {
+       } else if (stex->surface.fmask_size || stex->cmask_buffer || stex->surface.dcc_offset) {
                /* If we've rendered into the framebuffer and it's a blitting
                 * source, make sure the decompression pass is invoked
                 * by dirtying the framebuffer.
@@ -879,7 +866,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
                }
 
                si_blit_decompress_color(sctx, stex, level, level,
-                                        first_layer, last_layer, false);
+                                        first_layer, last_layer, false, false);
        }
 }
 
@@ -920,7 +907,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
            !util_format_is_compressed(dst->format) &&
            !util_format_is_depth_or_stencil(src->format) &&
            src->nr_samples <= 1 &&
-           !sdst->dcc_offset &&
+           !sdst->surface.dcc_offset &&
            !(dst->target != src->target &&
              (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
                si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box);
@@ -1086,7 +1073,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_texture *src = (struct si_texture*)info->src.resource;
        struct si_texture *dst = (struct si_texture*)info->dst.resource;
-       MAYBE_UNUSED struct si_texture *stmp;
+       ASSERTED struct si_texture *stmp;
        unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
        unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
        enum pipe_format format = info->src.format;
@@ -1134,6 +1121,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
                        /* The next fast clear will switch to this mode to
                         * get direct hw resolve next time if the mode is
                         * different now.
+                        *
+                        * TODO-GFX10: This does not work in GFX10 because MSAA
+                        * is restricted to 64KB_R_X and 64KB_Z_X swizzle modes.
+                        * In some cases we could change the swizzle of the
+                        * destination texture instead, but the more general
+                        * solution is to implement compute shader resolve.
                         */
                        src->last_msaa_resolve_target_micro_mode =
                                dst->surface.micro_tile_mode;
@@ -1145,18 +1138,10 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
                 * This is still the fastest codepath even with this clear.
                 */
                if (vi_dcc_enabled(dst, info->dst.level)) {
-                       /* TODO: Implement per-level DCC clears for GFX9. */
-                       if (sctx->chip_class >= GFX9 &&
-                           info->dst.resource->last_level != 0)
+                       if (!vi_dcc_clear_level(sctx, dst, info->dst.level,
+                                               DCC_UNCOMPRESSED))
                                goto resolve_to_temp;
 
-                       /* This can happen with mipmapping. */
-                       if (sctx->chip_class == VI &&
-                           !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size)
-                               goto resolve_to_temp;
-
-                       vi_dcc_clear_level(sctx, dst, info->dst.level,
-                                          0xFFFFFFFF);
                        dst->dirty_level_mask &= ~(1 << info->dst.level);
                }
 
@@ -1179,10 +1164,13 @@ resolve_to_temp:
        templ.array_size = 1;
        templ.usage = PIPE_USAGE_DEFAULT;
        templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING |
+                     SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE |
+                     SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) |
                      SI_RESOURCE_FLAG_DISABLE_DCC;
 
        /* The src and dst microtile modes must be the same. */
-       if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
+       if (sctx->chip_class <= GFX8 &&
+           src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
                templ.bind = PIPE_BIND_SCANOUT;
        else
                templ.bind = 0;
@@ -1229,7 +1217,6 @@ static void si_blit(struct pipe_context *ctx,
         * on failure (recursion).
         */
        if (dst->surface.is_linear &&
-           sctx->dma_copy &&
            util_can_blit_via_copy_region(info, false)) {
                sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
                                 info->dst.box.x, info->dst.box.y,
@@ -1249,12 +1236,12 @@ static void si_blit(struct pipe_context *ctx,
        vi_disable_dcc_if_incompatible_format(sctx, info->dst.resource,
                                              info->dst.level,
                                              info->dst.format);
-       si_decompress_subresource(ctx, info->src.resource, info->mask,
+       si_decompress_subresource(ctx, info->src.resource, PIPE_MASK_RGBAZS,
                                  info->src.level,
                                  info->src.box.z,
                                  info->src.box.z + info->src.box.depth - 1);
 
-       if (sctx->screen->debug_flags & DBG(FORCE_DMA) &&
+       if (sctx->screen->debug_flags & DBG(FORCE_SDMA) &&
            util_try_blit_via_copy_region(ctx, info))
                return;
 
@@ -1264,11 +1251,11 @@ static void si_blit(struct pipe_context *ctx,
        si_blitter_end(sctx);
 }
 
-static boolean si_generate_mipmap(struct pipe_context *ctx,
-                                 struct pipe_resource *tex,
-                                 enum pipe_format format,
-                                 unsigned base_level, unsigned last_level,
-                                 unsigned first_layer, unsigned last_layer)
+static bool si_generate_mipmap(struct pipe_context *ctx,
+                              struct pipe_resource *tex,
+                              enum pipe_format format,
+                              unsigned base_level, unsigned last_level,
+                              unsigned first_layer, unsigned last_layer)
 {
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_texture *stex = (struct si_texture *)tex;
@@ -1314,13 +1301,15 @@ static void si_flush_resource(struct pipe_context *ctx,
        if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty)
                return;
 
-       if (!tex->is_depth && (tex->cmask_buffer || tex->dcc_offset)) {
+       if (!tex->is_depth && (tex->cmask_buffer || tex->surface.dcc_offset)) {
                si_blit_decompress_color(sctx, tex, 0, res->last_level,
                                         0, util_max_layer(res, 0),
-                                        tex->dcc_separate_buffer != NULL);
+                                        tex->dcc_separate_buffer != NULL, false);
 
-               if (tex->display_dcc_offset)
+               if (tex->surface.display_dcc_offset && tex->displayable_dcc_dirty) {
                        si_retile_dcc(sctx, tex);
+                       tex->displayable_dcc_dirty = false;
+               }
        }
 
        /* Always do the analysis even if DCC is disabled at the moment. */
@@ -1359,12 +1348,12 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
        /* If graphics is disabled, we can't decompress DCC, but it shouldn't
         * be compressed either. The caller should simply discard it.
         */
-       if (!tex->dcc_offset || !sctx->has_graphics)
+       if (!tex->surface.dcc_offset || !sctx->has_graphics)
                return;
 
        si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
                                 0, util_max_layer(&tex->buffer.b.b, 0),
-                                true);
+                                true, false);
 }
 
 void si_init_blit_functions(struct si_context *sctx)