radeonsi: remove r600_ring::flush callback
[mesa.git] / src / gallium / drivers / radeonsi / si_descriptors.c
index 4a0cc29091144005508ff00fb13b23b0cabb3cf4..984dffa24787bea886ea58ae4123a1f61c801573 100644 (file)
@@ -96,7 +96,13 @@ static uint32_t null_image_descriptor[8] = {
 
 static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
 {
-       return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+       uint64_t va = desc[0] |
+                     ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+
+       /* Sign-extend the 48-bit address. */
+       if (va & (1ull << 47))
+               va |= 0xffffull << 48;
+       return va;
 }
 
 static void si_init_descriptor_list(uint32_t *desc_list,
@@ -161,11 +167,10 @@ static bool si_upload_descriptors(struct si_context *sctx,
        }
 
        uint32_t *ptr;
-       int buffer_offset;
-       u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
+       unsigned buffer_offset;
+       u_upload_alloc(sctx->b.b.const_uploader, first_slot_offset, upload_size,
                       si_optimal_tcc_alignment(sctx, upload_size),
-                      (unsigned*)&buffer_offset,
-                      (struct pipe_resource**)&desc->buffer,
+                      &buffer_offset, (struct pipe_resource**)&desc->buffer,
                       (void**)&ptr);
        if (!desc->buffer) {
                desc->gpu_address = 0;
@@ -183,6 +188,10 @@ static bool si_upload_descriptors(struct si_context *sctx,
        buffer_offset -= first_slot_offset;
        desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
 
+       assert(desc->buffer->flags & RADEON_FLAG_32BIT);
+       assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi);
+       assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi);
+
        si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
        return true;
 }
@@ -256,9 +265,8 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
        rres = (struct r600_resource*)resource;
        priority = si_get_sampler_view_priority(rres);
 
-       radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                           rres, usage, priority,
-                                           check_mem);
+       radeon_add_to_gfx_buffer_list_check_mem(sctx, rres, usage, priority,
+                                               check_mem);
 
        if (resource->target == PIPE_BUFFER)
                return;
@@ -266,9 +274,8 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
        /* Now add separate DCC or HTILE. */
        rtex = (struct r600_texture*)resource;
        if (rtex->dcc_separate_buffer) {
-               radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                                   rtex->dcc_separate_buffer, usage,
-                                                   RADEON_PRIO_DCC, check_mem);
+               radeon_add_to_gfx_buffer_list_check_mem(sctx, rtex->dcc_separate_buffer,
+                                                       usage, RADEON_PRIO_DCC, check_mem);
        }
 }
 
@@ -435,7 +442,7 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
        if (unlikely(!is_buffer && sview->dcc_incompatible)) {
                if (vi_dcc_enabled(rtex, view->u.tex.first_level))
                        if (!si_texture_disable_dcc(&sctx->b, rtex))
-                               sctx->b.decompress_dcc(&sctx->b.b, rtex);
+                               si_decompress_dcc(&sctx->b.b, rtex);
 
                sview->dcc_incompatible = false;
        }
@@ -678,7 +685,7 @@ si_mark_image_range_valid(const struct pipe_image_view *view)
 static void si_set_shader_image_desc(struct si_context *ctx,
                                     const struct pipe_image_view *view,
                                     bool skip_decompress,
-                                    uint32_t *desc)
+                                    uint32_t *desc, uint32_t *fmask_desc)
 {
        struct si_screen *screen = ctx->screen;
        struct r600_resource *res;
@@ -706,13 +713,14 @@ static void si_set_shader_image_desc(struct si_context *ctx,
                 * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
                 * so we don't wanna trigger it.
                 */
-               if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
+               if (tex->is_depth ||
+                   (!fmask_desc && tex->fmask.size != 0)) {
                        assert(!"Z/S and MSAA image stores are not supported");
                        access &= ~PIPE_IMAGE_ACCESS_WRITE;
                }
 
                assert(!tex->is_depth);
-               assert(tex->fmask.size == 0);
+               assert(fmask_desc || tex->fmask.size == 0);
 
                if (uses_dcc && !skip_decompress &&
                    (view->access & PIPE_IMAGE_ACCESS_WRITE ||
@@ -722,7 +730,7 @@ static void si_set_shader_image_desc(struct si_context *ctx,
                         * has been decompressed already.
                         */
                        if (!si_texture_disable_dcc(&ctx->b, tex))
-                               ctx->b.decompress_dcc(&ctx->b.b, tex);
+                               si_decompress_dcc(&ctx->b.b, tex);
                }
 
                if (ctx->b.chip_class >= GFX9) {
@@ -753,7 +761,7 @@ static void si_set_shader_image_desc(struct si_context *ctx,
                                           view->u.tex.first_layer,
                                           view->u.tex.last_layer,
                                           width, height, depth,
-                                          desc, NULL);
+                                          desc, fmask_desc);
                si_set_mutable_tex_desc_fields(screen, tex,
                                               &tex->surface.u.legacy.level[level],
                                               level, level,
@@ -783,7 +791,7 @@ static void si_set_shader_image(struct si_context *ctx,
        if (&images->views[slot] != view)
                util_copy_image_view(&images->views[slot], view);
 
-       si_set_shader_image_desc(ctx, view, skip_decompress, desc);
+       si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL);
 
        if (res->b.b.target == PIPE_BUFFER) {
                images->needs_color_decompress_mask &= ~(1 << slot);
@@ -861,6 +869,77 @@ si_images_update_needs_color_decompress_mask(struct si_images *images)
        }
 }
 
+void si_update_ps_colorbuf0_slot(struct si_context *sctx)
+{
+       struct si_buffer_resources *buffers = &sctx->rw_buffers;
+       struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+       unsigned slot = SI_PS_IMAGE_COLORBUF0;
+       struct pipe_surface *surf = NULL;
+
+       /* si_texture_disable_dcc can get us here again. */
+       if (sctx->blitter->running)
+               return;
+
+       /* See whether FBFETCH is used and color buffer 0 is set. */
+       if (sctx->ps_shader.cso &&
+           sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] &&
+           sctx->framebuffer.state.nr_cbufs &&
+           sctx->framebuffer.state.cbufs[0])
+               surf = sctx->framebuffer.state.cbufs[0];
+
+       /* Return if FBFETCH transitions from disabled to disabled. */
+       if (!buffers->buffers[slot] && !surf)
+               return;
+
+       sctx->ps_uses_fbfetch = surf != NULL;
+       si_update_ps_iter_samples(sctx);
+
+       if (surf) {
+               struct r600_texture *tex = (struct r600_texture*)surf->texture;
+               struct pipe_image_view view;
+
+               assert(tex);
+               assert(!tex->is_depth);
+
+               /* Disable DCC, because the texture is used as both a sampler
+                * and color buffer.
+                */
+               si_texture_disable_dcc(&sctx->b, tex);
+
+               if (tex->resource.b.b.nr_samples <= 1 && tex->cmask_buffer) {
+                       /* Disable CMASK. */
+                       assert(tex->cmask_buffer != &tex->resource);
+                       si_eliminate_fast_color_clear(&sctx->b, tex);
+                       si_texture_discard_cmask(sctx->screen, tex);
+               }
+
+               view.resource = surf->texture;
+               view.format = surf->format;
+               view.access = PIPE_IMAGE_ACCESS_READ;
+               view.u.tex.first_layer = surf->u.tex.first_layer;
+               view.u.tex.last_layer = surf->u.tex.last_layer;
+               view.u.tex.level = surf->u.tex.level;
+
+               /* Set the descriptor. */
+               uint32_t *desc = descs->list + slot*4;
+               memset(desc, 0, 16 * 4);
+               si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);
+
+               pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b);
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+                                         &tex->resource, RADEON_USAGE_READ,
+                                         RADEON_PRIO_SHADER_RW_IMAGE);
+               buffers->enabled_mask |= 1u << slot;
+       } else {
+               /* Clear the descriptor. */
+               memset(descs->list + slot*4, 0, 8*4);
+               pipe_resource_reference(&buffers->buffers[slot], NULL);
+               buffers->enabled_mask &= ~(1u << slot);
+       }
+
+       sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
+}
+
 /* SAMPLER STATES */
 
 static void si_bind_sampler_states(struct pipe_context *ctx,
@@ -1064,9 +1143,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
                        continue;
                }
 
-               int offset = (int)vb->buffer_offset + (int)velems->src_offset[i];
-               int64_t va = (int64_t)rbuffer->gpu_address + offset;
-               assert(va > 0);
+               int64_t offset = (int64_t)((int)vb->buffer_offset) +
+                                velems->src_offset[i];
+               uint64_t va = rbuffer->gpu_address + offset;
 
                int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
                if (sctx->b.chip_class != VI && vb->stride) {
@@ -1182,10 +1261,10 @@ static void si_set_constant_buffer(struct si_context *sctx,
                          S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
                buffers->buffers[slot] = buffer;
-               radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                                   (struct r600_resource*)buffer,
-                                                   buffers->shader_usage_constbuf,
-                                                   buffers->priority_constbuf, true);
+               radeon_add_to_gfx_buffer_list_check_mem(sctx,
+                                                       (struct r600_resource*)buffer,
+                                                       buffers->shader_usage_constbuf,
+                                                       buffers->priority_constbuf, true);
                buffers->enabled_mask |= 1u << slot;
        } else {
                /* Clear the descriptor. */
@@ -1280,9 +1359,9 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
                          S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
                pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
-               radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
-                                                   buffers->shader_usage,
-                                                   buffers->priority, true);
+               radeon_add_to_gfx_buffer_list_check_mem(sctx, buf,
+                                                       buffers->shader_usage,
+                                                       buffers->priority, true);
                buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
 
                buffers->enabled_mask |= 1u << slot;
@@ -1522,15 +1601,18 @@ static void si_reset_buffer_resources(struct si_context *sctx,
                                                    old_va, buf);
                        sctx->descriptors_dirty |= 1u << descriptors_idx;
 
-                       radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                                           (struct r600_resource *)buf,
-                                                           usage, priority, true);
+                       radeon_add_to_gfx_buffer_list_check_mem(sctx,
+                                                               (struct r600_resource *)buf,
+                                                               usage, priority, true);
                }
        }
 }
 
-static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
-                            uint64_t old_va)
+/* Update all resource bindings where the buffer is bound, including
+ * all resource descriptors. This is invalidate_buffer without
+ * the invalidation. */
+void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
+                     uint64_t old_va)
 {
        struct si_context *sctx = (struct si_context*)ctx;
        struct r600_resource *rbuffer = r600_resource(buf);
@@ -1575,10 +1657,10 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
                                                    old_va, buf);
                        sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 
-                       radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                                           rbuffer, buffers->shader_usage,
-                                                           RADEON_PRIO_SHADER_RW_BUFFER,
-                                                           true);
+                       radeon_add_to_gfx_buffer_list_check_mem(sctx,
+                                                               rbuffer, buffers->shader_usage,
+                                                               RADEON_PRIO_SHADER_RW_BUFFER,
+                                                               true);
 
                        /* Update the streamout state. */
                        if (sctx->streamout.begin_emitted)
@@ -1630,7 +1712,7 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
                                        sctx->descriptors_dirty |=
                                                1u << si_sampler_and_image_descriptors_idx(shader);
 
-                                       radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+                                       radeon_add_to_gfx_buffer_list_check_mem(sctx,
                                                                            rbuffer, RADEON_USAGE_READ,
                                                                            RADEON_PRIO_SAMPLER_BUFFER,
                                                                            true);
@@ -1662,8 +1744,8 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
                                        sctx->descriptors_dirty |=
                                                1u << si_sampler_and_image_descriptors_idx(shader);
 
-                                       radeon_add_to_buffer_list_check_mem(
-                                               &sctx->b, &sctx->b.gfx, rbuffer,
+                                       radeon_add_to_gfx_buffer_list_check_mem(
+                                               sctx, rbuffer,
                                                RADEON_USAGE_READWRITE,
                                                RADEON_PRIO_SAMPLER_BUFFER, true);
                                }
@@ -1689,8 +1771,8 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
                                (*tex_handle)->desc_dirty = true;
                                sctx->bindless_descriptors_dirty = true;
 
-                               radeon_add_to_buffer_list_check_mem(
-                                       &sctx->b, &sctx->b.gfx, rbuffer,
+                               radeon_add_to_gfx_buffer_list_check_mem(
+                                       sctx, rbuffer,
                                        RADEON_USAGE_READ,
                                        RADEON_PRIO_SAMPLER_BUFFER, true);
                        }
@@ -1718,8 +1800,8 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
                                (*img_handle)->desc_dirty = true;
                                sctx->bindless_descriptors_dirty = true;
 
-                               radeon_add_to_buffer_list_check_mem(
-                                       &sctx->b, &sctx->b.gfx, rbuffer,
+                               radeon_add_to_gfx_buffer_list_check_mem(
+                                       sctx, rbuffer,
                                        RADEON_USAGE_READWRITE,
                                        RADEON_PRIO_SAMPLER_BUFFER, true);
                        }
@@ -1727,25 +1809,6 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
        }
 }
 
-/* Reallocate a buffer a update all resource bindings where the buffer is
- * bound.
- *
- * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
- * idle by discarding its contents. Apps usually tell us when to do this using
- * map_buffer flags, for example.
- */
-static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
-{
-       struct si_context *sctx = (struct si_context*)ctx;
-       struct r600_resource *rbuffer = r600_resource(buf);
-       uint64_t old_va = rbuffer->gpu_address;
-
-       /* Reallocate the buffer in the same pipe_resource. */
-       si_alloc_resource(sctx->screen, rbuffer);
-
-       si_rebind_buffer(ctx, buf, old_va);
-}
-
 static void si_upload_bindless_descriptor(struct si_context *sctx,
                                          unsigned desc_slot,
                                          unsigned num_dwords)
@@ -1846,7 +1909,7 @@ static void si_update_bindless_image_descriptor(struct si_context *sctx,
        memcpy(desc_list, desc->list + desc_slot_offset,
               sizeof(desc_list));
        si_set_shader_image_desc(sctx, view, true,
-                                desc->list + desc_slot_offset);
+                                desc->list + desc_slot_offset, NULL);
 
        if (memcmp(desc_list, desc->list + desc_slot_offset,
                   sizeof(desc_list))) {
@@ -1912,6 +1975,7 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
        }
 
        si_update_all_resident_texture_descriptors(sctx);
+       si_update_ps_colorbuf0_slot(sctx);
 }
 
 /* SHADER USER DATA */
@@ -2011,7 +2075,7 @@ static void si_emit_shader_pointer_body(struct si_screen *sscreen,
        radeon_emit(cs, va);
 
        if (HAVE_32BIT_POINTERS)
-               assert((va >> 32) == sscreen->info.address32_hi);
+               assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
        else
                radeon_emit(cs, va >> 32);
 }
@@ -2451,7 +2515,7 @@ static uint64_t si_create_image_handle(struct pipe_context *ctx,
        memset(desc_list, 0, sizeof(desc_list));
        si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
 
-       si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
+       si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL);
 
        img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
                                                              sizeof(desc_list));
@@ -2701,8 +2765,6 @@ void si_init_all_descriptors(struct si_context *sctx)
        sctx->b.b.create_image_handle = si_create_image_handle;
        sctx->b.b.delete_image_handle = si_delete_image_handle;
        sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
-       sctx->b.invalidate_buffer = si_invalidate_buffer;
-       sctx->b.rebind_buffer = si_rebind_buffer;
 
        /* Shader user data. */
        si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,