r600g: add cs memory usage accounting and limit it v3
author    Jerome Glisse <jglisse@redhat.com>    Tue, 29 Jan 2013 17:52:17 +0000 (12:52 -0500)
committer Jerome Glisse <jglisse@redhat.com>    Thu, 31 Jan 2013 19:23:52 +0000 (14:23 -0500)
We are now seeing command streams (cs) that can go over the vram+gtt
size; to avoid failing, flush early any cs that goes over 70% of the
(gtt+vram) usage. The 70% limit is used to allow for some fragmentation.

The idea is to compute a gross estimate of the memory requirements of
each draw call. After each draw call, memory will be precisely
accounted, so the uncertainty is only on the current draw call.
In practice this gives a very good estimate (+/- 10% of the target
memory limit).
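
A rough sketch of the flow this adds (simplified pseudo-C; it mirrors the
r600_context_add_resource_size() and r600_need_cs_space() changes below and
is not the literal driver code):

    /* bind time: each resource used by the upcoming draw is counted once,
     * bumping the per-context estimate for its memory domain */
    if (rr->domains & RADEON_DOMAIN_GTT)
        rctx->gtt += rr->buf->size;
    if (rr->domains & RADEON_DOMAIN_VRAM)
        rctx->vram += rr->buf->size;

    /* draw time: if the pending estimate would push the CS over the 70%
     * VRAM/GTT limit, flush early; either way the estimate is then dropped,
     * because the relocations emitted for the draw account memory precisely */
    if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt))
        ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
    ctx->gtt = 0;
    ctx->vram = 0;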

v2: Remove leftovers from the testing version, remove useless NULL
    checking. Improve commit message.
v3: Add a comment to the code on memory accounting precision.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_winsys.h

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0a3861f30f33ede201305c673de59b9f8ef49a2d..5dd8b130e9e01cacb9540aeddc6ed9db00d7d038 100644
@@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                surf = (struct r600_surface*)state->cbufs[i];
                rtex = (struct r600_texture*)surf->base.texture;
 
+               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
+
                if (!surf->color_initialized) {
                        evergreen_init_color_surface(rctx, surf);
                }
@@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
        if (state->zsbuf) {
                surf = (struct r600_surface*)state->zsbuf;
 
+               r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
                if (!surf->depth_initialized) {
                        evergreen_init_depth_surface(rctx, surf);
                }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 23f488a5e7ca3f57fb44d29bff06e0c95a143358..a89f23069d3db6999a0f4403496d741e804fae25 100644
@@ -359,6 +359,16 @@ out_err:
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
                        boolean count_draw_in)
 {
+       if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
+               ctx->gtt = 0;
+               ctx->vram = 0;
+               ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+               return;
+       }
+       /* all will be accounted once the relocations are emitted */
+       ctx->gtt = 0;
+       ctx->vram = 0;
+
        /* The number of dwords we already used in the CS so far. */
        num_dw += ctx->rings.gfx.cs->cdw;
 
@@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
 
        ctx->pm4_dirty_cdwords = 0;
        ctx->flags = 0;
+       ctx->gtt = 0;
+       ctx->vram = 0;
 
        /* Begin a new CS. */
        r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 3ff42d38f15c71a1d3c0fc2ca23628b07864528f..ec59c929524cbe7c7ee2b32dda0ab6c7f7873d0b 100644
@@ -447,6 +447,10 @@ struct r600_context {
        unsigned                        backend_mask;
        unsigned                        max_db; /* for OQ */
 
+       /* current unaccounted memory usage */
+       uint64_t                        vram;
+       uint64_t                        gtt;
+
        /* Miscellaneous state objects. */
        void                            *custom_dsa_flush;
        void                            *custom_blend_resolve;
@@ -998,4 +1002,28 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
        }
 }
 
+static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_resource *rr = (struct r600_resource *)r;
+
+       if (r == NULL) {
+               return;
+       }
+
+       /*
+        * The idea is to compute a gross estimate of the memory requirements
+        * of each draw call. After each draw call, memory will be precisely
+        * accounted, so the uncertainty is only on the current draw call.
+        * In practice this gives a very good estimate (+/- 10% of the target
+        * memory limit).
+        */
+       if (rr->domains & RADEON_DOMAIN_GTT) {
+               rctx->gtt += rr->buf->size;
+       }
+       if (rr->domains & RADEON_DOMAIN_VRAM) {
+               rctx->vram += rr->buf->size;
+       }
+}
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c0bc2a5b9c131ea0e0886224c1bbb43eb6514432..44cd00ead8ac68e38b525e75e08ef3901ae5cf33 100644
@@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 
                surf = (struct r600_surface*)state->cbufs[i];
                rtex = (struct r600_texture*)surf->base.texture;
+               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
 
                if (!surf->color_initialized || force_cmask_fmask) {
                        r600_init_color_surface(rctx, surf, force_cmask_fmask);
@@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
        if (state->zsbuf) {
                surf = (struct r600_surface*)state->zsbuf;
 
+               r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
                if (!surf->depth_initialized) {
                        r600_init_depth_surface(rctx, surf);
                }
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9386f618b3aaf6d2bd5bc310eb9bb3960ddc672d..33200a6d178796ef5f6a42dd66f291641ab35095 100644
@@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
 
        if (ib) {
                pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
-               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+               r600_context_add_resource_size(ctx, ib->buffer);
        } else {
                pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
        }
@@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
                                        vb[i].buffer_offset = input[i].buffer_offset;
                                        pipe_resource_reference(&vb[i].buffer, input[i].buffer);
                                        new_buffer_mask |= 1 << i;
+                                       r600_context_add_resource_size(ctx, input[i].buffer);
                                } else {
                                        pipe_resource_reference(&vb[i].buffer, NULL);
                                        disable_mask |= 1 << i;
@@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 
                        pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
                        new_mask |= 1 << i;
+                       r600_context_add_resource_size(pipe, views[i]->texture);
                } else {
                        pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
                        disable_mask |= 1 << i;
@@ -806,6 +809,8 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
        rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
        r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
 
+       r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
+
        if (rctx->chip_class <= R700) {
                bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
 
@@ -835,6 +840,8 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
        if (state) {
                r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
 
+               r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
+
                /* Update clip misc state. */
                if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
                    rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
@@ -938,10 +945,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
                } else {
                        u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
                }
+               /* account it in gtt */
+               rctx->gtt += input->buffer_size;
        } else {
                /* Setup the hw buffer. */
                cb->buffer_offset = input->buffer_offset;
                pipe_resource_reference(&cb->buffer, input->buffer);
+               r600_context_add_resource_size(ctx, input->buffer);
        }
 
        state->enabled_mask |= 1 << index;
@@ -1004,6 +1014,7 @@ static void r600_set_so_targets(struct pipe_context *ctx,
        /* Set the new targets. */
        for (i = 0; i < num_targets; i++) {
                pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+               r600_context_add_resource_size(ctx, targets[i]->buffer);
        }
        for (; i < rctx->num_so_targets; i++) {
                pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index cab27040bba8b5b138b1f0b0dbed447fe961f214..6a7115ba76bf023093fc6e34dec41d8f8a7377bc 100644
@@ -383,6 +383,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
     return status;
 }
 
+static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
+{
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    boolean status =
+        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
+        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
+
+    return status;
+}
+
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *buf)
 {
@@ -575,6 +585,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_destroy = radeon_drm_cs_destroy;
     ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
     ws->base.cs_validate = radeon_drm_cs_validate;
+    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
     ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
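
As an illustrative example of the 70% check in radeon_drm_cs_memory_below_limit()
(the board sizes here are hypothetical): with 1024 MB of VRAM and 512 MB of GTT,
the limits come out to roughly 1024 * 0.7 = ~717 MB and 512 * 0.7 = ~358 MB, so
the driver flushes early once used_vram + vram or used_gart + gtt crosses those
values; the remaining ~30% is the slack left for fragmentation.
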
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 7fdef3fad87531eb4ef21d76892442d3cb485153..8b64ef2e1a17e3d41c504afd289f7140333ab7c5 100644
@@ -392,6 +392,16 @@ struct radeon_winsys {
      */
     boolean (*cs_validate)(struct radeon_winsys_cs *cs);
 
+    /**
+     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+     * added so far.
+     *
+     * \param cs        A command stream to validate.
+     * \param vram      VRAM memory size pending to be used
+     * \param gtt       GTT memory size pending to be used
+     */
+    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+
     /**
      * Write a relocated dword to a command buffer.
      *