lima: use single BO for GP outputs

author Vasily Khoruzhick <anarsoul@gmail.com>

Sun, 24 Nov 2019 22:34:44 +0000 (14:34 -0800)

committer Vasily Khoruzhick <anarsoul@gmail.com>

Thu, 19 Dec 2019 22:28:32 +0000 (14:28 -0800)
author Vasily Khoruzhick <anarsoul@gmail.com>
Sun, 24 Nov 2019 22:34:44 +0000 (14:34 -0800)
committer Vasily Khoruzhick <anarsoul@gmail.com>
Thu, 19 Dec 2019 22:28:32 +0000 (14:28 -0800)
diff --git a/src/gallium/drivers/lima/lima_context.c b/src/gallium/drivers/lima/lima_context.c

index 813e87361bbf12270050998bb9d3728c02d39dcc..a031222a423ce22363b9f93f56bd7ab67f271cbe 100644 (file)
--- a/src/gallium/drivers/lima/lima_context.c
+++ b/src/gallium/drivers/lima/lima_context.c
@@ -146,6 +146,9 @@ lima_context_destroy(struct pipe_context *pctx)
     if (ctx->plb_gp_stream)
        lima_bo_unreference(ctx->plb_gp_stream);
  
+   if (ctx->gp_output)
+      lima_bo_unreference(ctx->gp_output);
+
     if (ctx->plb_pp_stream)
        assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream));
  
diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h

index 1a0dee27885f5a53f12440e4b1d89d14e7aba2f6..1b6a89a7bc10b50df4cf7cd6e1d8f6fdb191bdc7 100644 (file)
--- a/src/gallium/drivers/lima/lima_context.h
+++ b/src/gallium/drivers/lima/lima_context.h
@@ -121,8 +121,6 @@ struct lima_context_constant_buffer {
  };
  
  enum lima_ctx_buff {
-   lima_ctx_buff_sh_gl_pos,
-   lima_ctx_buff_sh_gl_point_size,
     lima_ctx_buff_gp_varying_info,
     lima_ctx_buff_gp_attribute_info,
     lima_ctx_buff_gp_uniform,
@@ -226,7 +224,9 @@ struct lima_context {
     struct lima_bo *gp_tile_heap[LIMA_CTX_PLB_MAX_NUM];
     #define gp_tile_heap_size         0x100000
     struct lima_bo *plb_gp_stream;
-   struct lima_bo *sh_varying;
+   struct lima_bo *gp_output;
+   uint32_t gp_output_varyings_offt;
+   uint32_t gp_output_point_size_offt;
  
     struct hash_table *plb_pp_stream;
     uint32_t plb_index;
diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c

index 718f90546c710982a77239aea09c24c74788e893..b2018b3bcfb802bedb864e6052c99add67e326c2 100644 (file)
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c
@@ -804,12 +804,9 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info)
     else
        PLBU_CMD_PRIMITIVE_SETUP(0x2000, cull, info->index_size);
  
-   uint32_t gl_position_va =
-      lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos,
-                       LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP);
     PLBU_CMD_RSW_VERTEX_ARRAY(
        lima_ctx_buff_va(ctx, lima_ctx_buff_pp_plb_rsw, LIMA_CTX_BUFF_SUBMIT_PP),
-      gl_position_va);
+      ctx->gp_output->va);
  
     /* TODO
      * - we should set it only for the first draw that enabled the scissor and for
@@ -836,14 +833,9 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info)
     }
  
     if (info->index_size) {
-      PLBU_CMD_INDEXED_DEST(gl_position_va);
-      if (vs->point_size_idx != -1) {
-         uint32_t gl_point_size_va =
-            lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_point_size,
-                             LIMA_CTX_BUFF_SUBMIT_GP |
-                             LIMA_CTX_BUFF_SUBMIT_PP);
-         PLBU_CMD_INDEXED_PT_SIZE(gl_point_size_va);
-      }
+      PLBU_CMD_INDEXED_DEST(ctx->gp_output->va);
+      if (vs->point_size_idx != -1)
+         PLBU_CMD_INDEXED_PT_SIZE(ctx->gp_output->va + ctx->gp_output_point_size_offt);
  
        PLBU_CMD_INDICES(ctx->index_res->bo->va + info->start * info->index_size + ctx->index_offset);
     }
@@ -1105,8 +1097,8 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
  
     if (ctx->vs->num_varyings) {
        render->varying_types = 0x00000000;
-      render->varyings_address = ctx->sh_varying->va;
-      lima_submit_add_bo(ctx->pp_submit, ctx->sh_varying, LIMA_SUBMIT_BO_READ);
+      render->varyings_address = ctx->gp_output->va +
+                                 ctx->gp_output_varyings_offt;
        for (int i = 0, index = 0; i < ctx->vs->num_outputs; i++) {
           int val;
  
@@ -1246,17 +1238,13 @@ lima_update_varying(struct lima_context *ctx, const struct pipe_draw_info *info)
  {
     struct lima_screen *screen = lima_screen(ctx->base.screen);
     struct lima_vs_shader_state *vs = ctx->vs;
+   uint32_t gp_output_size;
  
     uint32_t *varying =
        lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_varying_info,
                            vs->num_outputs * 8, true);
     int n = 0;
  
-   /* should be LIMA_SUBMIT_BO_WRITE for GP, but each draw will use
-    * different part of this bo, so no need to set exclusive constraint */
-   lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_gl_pos,
-                       4 * 4 * info->count, false);
-
     int offset = 0;
  
     for (int i = 0; i < vs->num_outputs; i++) {
@@ -1278,35 +1266,44 @@ lima_update_varying(struct lima_context *ctx, const struct pipe_draw_info *info)
  
     vs->varying_stride = align(offset, 16);
  
+   /* gl_Position is always present, allocate space for it */
+   gp_output_size = align(4 * 4 * info->count, 0x40);
+
+   /* Allocate space for varyings if there are any */
     if (vs->num_varyings) {
-      /* sh_varying can be too large for the suballocators, so create a
-       * separate bo for it. The bo cache should prevent a performance hit. */
-      ctx->sh_varying = lima_bo_create(screen,
-                                       vs->varying_stride * info->count, 0);
-      assert(ctx->sh_varying);
-      lima_submit_add_bo(ctx->gp_submit, ctx->sh_varying, LIMA_SUBMIT_BO_WRITE);
+      ctx->gp_output_varyings_offt = gp_output_size;
+      gp_output_size += align(vs->varying_stride * info->count, 0x40);
     }
  
+   /* Allocate space for gl_PointSize if it's there */
+   if (vs->point_size_idx != -1) {
+      ctx->gp_output_point_size_offt = gp_output_size;
+      gp_output_size += 4 * info->count;
+   }
+
+   /* gp_output can be too large for the suballocator, so create a
+    * separate bo for it. The bo cache should prevent a performance hit.
+    */
+   ctx->gp_output = lima_bo_create(screen, gp_output_size, 0);
+   assert(ctx->gp_output);
+   lima_submit_add_bo(ctx->gp_submit, ctx->gp_output, LIMA_SUBMIT_BO_WRITE);
+   lima_submit_add_bo(ctx->pp_submit, ctx->gp_output, LIMA_SUBMIT_BO_READ);
+
     for (int i = 0; i < vs->num_outputs; i++) {
        struct lima_varying_info *v = vs->varying + i;
  
        if (i == vs->gl_pos_idx) {
           /* gl_Position */
-         varying[n++] =
-            lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos,
-                             LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP);
+         varying[n++] = ctx->gp_output->va;
           varying[n++] = 0x8020;
        } else if (i == vs->point_size_idx) {
           /* gl_PointSize */
-         lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_gl_point_size,
-                             4 * info->count, false);
-         varying[n++] =
-            lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_point_size,
-                             LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP);
+         varying[n++] = ctx->gp_output->va + ctx->gp_output_point_size_offt;
           varying[n++] = 0x2021;
        } else {
           /* Varying */
-         varying[n++] = ctx->sh_varying->va + v->offset;
+         varying[n++] = ctx->gp_output->va + ctx->gp_output_varyings_offt +
+                        v->offset;
           varying[n++] = (vs->varying_stride << 11) | (v->components - 1) |
              (v->component_size == 2 ? 0x0C : 0);
        }
@@ -1353,9 +1350,9 @@ lima_draw_vbo_update(struct pipe_context *pctx,
     lima_pack_render_state(ctx, info);
     lima_pack_plbu_cmd(ctx, info);
  
-   if (ctx->sh_varying) {
-      lima_bo_unreference(ctx->sh_varying); /* held by submit */
-      ctx->sh_varying = NULL;
+   if (ctx->gp_output) {
+      lima_bo_unreference(ctx->gp_output); /* held by submit */
+      ctx->gp_output = NULL;
     }
  
     ctx->dirty = 0;
@@ -1620,11 +1617,11 @@ _lima_flush(struct lima_context *ctx, bool end_of_frame)
  
     if (lima_dump_command_stream) {
        if (lima_submit_wait(ctx->gp_submit, PIPE_TIMEOUT_INFINITE)) {
-         if (ctx->buffer_state[lima_ctx_buff_sh_gl_pos].res) {
-            float *pos = lima_ctx_buff_map(ctx, lima_ctx_buff_sh_gl_pos);
+         if (ctx->gp_output) {
+            float *pos = lima_bo_map(ctx->gp_output);
              lima_dump_command_stream_print(
                 pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
-               lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos, 0));
+               ctx->gp_output->va);
           }
  
           uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
author	Vasily Khoruzhick <anarsoul@gmail.com>
	Sun, 24 Nov 2019 22:34:44 +0000 (14:34 -0800)
committer	Vasily Khoruzhick <anarsoul@gmail.com>
	Thu, 19 Dec 2019 22:28:32 +0000 (14:28 -0800)
src/gallium/drivers/lima/lima_context.c		patch \| blob \| history
src/gallium/drivers/lima/lima_context.h		patch \| blob \| history
src/gallium/drivers/lima/lima_draw.c		patch \| blob \| history