turnip: make tiling config part of framebuffer state
author Jonathan Marek <jonathan@marek.ca>
Fri, 19 Jun 2020 00:39:39 +0000 (20:39 -0400)
committer Marge Bot <eric+marge@anholt.net>
Fri, 3 Jul 2020 14:49:10 +0000 (14:49 +0000)
Compute the tiling config at framebuffer creation time. A framebuffer will
be re-used multiple times, so this will avoid having to re-calculate the
tiling config every time a command buffer is recorded.

The tiling config already couldn't use the render area's x1/y1 because of
hw binning; with this change the render area isn't used at all for the
tiling config.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5570>

src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_util.c

index e93ef73c1416141d0e203a0314f5f4a709f5babb..114786432eb0f8559fa2f7a9c1a43c4dd0b58c7c 100644 (file)
@@ -2321,10 +2321,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          uint32_t a,
                          uint32_t gmem_a)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-   const VkRect2D *render_area = &tiling->render_area;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const VkRect2D *render_area = &cmd->state.render_area;
    struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
-   struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
+   struct tu_image_view *iview = fb->attachments[a].attachment;
    struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
 
    if (!dst->store)
@@ -2377,7 +2377,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                    A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
                    A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
                    A6XX_SP_PS_2D_SRC_HI(),
-                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
+                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
 
    /* sync GMEM writes with CACHE. */
    tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
index 871a5b8d35d96d0863ec816fb36b78b8c44a7e4e..fb24e17be0d5bce93b9db8c28069a98a73ed329f 100644 (file)
@@ -109,177 +109,29 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
 }
 
 static void
-tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
-                                    const struct tu_device *dev,
-                                    const struct tu_render_pass *pass)
-{
-   const uint32_t tile_align_w = pass->tile_align_w;
-   const uint32_t max_tile_width = 1024;
-
-   /* note: don't offset the tiling config by render_area.offset,
-    * because binning pass can't deal with it
-    * this means we might end up with more tiles than necessary,
-    * but load/store/etc are still scissored to the render_area
-    */
-   tiling->tile0.offset = (VkOffset2D) {};
-
-   const uint32_t ra_width =
-      tiling->render_area.extent.width +
-      (tiling->render_area.offset.x - tiling->tile0.offset.x);
-   const uint32_t ra_height =
-      tiling->render_area.extent.height +
-      (tiling->render_area.offset.y - tiling->tile0.offset.y);
-
-   /* start from 1 tile */
-   tiling->tile_count = (VkExtent2D) {
-      .width = 1,
-      .height = 1,
-   };
-   tiling->tile0.extent = (VkExtent2D) {
-      .width = util_align_npot(ra_width, tile_align_w),
-      .height = align(ra_height, TILE_ALIGN_H),
-   };
-
-   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
-      /* start with 2x2 tiles */
-      tiling->tile_count.width = 2;
-      tiling->tile_count.height = 2;
-      tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
-      tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
-   }
-
-   /* do not exceed max tile width */
-   while (tiling->tile0.extent.width > max_tile_width) {
-      tiling->tile_count.width++;
-      tiling->tile0.extent.width =
-         util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
-   }
-
-   /* will force to sysmem, don't bother trying to have a valid tile config
-    * TODO: just skip all GMEM stuff when sysmem is forced?
-    */
-   if (!pass->gmem_pixels)
-      return;
-
-   /* do not exceed gmem size */
-   while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
-      if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
-         tiling->tile_count.width++;
-         tiling->tile0.extent.width =
-            util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
-      } else {
-         /* if this assert fails then layout is impossible.. */
-         assert(tiling->tile0.extent.height > TILE_ALIGN_H);
-         tiling->tile_count.height++;
-         tiling->tile0.extent.height =
-            align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
-      }
-   }
-}
-
-static void
-tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
-                                    const struct tu_device *dev)
-{
-   const uint32_t max_pipe_count = 32; /* A6xx */
-
-   /* start from 1 tile per pipe */
-   tiling->pipe0 = (VkExtent2D) {
-      .width = 1,
-      .height = 1,
-   };
-   tiling->pipe_count = tiling->tile_count;
-
-   while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
-      if (tiling->pipe0.width < tiling->pipe0.height) {
-         tiling->pipe0.width += 1;
-         tiling->pipe_count.width =
-            DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
-      } else {
-         tiling->pipe0.height += 1;
-         tiling->pipe_count.height =
-            DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
-      }
-   }
-}
-
-static void
-tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
-                              const struct tu_device *dev)
-{
-   const uint32_t max_pipe_count = 32; /* A6xx */
-   const uint32_t used_pipe_count =
-      tiling->pipe_count.width * tiling->pipe_count.height;
-   const VkExtent2D last_pipe = {
-      .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
-      .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
-   };
-
-   assert(used_pipe_count <= max_pipe_count);
-   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
-
-   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
-      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
-         const uint32_t pipe_x = tiling->pipe0.width * x;
-         const uint32_t pipe_y = tiling->pipe0.height * y;
-         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
-                                    ? last_pipe.width
-                                    : tiling->pipe0.width;
-         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
-                                    ? last_pipe.height
-                                    : tiling->pipe0.height;
-         const uint32_t n = tiling->pipe_count.width * y + x;
-
-         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
-                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
-                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
-                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
-         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
-      }
-   }
-
-   memset(tiling->pipe_config + used_pipe_count, 0,
-          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
-}
-
-static void
-tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
-                          const struct tu_device *dev,
+tu_tiling_config_get_tile(const struct tu_framebuffer *fb,
                           uint32_t tx,
                           uint32_t ty,
-                          struct tu_tile *tile)
+                          uint32_t *pipe,
+                          uint32_t *slot)
 {
    /* find the pipe and the slot for tile (tx, ty) */
-   const uint32_t px = tx / tiling->pipe0.width;
-   const uint32_t py = ty / tiling->pipe0.height;
-   const uint32_t sx = tx - tiling->pipe0.width * px;
-   const uint32_t sy = ty - tiling->pipe0.height * py;
+   const uint32_t px = tx / fb->pipe0.width;
+   const uint32_t py = ty / fb->pipe0.height;
+   const uint32_t sx = tx - fb->pipe0.width * px;
+   const uint32_t sy = ty - fb->pipe0.height * py;
    /* last pipe has different width */
    const uint32_t pipe_width =
-      MIN2(tiling->pipe0.width,
-           tiling->tile_count.width - px * tiling->pipe0.width);
+      MIN2(fb->pipe0.width,
+           fb->tile_count.width - px * fb->pipe0.width);
 
-   assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
-   assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
-   assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
+   assert(tx < fb->tile_count.width && ty < fb->tile_count.height);
+   assert(px < fb->pipe_count.width && py < fb->pipe_count.height);
+   assert(sx < fb->pipe0.width && sy < fb->pipe0.height);
 
    /* convert to 1D indices */
-   tile->pipe = tiling->pipe_count.width * py + px;
-   tile->slot = pipe_width * sy + sx;
-
-   /* get the blit area for the tile */
-   tile->begin = (VkOffset2D) {
-      .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
-      .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
-   };
-   tile->end.x =
-      (tx == tiling->tile_count.width - 1)
-         ? tiling->render_area.offset.x + tiling->render_area.extent.width
-         : tile->begin.x + tiling->tile0.extent.width;
-   tile->end.y =
-      (ty == tiling->tile_count.height - 1)
-         ? tiling->render_area.offset.y + tiling->render_area.extent.height
-         : tile->begin.y + tiling->tile0.extent.height;
+   *pipe = fb->pipe_count.width * py + px;
+   *slot = pipe_width * sy + sx;
 }
 
 void
@@ -602,7 +454,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
 static void
 tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
 {
-   const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
+   const VkRect2D *render_area = &cmd->state.render_area;
    uint32_t x1 = render_area->offset.x;
    uint32_t y1 = render_area->offset.y;
    uint32_t x2 = x1 + render_area->extent.width - 1;
@@ -706,7 +558,7 @@ tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
 static bool
 use_hw_binning(struct tu_cmd_buffer *cmd)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
    /* XFB commands are emitted for BINNING || SYSMEM, which makes it incompatible
     * with non-hw binning GMEM rendering. this is required because some of the
@@ -721,7 +573,7 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
    if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
       return true;
 
-   return (tiling->tile_count.width * tiling->tile_count.height) > 2;
+   return (fb->tile_count.width * fb->tile_count.height) > 2;
 }
 
 static bool
@@ -740,24 +592,29 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
    if (cmd->has_tess)
       return true;
 
-   return cmd->state.tiling_config.force_sysmem;
+   return false;
 }
 
 static void
 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
                      struct tu_cs *cs,
-                     const struct tu_tile *tile)
+                     uint32_t tx, uint32_t ty)
 {
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   uint32_t pipe, slot;
+
+   tu_tiling_config_get_tile(fb, tx, ty, &pipe, &slot);
+
    tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
    tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_YIELD));
 
    tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
    tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
 
-   const uint32_t x1 = tile->begin.x;
-   const uint32_t y1 = tile->begin.y;
-   const uint32_t x2 = tile->end.x - 1;
-   const uint32_t y2 = tile->end.y - 1;
+   const uint32_t x1 = fb->tile0.width * tx;
+   const uint32_t y1 = fb->tile0.height * ty;
+   const uint32_t x2 = x1 + fb->tile0.width - 1;
+   const uint32_t y2 = y1 + fb->tile0.height - 1;
    tu6_emit_window_scissor(cs, x1, y1, x2, y2);
    tu6_emit_window_offset(cs, x1, y1);
 
@@ -771,11 +628,11 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
       tu_cs_emit(cs, 0x0);
 
       tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
-      tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
-                     CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
-      tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + tile->pipe * cmd->vsc_draw_strm_pitch);
-      tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + (tile->pipe * 4) + (32 * cmd->vsc_draw_strm_pitch));
-      tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + (tile->pipe * cmd->vsc_prim_strm_pitch));
+      tu_cs_emit(cs, fb->pipe_sizes[pipe] |
+                     CP_SET_BIN_DATA5_0_VSC_N(slot));
+      tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * cmd->vsc_draw_strm_pitch);
+      tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * 4 + 32 * cmd->vsc_draw_strm_pitch);
+      tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + pipe * cmd->vsc_prim_strm_pitch);
 
       tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
       tu_cs_emit(cs, 0x0);
@@ -801,7 +658,7 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
    struct tu_image_view *dst = fb->attachments[a].attachment;
    struct tu_image_view *src = fb->attachments[gmem_a].attachment;
 
-   tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
+   tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.render_area);
 }
 
 static void
@@ -1009,21 +866,20 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 static void
 update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
    tu_cs_emit_regs(cs,
-                   A6XX_VSC_BIN_SIZE(.width = tiling->tile0.extent.width,
-                                     .height = tiling->tile0.extent.height),
+                   A6XX_VSC_BIN_SIZE(.width = fb->tile0.width,
+                                     .height = fb->tile0.height),
                    A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
                                                    .bo_offset = 32 * cmd->vsc_draw_strm_pitch));
 
    tu_cs_emit_regs(cs,
-                   A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
-                                      .ny = tiling->tile_count.height));
+                   A6XX_VSC_BIN_COUNT(.nx = fb->tile_count.width,
+                                      .ny = fb->tile_count.height));
 
    tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
-   for (unsigned i = 0; i < 32; i++)
-      tu_cs_emit(cs, tiling->pipe_config[i]);
+   tu_cs_emit_array(cs, fb->pipe_config, 32);
 
    tu_cs_emit_regs(cs,
                    A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
@@ -1039,9 +895,9 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 static void
 emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const uint32_t used_pipe_count =
-      tiling->pipe_count.width * tiling->pipe_count.height;
+      fb->pipe_count.width * fb->pipe_count.height;
 
    /* Clear vsc_scratch: */
    tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
@@ -1078,14 +934,9 @@ static void
 tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    struct tu_physical_device *phys_dev = cmd->device->physical_device;
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
-   uint32_t x1 = tiling->tile0.offset.x;
-   uint32_t y1 = tiling->tile0.offset.y;
-   uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
-   uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
-   tu6_emit_window_scissor(cs, x1, y1, x2, y2);
+   tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
 
    tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
    tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
@@ -1213,7 +1064,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
       dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
       dst[2] =
          A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
-         A6XX_TEX_CONST_2_PITCH(cmd->state.tiling_config.tile0.extent.width * att->cpp);
+         A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * att->cpp);
       dst[3] = 0;
       dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
       dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
@@ -1282,8 +1133,7 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
 }
 
 static void
-tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
-                        const struct VkRect2D *renderArea)
+tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
@@ -1348,14 +1198,12 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
 
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    if (use_hw_binning(cmd)) {
       /* enable stream-out during binning pass: */
       tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
 
-      tu6_emit_bin_size(cs,
-                        tiling->tile0.extent.width,
-                        tiling->tile0.extent.height,
+      tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
                         A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
 
       tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, true);
@@ -1365,9 +1213,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
       /* and disable stream-out for draw pass: */
       tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=true));
 
-      tu6_emit_bin_size(cs,
-                        tiling->tile0.extent.width,
-                        tiling->tile0.extent.height,
+      tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
                         A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
 
       tu_cs_emit_regs(cs,
@@ -1383,10 +1229,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
       /* no binning pass, so enable stream-out for draw pass:: */
       tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
 
-      tu6_emit_bin_size(cs,
-                        tiling->tile0.extent.width,
-                        tiling->tile0.extent.height,
-                        0x6000000);
+      tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height, 0x6000000);
    }
 
    tu_cs_sanity_check(cs);
@@ -1395,9 +1238,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 static void
 tu6_render_tile(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
-                const struct tu_tile *tile)
+                uint32_t tx, uint32_t ty)
 {
-   tu6_emit_tile_select(cmd, cs, tile);
+   tu6_emit_tile_select(cmd, cs, tx, ty);
 
    tu_cs_emit_call(cs, &cmd->draw_cs);
 
@@ -1429,19 +1272,16 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 static void
 tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
    if (use_hw_binning(cmd))
       cmd->use_vsc_data = true;
 
    tu6_tile_render_begin(cmd, &cmd->cs);
 
-   for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
-      for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
-         struct tu_tile tile;
-         tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
-         tu6_render_tile(cmd, &cmd->cs, &tile);
-      }
+   for (uint32_t y = 0; y < fb->tile_count.height; y++) {
+      for (uint32_t x = 0; x < fb->tile_count.width; x++)
+         tu6_render_tile(cmd, &cmd->cs, x, y);
    }
 
    tu6_tile_render_end(cmd, &cmd->cs);
@@ -1450,9 +1290,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
 static void
 tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
 {
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
-   tu6_sysmem_render_begin(cmd, &cmd->cs, &tiling->render_area);
+   tu6_sysmem_render_begin(cmd, &cmd->cs);
 
    tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
 
@@ -1478,21 +1316,6 @@ tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
    cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs);
 }
 
-static void
-tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
-                            const VkRect2D *render_area)
-{
-   const struct tu_device *dev = cmd->device;
-   struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-
-   tiling->render_area = *render_area;
-   tiling->force_sysmem = false;
-
-   tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
-   tu_tiling_config_update_pipe_layout(tiling, dev);
-   tu_tiling_config_update_pipes(tiling, dev);
-}
-
 static VkResult
 tu_create_cmd_buffer(struct tu_device *device,
                      struct tu_cmd_pool *pool,
@@ -2791,8 +2614,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
    cmd->state.pass = pass;
    cmd->state.subpass = pass->subpasses;
    cmd->state.framebuffer = fb;
+   cmd->state.render_area = pRenderPassBegin->renderArea;
 
-   tu_cmd_update_tiling_config(cmd, &pRenderPassBegin->renderArea);
    tu_cmd_prepare_tile_store_ib(cmd);
 
    /* Note: because this is external, any flushes will happen before draw_cs
index ec2bae0b3920796efb7d9021536e0c33163dc3e1..716c168d2e4bb40681a2d372b8f1def6e957fc68 100644 (file)
@@ -2271,6 +2271,7 @@ tu_CreateFramebuffer(VkDevice _device,
                      VkFramebuffer *pFramebuffer)
 {
    TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
    struct tu_framebuffer *framebuffer;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
@@ -2292,6 +2293,8 @@ tu_CreateFramebuffer(VkDevice _device,
       framebuffer->attachments[i].attachment = iview;
    }
 
+   tu_framebuffer_tiling_config(framebuffer, device, pass);
+
    *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
    return VK_SUCCESS;
 }
index 0d59c3c065944c41546482b7e9edabf11238764d..12e5b0739a6dbe745ea199eb67fe83668241e429 100644 (file)
@@ -654,36 +654,6 @@ struct tu_descriptor_state
    uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
 };
 
-struct tu_tile
-{
-   uint8_t pipe;
-   uint8_t slot;
-   VkOffset2D begin;
-   VkOffset2D end;
-};
-
-struct tu_tiling_config
-{
-   VkRect2D render_area;
-
-   /* position and size of the first tile */
-   VkRect2D tile0;
-   /* number of tiles */
-   VkExtent2D tile_count;
-
-   /* size of the first VSC pipe */
-   VkExtent2D pipe0;
-   /* number of VSC pipes */
-   VkExtent2D pipe_count;
-
-   /* pipe register values */
-   uint32_t pipe_config[MAX_VSC_PIPES];
-   uint32_t pipe_sizes[MAX_VSC_PIPES];
-
-   /* Whether sysmem rendering must be used */
-   bool force_sysmem;
-};
-
 enum tu_cmd_dirty_bits
 {
    TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
@@ -859,8 +829,7 @@ struct tu_cmd_state
    const struct tu_render_pass *pass;
    const struct tu_subpass *subpass;
    const struct tu_framebuffer *framebuffer;
-
-   struct tu_tiling_config tiling_config;
+   VkRect2D render_area;
 
    struct tu_cs_entry tile_store_ib;
 
@@ -1389,10 +1358,29 @@ struct tu_framebuffer
    uint32_t height;
    uint32_t layers;
 
+   /* size of the first tile */
+   VkExtent2D tile0;
+   /* number of tiles */
+   VkExtent2D tile_count;
+
+   /* size of the first VSC pipe */
+   VkExtent2D pipe0;
+   /* number of VSC pipes */
+   VkExtent2D pipe_count;
+
+   /* pipe register values */
+   uint32_t pipe_config[MAX_VSC_PIPES];
+   uint32_t pipe_sizes[MAX_VSC_PIPES];
+
    uint32_t attachment_count;
    struct tu_attachment_info attachments[0];
 };
 
+void
+tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
+                             const struct tu_device *device,
+                             const struct tu_render_pass *pass);
+
 struct tu_subpass_barrier {
    VkPipelineStageFlags src_stage_mask;
    VkAccessFlags src_access_mask;
index 9a0e5cce4c947d814c635c2a4944447cdf6494af..ba1e4d53cd6cc4158d5821eb050f277e60650c4b 100644 (file)
@@ -116,3 +116,133 @@ __vk_errorf(struct tu_instance *instance,
 
    return error;
 }
+
+static void
+tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
+                                    const struct tu_device *dev,
+                                    const struct tu_render_pass *pass)
+{
+   const uint32_t tile_align_w = pass->tile_align_w;
+   const uint32_t max_tile_width = 1024;
+
+   /* start from 1 tile */
+   fb->tile_count = (VkExtent2D) {
+      .width = 1,
+      .height = 1,
+   };
+   fb->tile0 = (VkExtent2D) {
+      .width = util_align_npot(fb->width, tile_align_w),
+      .height = align(fb->height, TILE_ALIGN_H),
+   };
+
+   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
+      /* start with 2x2 tiles */
+      fb->tile_count.width = 2;
+      fb->tile_count.height = 2;
+      fb->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
+      fb->tile0.height = align(DIV_ROUND_UP(fb->height, 2), TILE_ALIGN_H);
+   }
+
+   /* do not exceed max tile width */
+   while (fb->tile0.width > max_tile_width) {
+      fb->tile_count.width++;
+      fb->tile0.width =
+         util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
+   }
+
+   /* will force to sysmem, don't bother trying to have a valid tile config
+    * TODO: just skip all GMEM stuff when sysmem is forced?
+    */
+   if (!pass->gmem_pixels)
+      return;
+
+   /* do not exceed gmem size */
+   while (fb->tile0.width * fb->tile0.height > pass->gmem_pixels) {
+      if (fb->tile0.width > MAX2(tile_align_w, fb->tile0.height)) {
+         fb->tile_count.width++;
+         fb->tile0.width =
+            util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
+      } else {
+         /* if this assert fails then layout is impossible.. */
+         assert(fb->tile0.height > TILE_ALIGN_H);
+         fb->tile_count.height++;
+         fb->tile0.height =
+            align(DIV_ROUND_UP(fb->height, fb->tile_count.height), TILE_ALIGN_H);
+      }
+   }
+}
+
+static void
+tu_tiling_config_update_pipe_layout(struct tu_framebuffer *fb,
+                                    const struct tu_device *dev)
+{
+   const uint32_t max_pipe_count = 32; /* A6xx */
+
+   /* start from 1 tile per pipe */
+   fb->pipe0 = (VkExtent2D) {
+      .width = 1,
+      .height = 1,
+   };
+   fb->pipe_count = fb->tile_count;
+
+   while (fb->pipe_count.width * fb->pipe_count.height > max_pipe_count) {
+      if (fb->pipe0.width < fb->pipe0.height) {
+         fb->pipe0.width += 1;
+         fb->pipe_count.width =
+            DIV_ROUND_UP(fb->tile_count.width, fb->pipe0.width);
+      } else {
+         fb->pipe0.height += 1;
+         fb->pipe_count.height =
+            DIV_ROUND_UP(fb->tile_count.height, fb->pipe0.height);
+      }
+   }
+}
+
+static void
+tu_tiling_config_update_pipes(struct tu_framebuffer *fb,
+                              const struct tu_device *dev)
+{
+   const uint32_t max_pipe_count = 32; /* A6xx */
+   const uint32_t used_pipe_count =
+      fb->pipe_count.width * fb->pipe_count.height;
+   const VkExtent2D last_pipe = {
+      .width = (fb->tile_count.width - 1) % fb->pipe0.width + 1,
+      .height = (fb->tile_count.height - 1) % fb->pipe0.height + 1,
+   };
+
+   assert(used_pipe_count <= max_pipe_count);
+   assert(max_pipe_count <= ARRAY_SIZE(fb->pipe_config));
+
+   for (uint32_t y = 0; y < fb->pipe_count.height; y++) {
+      for (uint32_t x = 0; x < fb->pipe_count.width; x++) {
+         const uint32_t pipe_x = fb->pipe0.width * x;
+         const uint32_t pipe_y = fb->pipe0.height * y;
+         const uint32_t pipe_w = (x == fb->pipe_count.width - 1)
+                                    ? last_pipe.width
+                                    : fb->pipe0.width;
+         const uint32_t pipe_h = (y == fb->pipe_count.height - 1)
+                                    ? last_pipe.height
+                                    : fb->pipe0.height;
+         const uint32_t n = fb->pipe_count.width * y + x;
+
+         fb->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
+                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
+         fb->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
+      }
+   }
+
+   memset(fb->pipe_config + used_pipe_count, 0,
+          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
+}
+
+void
+tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
+                             const struct tu_device *device,
+                             const struct tu_render_pass *pass)
+{
+   tu_tiling_config_update_tile_layout(fb, device, pass);
+   tu_tiling_config_update_pipe_layout(fb, device);
+   tu_tiling_config_update_pipes(fb, device);
+}