turnip: add adreno 650

author Jonathan Marek <jonathan@marek.ca>

Wed, 22 Jan 2020 02:12:57 +0000 (21:12 -0500)

committer Marge Bot <eric+marge@anholt.net>

Fri, 24 Apr 2020 17:42:01 +0000 (17:42 +0000)
author Jonathan Marek <jonathan@marek.ca>
Wed, 22 Jan 2020 02:12:57 +0000 (21:12 -0500)
committer Marge Bot <eric+marge@anholt.net>
Fri, 24 Apr 2020 17:42:01 +0000 (17:42 +0000)
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c

index a9e075ec225c7f433e6f188ae9af8a027035a7da..10577c7598570324749c1eb7b03d341ac368118c 100644 (file)
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -113,10 +113,9 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
  static void
  tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
                                      const struct tu_device *dev,
-                                    uint32_t pixels)
+                                    const struct tu_render_pass *pass)
  {
-   const uint32_t tile_align_w = 64; /* note: 32 when no input attachments */
-   const uint32_t tile_align_h = 16;
+   const uint32_t tile_align_w = pass->tile_align_w;
     const uint32_t max_tile_width = 1024;
  
     /* note: don't offset the tiling config by render_area.offset,
@@ -139,43 +138,43 @@ tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
        .height = 1,
     };
     tiling->tile0.extent = (VkExtent2D) {
-      .width = align(ra_width, tile_align_w),
-      .height = align(ra_height, tile_align_h),
+      .width = util_align_npot(ra_width, tile_align_w),
+      .height = align(ra_height, TILE_ALIGN_H),
     };
  
     if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
        /* start with 2x2 tiles */
        tiling->tile_count.width = 2;
        tiling->tile_count.height = 2;
-      tiling->tile0.extent.width = align(DIV_ROUND_UP(ra_width, 2), tile_align_w);
-      tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), tile_align_h);
+      tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
+      tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
     }
  
     /* do not exceed max tile width */
     while (tiling->tile0.extent.width > max_tile_width) {
        tiling->tile_count.width++;
        tiling->tile0.extent.width =
-         align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
+         util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
     }
  
     /* will force to sysmem, don't bother trying to have a valid tile config
      * TODO: just skip all GMEM stuff when sysmem is forced?
      */
-   if (!pixels)
+   if (!pass->gmem_pixels)
        return;
  
     /* do not exceed gmem size */
-   while (tiling->tile0.extent.width * tiling->tile0.extent.height > pixels) {
+   while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
        if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
           tiling->tile_count.width++;
           tiling->tile0.extent.width =
-            align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
+            util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
        } else {
           /* if this assert fails then layout is impossible.. */
-         assert(tiling->tile0.extent.height > tile_align_h);
+         assert(tiling->tile0.extent.height > TILE_ALIGN_H);
           tiling->tile_count.height++;
           tiling->tile0.extent.height =
-            align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), tile_align_h);
+            align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
        }
     }
  }
@@ -1378,7 +1377,7 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
     tiling->render_area = *render_area;
     tiling->force_sysmem = false;
  
-   tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels);
+   tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
     tu_tiling_config_update_pipe_layout(tiling, dev);
     tu_tiling_config_update_pipes(tiling, dev);
  }
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c

index ea1ee8ee8aaf9664ecbf268cd9216a00ce931a96..30cc1442dd41ca9fd59328a5031b6cb668d66ea3 100644 (file)
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -267,6 +267,7 @@ tu_physical_device_init(struct tu_physical_device *device,
     case 618:
        device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */
        device->ccu_offset_bypass = 0x10000;
+      device->tile_align_w = 64;
        device->magic.PC_UNKNOWN_9805 = 0x0;
        device->magic.SP_UNKNOWN_A0F8 = 0x0;
        break;
@@ -274,9 +275,17 @@ tu_physical_device_init(struct tu_physical_device *device,
     case 640:
        device->ccu_offset_gmem = 0xf8000;
        device->ccu_offset_bypass = 0x20000;
+      device->tile_align_w = 64;
        device->magic.PC_UNKNOWN_9805 = 0x1;
        device->magic.SP_UNKNOWN_A0F8 = 0x1;
        break;
+   case 650:
+      device->ccu_offset_gmem = 0x114000;
+      device->ccu_offset_bypass = 0x30000;
+      device->tile_align_w = 96;
+      device->magic.PC_UNKNOWN_9805 = 0x2;
+      device->magic.SP_UNKNOWN_A0F8 = 0x2;
+      break;
     default:
        result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                           "device %s is unsupported", device->name);
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c

index 7d537973e5e20ee13549a3fa6c17a54bda333944..c86d7c81ff9d1571ac80b432f6f96bfab5888b96 100644 (file)
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -36,20 +36,32 @@ static void update_samples(struct tu_subpass *subpass,
     subpass->samples = samples;
  }
  
-#define GMEM_ALIGN 0x4000
-
  static void
  create_render_pass_common(struct tu_render_pass *pass,
                            const struct tu_physical_device *phys_dev)
  {
+   uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
+   uint32_t tile_align_w = phys_dev->tile_align_w;
+   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;
+
     /* calculate total bytes per pixel */
     uint32_t cpp_total = 0;
     for (uint32_t i = 0; i < pass->attachment_count; i++) {
        struct tu_render_pass_attachment *att = &pass->attachments[i];
-      if (att->gmem_offset >= 0)
+      if (att->gmem_offset >= 0) {
           cpp_total += att->cpp;
+         /* texture pitch must be aligned to 64, use a tile_align_w that is
+          * a multiple of 64 for cpp==1 attachment to work as input attachment
+          */
+         if (att->cpp == 1 && tile_align_w % 64 != 0) {
+            tile_align_w *= 2;
+            block_align_shift -= 1;
+         }
+      }
     }
  
+   pass->tile_align_w = tile_align_w;
+
     /* no gmem attachments */
     if (cpp_total == 0) {
        /* any non-zero value so tiling config works with no attachments */
@@ -64,7 +76,7 @@ create_render_pass_common(struct tu_render_pass *pass,
      * result:  nblocks = {12, 52}, pixels = 196608
      * optimal: nblocks = {13, 51}, pixels = 208896
      */
-   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
+   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
     uint32_t offset = 0, pixels = ~0u;
     for (uint32_t i = 0; i < pass->attachment_count; i++) {
        struct tu_render_pass_attachment *att = &pass->attachments[i];
@@ -73,14 +85,13 @@ create_render_pass_common(struct tu_render_pass *pass,
  
        att->gmem_offset = offset;
  
-      /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
-      uint32_t align = MAX2(1, att->cpp / 16);
+      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
        uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
  
        gmem_blocks -= nblocks;
        cpp_total -= att->cpp;
-      offset += nblocks * GMEM_ALIGN;
-      pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
+      offset += nblocks * gmem_align;
+      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
     }
  
     pass->gmem_pixels = pixels;
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h

index 062bcf987863002d8432b21c1b016710a6097905..ff5f25dd0b8d8e19e29956676c52e89953fa91a2 100644 (file)
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -324,6 +324,10 @@ struct tu_physical_device
     uint64_t gmem_base;
     uint32_t ccu_offset_gmem;
     uint32_t ccu_offset_bypass;
+   /* alignment for size of tiles */
+   uint32_t tile_align_w;
+#define TILE_ALIGN_H 16
+   /* gmem store/load granularity */
  #define GMEM_ALIGN_W 16
  #define GMEM_ALIGN_H 4
  
@@ -1607,6 +1611,7 @@ struct tu_render_pass
     uint32_t attachment_count;
     uint32_t subpass_count;
     uint32_t gmem_pixels;
+   uint32_t tile_align_w;
     struct tu_subpass_attachment *subpass_attachments;
     struct tu_render_pass_attachment *attachments;
     struct tu_subpass subpasses[0];
author	Jonathan Marek <jonathan@marek.ca>
	Wed, 22 Jan 2020 02:12:57 +0000 (21:12 -0500)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 24 Apr 2020 17:42:01 +0000 (17:42 +0000)
src/freedreno/vulkan/tu_cmd_buffer.c		patch \| blob \| history
src/freedreno/vulkan/tu_device.c		patch \| blob \| history
src/freedreno/vulkan/tu_pass.c		patch \| blob \| history
src/freedreno/vulkan/tu_private.h		patch \| blob \| history