turnip: Add basic SSBO support.
author Eric Anholt <eric@anholt.net>
Mon, 2 Dec 2019 22:32:53 +0000 (14:32 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 5 Dec 2019 04:32:15 +0000 (20:32 -0800)
This is enough to pass
dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_buffer.fragment.single_descriptor.*
with fragmentStoresAndAtomics set, and thus to be able to start working on
compute.  I haven't enabled that flag yet, because it also implies image
load/store support, which I haven't filled in.

Reviewed-by: Jonathan Marek <jonathan@marek.ca>
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h

index 516bbc19e5b5618a0e62c24aaed79519cabc5660..47f95e4b00a225b0aecad667662fc5c722301b18 100644 (file)
@@ -2174,6 +2174,7 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_FS_CONST,
    TU_DRAW_STATE_VS_TEX,
    TU_DRAW_STATE_FS_TEX,
+   TU_DRAW_STATE_FS_IBO,
 
    TU_DRAW_STATE_COUNT,
 };
@@ -2495,6 +2496,70 @@ tu6_emit_textures(struct tu_device *device, struct tu_cs *draw_state,
    return tu_cs_end_sub_stream(draw_state, &cs);
 }
 
+/**
+ * Emit the IBO descriptor table for one shader stage, plus the commands
+ * that point the hardware at it.
+ *
+ * Two sub-streams are allocated from draw_state:
+ *   1. a table with one A6XX_TEX_CONST_DWORDS-sized descriptor for each
+ *      entry of link->image_mapping.ibo_to_image, and
+ *   2. a CP_LOAD_STATE6 packet (SB6_IBO, indirect source) followed by a
+ *      REG_A6XX_SP_IBO_LO write, both carrying the address of table 1.
+ *
+ * \param device             device passed through to tu_cs_begin_sub_stream()
+ * \param draw_state         command stream the sub-streams are carved from
+ * \param pipeline           pipeline whose program.link[type] is consulted
+ * \param descriptors_state  descriptor state used to look up SSBO addresses
+ * \param type               shader stage to emit IBO state for
+ *
+ * \return the entry for sub-stream 2, or an empty (zero-initialized) entry
+ *         when the stage uses no IBOs.
+ */
+static struct tu_cs_entry
+tu6_emit_ibo(struct tu_device *device, struct tu_cs *draw_state,
+             const struct tu_pipeline *pipeline,
+             struct tu_descriptor_state *descriptors_state,
+             gl_shader_stage type)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &pipeline->program.link[type];
+
+   uint32_t size = link->image_mapping.num_ibo * A6XX_TEX_CONST_DWORDS;
+   if (!size)
+      return (struct tu_cs_entry) {};
+
+   struct tu_cs cs;
+   tu_cs_begin_sub_stream(device, draw_state, size, &cs);
+
+   for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) {
+      unsigned idx = link->image_mapping.ibo_to_image[i];
+
+      if (idx & IBO_SSBO) {
+         /* Strip the SSBO tag to recover the index into ssbo_map. */
+         idx &= ~IBO_SSBO;
+
+         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
+         /* We don't expose robustBufferAccess, so leave the size unlimited.
+          * NOTE(review): the /4 presumably converts bytes to 32-bit
+          * elements to match the 32_UINT format below -- confirm.
+          */
+         uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
+
+         tu_cs_emit(&cs, A6XX_IBO_0_FMT(TFMT6_32_UINT));
+         /* The element count is split: low 15 bits in WIDTH, rest in HEIGHT. */
+         tu_cs_emit(&cs,
+                    A6XX_IBO_1_WIDTH(sz & MASK(15)) |
+                    A6XX_IBO_1_HEIGHT(sz >> 15));
+         tu_cs_emit(&cs,
+                    A6XX_IBO_2_UNK4 |
+                    A6XX_IBO_2_UNK31 |
+                    A6XX_IBO_2_TYPE(A6XX_TEX_1D));
+         tu_cs_emit(&cs, 0);
+         tu_cs_emit_qw(&cs, va);
+         /* Six dwords have been written above; zero-fill the rest of the
+          * descriptor.  A distinct counter name avoids shadowing the outer
+          * loop index.
+          */
+         for (unsigned dw = 6; dw < A6XX_TEX_CONST_DWORDS; dw++)
+            tu_cs_emit(&cs, 0);
+      } else {
+         tu_finishme("Emit images");
+
+         /* Image descriptors are not implemented yet.  Still emit a full,
+          * zeroed slot: otherwise every following descriptor would be
+          * written one slot too early and CP_LOAD_STATE6 (NUM_UNIT ==
+          * num_ibo) would read past the data written to the table.
+          */
+         for (unsigned dw = 0; dw < A6XX_TEX_CONST_DWORDS; dw++)
+            tu_cs_emit(&cs, 0);
+      }
+   }
+
+   struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs);
+
+   /* GPU address of the descriptor table that was just written. */
+   uint64_t ibo_addr = entry.bo->iova + entry.offset;
+
+   tu_cs_begin_sub_stream(device, draw_state, 64, &cs);
+
+   /* emit IBO state: */
+   tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6, 3);
+   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+              CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+              CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
+              CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo));
+   tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
+
+   /* Two-register write starting at SP_IBO_LO, carrying the same address. */
+   tu_cs_emit_pkt4(&cs, REG_A6XX_SP_IBO_LO, 2);
+   tu_cs_emit_qw(&cs, ibo_addr);
+
+   return tu_cs_end_sub_stream(draw_state, &cs);
+}
+
 static void
 tu6_emit_border_color(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs)
@@ -2679,6 +2744,13 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                                     descriptors_state, MESA_SHADER_FRAGMENT,
                                     &needs_border)
          };
+      draw_state_groups[draw_state_group_count++] =
+         (struct tu_draw_state_group) {
+            .id = TU_DRAW_STATE_FS_IBO,
+            .enable_mask = 0x6,
+            .ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
+                               descriptors_state, MESA_SHADER_FRAGMENT)
+         };
 
       if (needs_border)
          tu6_emit_border_color(cmd, cs);
index 7fbec1ba6192f2fd00bfbdfb0de1f2d44af332ad..8a350a054b6532ae6cb8d15decada06eb44e820d 100644 (file)
@@ -725,7 +725,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxImageArrayLayers = (1 << 11),
       .maxTexelBufferElements = 128 * 1024 * 1024,
       .maxUniformBufferRange = UINT32_MAX,
-      .maxStorageBufferRange = UINT32_MAX,
+      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
       .maxMemoryAllocationCount = UINT32_MAX,
       .maxSamplerAllocationCount = 64 * 1024,
index 290adffe2e098becbeac662db0a0d3b38207af49..547115b0359b1c7995bcb423fe34351512f745b0 100644 (file)
@@ -450,7 +450,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
 
    uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) |
-                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp);
+                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) |
+                           A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo);
    if (fs->instrlen)
       sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
 
@@ -470,6 +471,9 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL, 1);
    tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) |
                   A6XX_HLSQ_FS_CNTL_ENABLED);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1);
+   tu_cs_emit(cs, fs->image_mapping.num_ibo);
 }
 
 static void
index 3b0096d16884b4f03f9b6b2c0add0d9481b82cf8..6387fa5050105aa84c834c26727d60323440534d 100644 (file)
@@ -93,6 +93,8 @@ typedef uint32_t xcb_window_t;
 #define NUM_META_FS_KEYS 13
 #define TU_MAX_DRM_DEVICES 8
 #define MAX_VIEWS 8
+/* The Qualcomm driver exposes 0x20000058 */
+#define MAX_STORAGE_BUFFER_RANGE 0x20000000
 
 #define NUM_DEPTH_CLEAR_PIPELINES 3