turnip: input attachment descriptor set rework
author    Jonathan Marek <jonathan@marek.ca>
          Mon, 15 Jun 2020 03:10:01 +0000 (23:10 -0400)
committer Marge Bot <eric+marge@anholt.net>
          Wed, 17 Jun 2020 15:32:30 +0000 (15:32 +0000)
Implement GMEM input attachments using non-bindless texture state that is
emitted at the start of every subpass.

This achieves two things:
* A more Vulkan-like CmdBindDescriptorSets implementation
* Working input attachments with GMEM in secondary command buffers

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5446>
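
As a rough illustration of the indexing convention this rework sets up (a
standalone sketch, not driver code; input_attachment_tex_slot is an
illustrative name, not a turnip function): each subpass input attachment gets
a pair of FS texture descriptors emitted at subpass start, where the even
slot is the regular view and the odd slot is the same image re-typed as
S8Z24_UINT, so that uint (stencil) loads of D24S8 can be lowered to the
second descriptor of the pair.

#include <stdbool.h>
#include <stdio.h>

/* Texture slot for input attachment 'input_idx', matching the layout that
 * tu_emit_input_attachments() writes and that the tu_shader lowering indexes:
 *   slot 2*i     -> regular descriptor for input attachment i
 *   slot 2*i + 1 -> same image described as S8Z24_UINT (D24S8 stencil reads)
 */
static unsigned
input_attachment_tex_slot(unsigned input_idx, bool uint_stencil_read)
{
   return input_idx * 2 + (uint_stencil_read ? 1 : 0);
}

int
main(void)
{
   /* e.g. a usubpassInput load of input attachment 1 (stencil of D24S8) */
   printf("tex slot = %u\n", input_attachment_tex_slot(1, true)); /* prints 3 */
   return 0;
}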

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_descriptor_set.c
src/freedreno/vulkan/tu_descriptor_set.h
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_shader.c

src/freedreno/vulkan/tu_cmd_buffer.c
index affd1b3e9db017b726cbdc5d0293434d5eac251b..2c8a3411dc39ff18ce70e04315d60662f7058ff8 100644
@@ -707,13 +707,6 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
    case TU_DRAW_STATE_VI_BINNING:
       enable_mask = CP_SET_DRAW_STATE__0_BINNING;
       break;
-   case TU_DRAW_STATE_DESC_SETS_GMEM:
-      enable_mask = CP_SET_DRAW_STATE__0_GMEM;
-      break;
-   case TU_DRAW_STATE_DESC_SETS_SYSMEM:
-      enable_mask = CP_SET_DRAW_STATE__0_BINNING |
-                    CP_SET_DRAW_STATE__0_SYSMEM;
-      break;
    default:
       enable_mask = CP_SET_DRAW_STATE__0_GMEM |
                     CP_SET_DRAW_STATE__0_SYSMEM |
@@ -1263,8 +1256,91 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 }
 
 static void
-tu_emit_load_clear(struct tu_cmd_buffer *cmd,
-                   const VkRenderPassBeginInfo *info)
+tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
+                          const struct tu_subpass *subpass,
+                          bool gmem)
+{
+   /* note: we could probably emit input attachments just once for the whole
+    * renderpass, which would avoid emitting both the sysmem and gmem versions
+    *
+    * Two texture descriptors are emitted for each input attachment, as a
+    * workaround for d24s8, which can be sampled as both float (depth) and
+    * integer (stencil). tu_shader lowers uint input attachment loads to use
+    * the 2nd descriptor in the pair.
+    * TODO: a smarter workaround
+    */
+
+   if (!subpass->input_count)
+      return;
+
+   struct ts_cs_memory texture;
+   VkResult result = tu_cs_alloc(&cmd->sub_cs, subpass->input_count * 2,
+                                 A6XX_TEX_CONST_DWORDS, &texture);
+   assert(result == VK_SUCCESS);
+
+   for (unsigned i = 0; i < subpass->input_count * 2; i++) {
+      uint32_t a = subpass->input_attachments[i / 2].attachment;
+      if (a == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      struct tu_image_view *iview =
+         cmd->state.framebuffer->attachments[a].attachment;
+      const struct tu_render_pass_attachment *att =
+         &cmd->state.pass->attachments[a];
+      uint32_t *dst = &texture.map[A6XX_TEX_CONST_DWORDS * i];
+
+      memcpy(dst, iview->descriptor, A6XX_TEX_CONST_DWORDS * 4);
+
+      if (i % 2 == 1 && att->format == VK_FORMAT_D24_UNORM_S8_UINT) {
+         /* note this works because spec says fb and input attachments
+          * must use identity swizzle
+          */
+         dst[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
+            A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
+            A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK);
+         dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_S8Z24_UINT) |
+            A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_Y) |
+            A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) |
+            A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) |
+            A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE);
+      }
+
+      if (!gmem)
+         continue;
+
+      /* patched for gmem */
+      dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
+      dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
+      dst[2] &= ~(A6XX_TEX_CONST_2_TYPE__MASK | A6XX_TEX_CONST_2_PITCH__MASK);
+      dst[2] |=
+         A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
+         A6XX_TEX_CONST_2_PITCH(cmd->state.tiling_config.tile0.extent.width * att->cpp);
+      dst[3] = 0;
+      dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
+      dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
+      for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
+         dst[i] = 0;
+   }
+
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
+   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+                  CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
+                  CP_LOAD_STATE6_0_NUM_UNIT(subpass->input_count * 2));
+   tu_cs_emit_qw(cs, texture.iova);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2);
+   tu_cs_emit_qw(cs, texture.iova);
+
+   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(subpass->input_count * 2));
+}
+
+static void
+tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
+                         const VkRenderPassBeginInfo *info)
 {
    struct tu_cs *cs = &cmd->draw_cs;
 
@@ -1280,6 +1356,8 @@ tu_emit_load_clear(struct tu_cmd_buffer *cmd,
    for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
       tu_clear_gmem_attachment(cmd, cs, i, info);
 
+   tu_emit_input_attachments(cmd, cmd->state.subpass, true);
+
    tu_cond_exec_end(cs);
 
    tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
@@ -1287,6 +1365,8 @@ tu_emit_load_clear(struct tu_cmd_buffer *cmd,
    for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
       tu_clear_sysmem_attachment(cmd, cs, i, info);
 
+   tu_emit_input_attachments(cmd, cmd->state.subpass, false);
+
    tu_cond_exec_end(cs);
 }
 
@@ -1343,7 +1423,6 @@ tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_sanity_check(cs);
 }
 
-
 static void
 tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
@@ -1575,9 +1654,6 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
 
    list_del(&cmd_buffer->pool_link);
 
-   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
-      free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
-
    tu_cs_finish(&cmd_buffer->cs);
    tu_cs_finish(&cmd_buffer->draw_cs);
    tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
@@ -1598,10 +1674,8 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
    tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
    tu_cs_reset(&cmd_buffer->sub_cs);
 
-   for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
-      cmd_buffer->descriptors[i].valid = 0;
-      cmd_buffer->descriptors[i].push_dirty = false;
-   }
+   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
+      memset(&cmd_buffer->descriptors[i].sets, 0, sizeof(cmd_buffer->descriptors[i].sets));
 
    cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
 
@@ -1829,31 +1903,10 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
       TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
 
       descriptors_state->sets[idx] = set;
-      descriptors_state->valid |= (1u << idx);
-
-      /* Note: the actual input attachment indices come from the shader
-       * itself, so we can't generate the patched versions of these until
-       * draw time when both the pipeline and descriptors are bound and
-       * we're inside the render pass.
-       */
-      unsigned dst_idx = layout->set[idx].input_attachment_start;
-      memcpy(&descriptors_state->input_attachments[dst_idx * A6XX_TEX_CONST_DWORDS],
-             set->dynamic_descriptors,
-             set->layout->input_attachment_count * A6XX_TEX_CONST_DWORDS * 4);
 
       for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
-         /* Dynamic buffers come after input attachments in the descriptor set
-          * itself, but due to how the Vulkan descriptor set binding works, we
-          * have to put input attachments and dynamic buffers in separate
-          * buffers in the descriptor_state and then combine them at draw
-          * time. Binding a descriptor set only invalidates the descriptor
-          * sets after it, but if we try to tightly pack the descriptors after
-          * the input attachments then we could corrupt dynamic buffers in the
-          * descriptor set before it, or we'd have to move all the dynamic
-          * buffers over. We just put them into separate buffers to make
-          * binding as well as the later patching of input attachments easy.
-          */
-         unsigned src_idx = j + set->layout->input_attachment_count;
+         /* update the contents of the dynamic descriptor set */
+         unsigned src_idx = j;
          unsigned dst_idx = j + layout->set[idx].dynamic_offset_start;
          assert(dyn_idx < dynamicOffsetCount);
 
@@ -1894,11 +1947,65 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                         MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
       }
    }
+   assert(dyn_idx == dynamicOffsetCount);
+
+   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value;
+   uint64_t addr[MAX_SETS + 1] = {};
+   struct tu_cs cs;
+
+   for (uint32_t i = 0; i < MAX_SETS; i++) {
+      struct tu_descriptor_set *set = descriptors_state->sets[i];
+      if (set)
+         addr[i] = set->va | 3;
+   }
+
+   if (layout->dynamic_offset_count) {
+      /* allocate and fill out dynamic descriptor set */
+      struct ts_cs_memory dynamic_desc_set;
+      VkResult result = tu_cs_alloc(&cmd->sub_cs, layout->dynamic_offset_count,
+                                    A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
+      assert(result == VK_SUCCESS);
+
+      memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
+             layout->dynamic_offset_count * A6XX_TEX_CONST_DWORDS * 4);
+      addr[MAX_SETS] = dynamic_desc_set.iova | 3;
+   }
+
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
+      hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
+      hlsq_update_value = 0x7c000;
 
-   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE)
-      cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
-   else
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
+   } else {
+      assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
+
+      sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
+      hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
+      hlsq_update_value = 0x3e00;
+
+      cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
+   }
+
+   tu_cs_begin_sub_stream(&cmd->sub_cs, 24, &cs);
+
+   tu_cs_emit_pkt4(&cs, sp_bindless_base_reg, 10);
+   tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
+   tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10);
+   tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
+   tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value));
+
+   struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
+      tu_cs_emit_sds_ib(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, ib);
+      cmd->state.desc_sets_ib = ib;
+   } else {
+      /* note: for compute we could emit directly, instead of a CP_INDIRECT;
+       * however, the blob uses draw states for compute
+       */
+      tu_cs_emit_ib(&cmd->cs, &ib);
+   }
 }
 
 void tu_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer,
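
A standalone sketch (not driver code; build_bindless_bases and the MAX_SETS
value are assumptions for illustration) of the table the hunk above programs
into SP_BINDLESS_BASE / HLSQ_BINDLESS_BASE: one qword per user descriptor
set, plus one trailing driver-internal slot holding the dynamic-offset
descriptors, each address OR'd with 3 as the blob does (5 qwords = the 10
dwords written per register block above).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_SETS 4 /* illustrative; turnip's real value lives in tu_private.h */

/* Build the bindless base table: user sets in slots 0..MAX_SETS-1, the
 * driver-managed dynamic descriptor set in the extra trailing slot.
 */
static void
build_bindless_bases(uint64_t bases[MAX_SETS + 1],
                     const uint64_t set_va[MAX_SETS],
                     uint64_t dynamic_set_va)
{
   memset(bases, 0, (MAX_SETS + 1) * sizeof(*bases));
   for (unsigned i = 0; i < MAX_SETS; i++) {
      if (set_va[i])
         bases[i] = set_va[i] | 3;
   }
   if (dynamic_set_va)
      bases[MAX_SETS] = dynamic_set_va | 3;
}

int
main(void)
{
   /* made-up addresses: sets 0 and 2 bound, plus a dynamic descriptor set */
   uint64_t set_va[MAX_SETS] = { 0x100000, 0, 0x200000, 0 };
   uint64_t bases[MAX_SETS + 1];

   build_bindless_bases(bases, set_va, 0x300000);
   for (unsigned i = 0; i <= MAX_SETS; i++)
      printf("BINDLESS_BASE[%u] = 0x%" PRIx64 "\n", i, bases[i]);
   return 0;
}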
@@ -2111,7 +2218,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
       cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
 
    /* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */
-   if (pipeline->layout->dynamic_offset_count + pipeline->layout->input_attachment_count)
+   if (pipeline->layout->dynamic_offset_count)
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
 
    /* dynamic linewidth state depends pipeline state's gras_su_cntl
@@ -2666,7 +2773,7 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
       cmd->state.cache.pending_flush_bits;
    cmd->state.renderpass_cache.flush_bits = 0;
 
-   tu_emit_load_clear(cmd, pRenderPassBegin);
+   tu_emit_renderpass_begin(cmd, pRenderPassBegin);
 
    tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
    tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
@@ -2729,12 +2836,16 @@ tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
       }
    }
 
+   tu_emit_input_attachments(cmd, cmd->state.subpass, true);
+
    tu_cond_exec_end(cs);
 
    tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
 
    tu6_emit_sysmem_resolves(cmd, cs, subpass);
 
+   tu_emit_input_attachments(cmd, cmd->state.subpass, false);
+
    tu_cond_exec_end(cs);
 
    /* Handle dependencies for the next subpass */
@@ -2857,14 +2968,6 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
          descriptors_state->dynamic_descriptors :
          descriptors_state->sets[state->range[i].bindless_base]->mapped_ptr;
       unsigned block = state->range[i].block;
-      /* If the block in the shader here is in the dynamic descriptor set, it
-       * is an index into the dynamic descriptor set which is combined from
-       * dynamic descriptors and input attachments on-the-fly, and we don't
-       * have access to it here. Instead we work backwards to get the index
-       * into dynamic_descriptors.
-       */
-      if (state->range[i].bindless_base == MAX_SETS)
-         block -= pipeline->layout->input_attachment_count;
       uint32_t *desc = base + block * A6XX_TEX_CONST_DWORDS;
       uint64_t va = desc[0] | ((uint64_t)(desc[1] & A6XX_UBO_1_BASE_HI__MASK) << 32);
       assert(va);
@@ -2957,143 +3060,6 @@ tu6_emit_vertex_buffers(struct tu_cmd_buffer *cmd,
    return tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
 }
 
-static VkResult
-tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
-                         const struct tu_pipeline *pipeline,
-                         VkPipelineBindPoint bind_point,
-                         struct tu_cs_entry *entry,
-                         bool gmem)
-{
-   struct tu_cs *draw_state = &cmd->sub_cs;
-   struct tu_pipeline_layout *layout = pipeline->layout;
-   struct tu_descriptor_state *descriptors_state =
-      tu_get_descriptors_state(cmd, bind_point);
-   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-   const uint32_t *input_attachment_idx =
-      pipeline->program.input_attachment_idx;
-   uint32_t num_dynamic_descs = layout->dynamic_offset_count +
-      layout->input_attachment_count;
-   struct ts_cs_memory dynamic_desc_set;
-   VkResult result;
-
-   if (num_dynamic_descs > 0) {
-      /* allocate and fill out dynamic descriptor set */
-      result = tu_cs_alloc(draw_state, num_dynamic_descs,
-                           A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
-      if (result != VK_SUCCESS)
-         return result;
-
-      memcpy(dynamic_desc_set.map, descriptors_state->input_attachments,
-             layout->input_attachment_count * A6XX_TEX_CONST_DWORDS * 4);
-
-      if (gmem) {
-         /* Patch input attachments to refer to GMEM instead */
-         for (unsigned i = 0; i < layout->input_attachment_count; i++) {
-            uint32_t *dst =
-               &dynamic_desc_set.map[A6XX_TEX_CONST_DWORDS * i];
-
-            /* The compiler has already laid out input_attachment_idx in the
-             * final order of input attachments, so there's no need to go
-             * through the pipeline layout finding input attachments.
-             */
-            unsigned attachment_idx = input_attachment_idx[i];
-
-            /* It's possible for the pipeline layout to include an input
-             * attachment which doesn't actually exist for the current
-             * subpass. Of course, this is only valid so long as the pipeline
-             * doesn't try to actually load that attachment. Just skip
-             * patching in that scenario to avoid out-of-bounds accesses.
-             */
-            if (attachment_idx >= cmd->state.subpass->input_count)
-               continue;
-
-            uint32_t a = cmd->state.subpass->input_attachments[attachment_idx].attachment;
-            const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a];
-
-            assert(att->gmem_offset >= 0);
-
-            dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
-            dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
-            dst[2] &= ~(A6XX_TEX_CONST_2_TYPE__MASK | A6XX_TEX_CONST_2_PITCH__MASK);
-            dst[2] |=
-               A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
-               A6XX_TEX_CONST_2_PITCH(tiling->tile0.extent.width * att->cpp);
-            dst[3] = 0;
-            dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
-            dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
-            for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
-               dst[i] = 0;
-
-            if (cmd->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
-               tu_finishme("patch input attachment pitch for secondary cmd buffer");
-         }
-      }
-
-      memcpy(dynamic_desc_set.map + layout->input_attachment_count * A6XX_TEX_CONST_DWORDS,
-             descriptors_state->dynamic_descriptors,
-             layout->dynamic_offset_count * A6XX_TEX_CONST_DWORDS * 4);
-   }
-
-   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg;
-   uint32_t hlsq_update_value;
-   switch (bind_point) {
-   case VK_PIPELINE_BIND_POINT_GRAPHICS:
-      sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
-      hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
-      hlsq_update_value = 0x7c000;
-      break;
-   case VK_PIPELINE_BIND_POINT_COMPUTE:
-      sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
-      hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
-      hlsq_update_value = 0x3e00;
-      break;
-   default:
-      unreachable("bad bind point");
-   }
-
-   /* Be careful here to *not* refer to the pipeline, so that if only the
-    * pipeline changes we don't have to emit this again (except if there are
-    * dynamic descriptors in the pipeline layout). This means always emitting
-    * all the valid descriptors, which means that we always have to put the
-    * dynamic descriptor in the driver-only slot at the end
-    */
-   uint32_t num_user_sets = util_last_bit(descriptors_state->valid);
-   uint32_t num_sets = num_user_sets;
-   if (num_dynamic_descs > 0) {
-      num_user_sets = MAX_SETS;
-      num_sets = num_user_sets + 1;
-   }
-
-   unsigned regs[2] = { sp_bindless_base_reg, hlsq_bindless_base_reg };
-
-   struct tu_cs cs;
-   result = tu_cs_begin_sub_stream(draw_state, ARRAY_SIZE(regs) * (1 + num_sets * 2) + 2, &cs);
-   if (result != VK_SUCCESS)
-      return result;
-
-   if (num_sets > 0) {
-      for (unsigned i = 0; i < ARRAY_SIZE(regs); i++) {
-         tu_cs_emit_pkt4(&cs, regs[i], num_sets * 2);
-         for (unsigned j = 0; j < num_user_sets; j++) {
-            if (descriptors_state->valid & (1 << j)) {
-               /* magic | 3 copied from the blob */
-               tu_cs_emit_qw(&cs, descriptors_state->sets[j]->va | 3);
-            } else {
-               tu_cs_emit_qw(&cs, 0 | 3);
-            }
-         }
-         if (num_dynamic_descs > 0) {
-            tu_cs_emit_qw(&cs, dynamic_desc_set.iova | 3);
-         }
-      }
-
-      tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(hlsq_update_value));
-   }
-
-   *entry = tu_cs_end_sub_stream(draw_state, &cs);
-   return VK_SUCCESS;
-}
-
 static void
 tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
@@ -3184,41 +3150,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
    if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS)
       tu6_emit_streamout(cmd, cs);
 
-   /* If there are any any dynamic descriptors, then we may need to re-emit
-    * them after every pipeline change in case the number of input attachments
-    * changes. We also always need to re-emit after a pipeline change if there
-    * are any input attachments, because the input attachment index comes from
-    * the pipeline. Finally, it can also happen that the subpass changes
-    * without the pipeline changing, in which case the GMEM descriptors need
-    * to be patched differently.
-    *
-    * TODO: We could probably be clever and avoid re-emitting state on
-    * pipeline changes if the number of input attachments is always 0. We
-    * could also only re-emit dynamic state.
-    */
    if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
-      bool need_gmem_desc_set = pipeline->layout->input_attachment_count > 0;
-
-      result = tu6_emit_descriptor_sets(cmd, pipeline,
-                                        VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                        &cmd->state.desc_sets_ib, false);
-      if (result != VK_SUCCESS)
-         return result;
-
-      if (need_gmem_desc_set) {
-         cmd->state.desc_sets_sysmem_ib = cmd->state.desc_sets_ib;
-         cmd->state.desc_sets_ib.size = 0;
-
-         result = tu6_emit_descriptor_sets(cmd, pipeline,
-                                           VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                            &cmd->state.desc_sets_gmem_ib, true);
-         if (result != VK_SUCCESS)
-            return result;
-      } else {
-         cmd->state.desc_sets_gmem_ib.size = 0;
-         cmd->state.desc_sets_sysmem_ib.size = 0;
-      }
-
       /* We need to reload the descriptors every time the descriptor sets
        * change. However, the commands we send only depend on the pipeline
        * because the whole point is to cache descriptors which are used by the
@@ -3274,8 +3206,6 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
-      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
-      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
@@ -3293,7 +3223,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
        */
       uint32_t draw_state_count =
          ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) +
-         ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 4 : 0) +
+         ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 1 : 0) +
          ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
          1; /* vs_params */
 
@@ -3304,12 +3234,8 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
          }
-         if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
-            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
-            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
-            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
+         if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS)
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
-         }
          if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
          tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
@@ -3641,7 +3567,6 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
    struct tu_pipeline *pipeline = cmd->state.compute_pipeline;
    struct tu_descriptor_state *descriptors_state =
       &cmd->descriptors[VK_PIPELINE_BIND_POINT_COMPUTE];
-   VkResult result;
 
    /* TODO: We could probably flush less if we add a compute_flush_bits
     * bitfield.
@@ -3659,19 +3584,6 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
 
    tu_emit_compute_driver_params(cs, pipeline, info);
 
-   if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS) {
-      result = tu6_emit_descriptor_sets(cmd, pipeline,
-                                        VK_PIPELINE_BIND_POINT_COMPUTE, &ib,
-                                        false);
-      if (result != VK_SUCCESS) {
-         cmd->record_result = result;
-         return;
-      }
-   }
-
-   if (ib.size)
-      tu_cs_emit_ib(cs, &ib);
-
    if ((cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS) &&
        pipeline->load_state.state_ib.size > 0) {
       tu_cs_emit_ib(cs, &pipeline->load_state.state_ib);
src/freedreno/vulkan/tu_descriptor_set.c
index c0d349c12c712ee9ac5b067439fc3c9439beaa11..314088cefd90b01327585d5d1dd735739f1291d4 100644
@@ -84,6 +84,7 @@ descriptor_size(VkDescriptorType type)
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
       /* These are remapped to the special driver-managed descriptor set,
        * hence they don't take up any space in the original descriptor set:
+       * Input attachment doesn't use descriptor sets at all
        */
       return 0;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
@@ -175,7 +176,6 @@ tu_CreateDescriptorSetLayout(
           size - sizeof(struct tu_descriptor_set_layout));
 
    uint32_t dynamic_offset_count = 0;
-   uint32_t input_attachment_count = 0;
    uint32_t buffer_count = 0;
 
    for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
@@ -187,7 +187,6 @@ tu_CreateDescriptorSetLayout(
       set_layout->binding[b].offset = set_layout->size;
       set_layout->binding[b].buffer_offset = buffer_count;
       set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
-      set_layout->binding[b].input_attachment_offset = input_attachment_count;
       set_layout->binding[b].size = descriptor_size(binding->descriptorType);
       set_layout->binding[b].shader_stages = binding->stageFlags;
 
@@ -250,15 +249,13 @@ tu_CreateDescriptorSetLayout(
 
          dynamic_offset_count += binding->descriptorCount;
       }
-      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)
-         input_attachment_count += binding->descriptorCount;
+
       set_layout->shader_stages |= binding->stageFlags;
    }
 
    free(bindings);
 
    set_layout->dynamic_offset_count = dynamic_offset_count;
-   set_layout->input_attachment_count = input_attachment_count;
    set_layout->buffer_count = buffer_count;
 
    *pSetLayout = tu_descriptor_set_layout_to_handle(set_layout);
@@ -364,10 +361,9 @@ tu_CreatePipelineLayout(VkDevice _device,
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
    layout->num_sets = pCreateInfo->setLayoutCount;
-   layout->input_attachment_count = 0;
    layout->dynamic_offset_count = 0;
 
-   unsigned dynamic_offset_count = 0, input_attachment_count = 0;
+   unsigned dynamic_offset_count = 0;
 
    _mesa_sha1_init(&ctx);
    for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
@@ -375,9 +371,7 @@ tu_CreatePipelineLayout(VkDevice _device,
                      pCreateInfo->pSetLayouts[set]);
       layout->set[set].layout = set_layout;
       layout->set[set].dynamic_offset_start = dynamic_offset_count;
-      layout->set[set].input_attachment_start = input_attachment_count;
       dynamic_offset_count += set_layout->dynamic_offset_count;
-      input_attachment_count += set_layout->input_attachment_count;
 
       for (uint32_t b = 0; b < set_layout->binding_count; b++) {
          if (set_layout->binding[b].immutable_samplers_offset)
@@ -392,7 +386,6 @@ tu_CreatePipelineLayout(VkDevice _device,
    }
 
    layout->dynamic_offset_count = dynamic_offset_count;
-   layout->input_attachment_count = input_attachment_count;
    layout->push_constant_size = 0;
 
    for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
@@ -445,8 +438,7 @@ tu_descriptor_set_create(struct tu_device *device,
    unsigned dynamic_offset = sizeof(struct tu_descriptor_set) +
       sizeof(struct tu_bo *) * buffer_count;
    unsigned mem_size = dynamic_offset +
-      A6XX_TEX_CONST_DWORDS * 4 * (layout->dynamic_offset_count +
-                                   layout->input_attachment_count);;
+      A6XX_TEX_CONST_DWORDS * 4 * layout->dynamic_offset_count;
 
    if (pool->host_memory_base) {
       if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
@@ -464,7 +456,7 @@ tu_descriptor_set_create(struct tu_device *device,
 
    memset(set, 0, mem_size);
 
-   if (layout->dynamic_offset_count + layout->input_attachment_count > 0) {
+   if (layout->dynamic_offset_count) {
       set->dynamic_descriptors = (uint32_t *)((uint8_t*)set + dynamic_offset);
    }
 
@@ -590,7 +582,6 @@ tu_CreateDescriptorPool(VkDevice _device,
       switch(pCreateInfo->pPoolSizes[i].type) {
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
          dynamic_count += pCreateInfo->pPoolSizes[i].descriptorCount;
       default:
          break;
@@ -903,7 +894,7 @@ tu_update_descriptor_sets(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
             assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
             unsigned idx = writeset->dstArrayElement + j;
-            idx += set->layout->input_attachment_count + binding_layout->dynamic_offset_offset;
+            idx += binding_layout->dynamic_offset_offset;
             write_ubo_descriptor(device, cmd_buffer,
                                  set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
                                  buffer_list, writeset->pBufferInfo + j);
@@ -916,7 +907,7 @@ tu_update_descriptor_sets(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
             assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
             unsigned idx = writeset->dstArrayElement + j;
-            idx += set->layout->input_attachment_count + binding_layout->dynamic_offset_offset;
+            idx += binding_layout->dynamic_offset_offset;
             write_buffer_descriptor(device, cmd_buffer,
                                     set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
                                     buffer_list, writeset->pBufferInfo + j);
@@ -937,15 +928,6 @@ tu_update_descriptor_sets(struct tu_device *device,
                                    writeset->descriptorType,
                                    writeset->pImageInfo + j);
             break;
-         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
-            unsigned idx = writeset->dstArrayElement + j;
-            idx += binding_layout->input_attachment_offset;
-            write_image_descriptor(device, cmd_buffer,
-                                    set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
-                                    buffer_list, writeset->descriptorType,
-                                    writeset->pImageInfo + j);
-            break;
-         }
          case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
             write_combined_image_sampler_descriptor(device, cmd_buffer,
                                                     A6XX_TEX_CONST_DWORDS * 4,
@@ -957,6 +939,9 @@ tu_update_descriptor_sets(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_SAMPLER:
             write_sampler_descriptor(device, ptr, writeset->pImageInfo + j);
             break;
+         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+            /* nothing in descriptor set - framebuffer state is used instead */
+            break;
          default:
             unreachable("unimplemented descriptor type");
             break;
@@ -999,8 +984,6 @@ tu_update_descriptor_sets(struct tu_device *device,
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
             unsigned src_idx = copyset->srcArrayElement + j;
             unsigned dst_idx = copyset->dstArrayElement + j;
-            src_idx += src_set->layout->input_attachment_count;
-            dst_idx += dst_set->layout->input_attachment_count;
             src_idx += src_binding_layout->dynamic_offset_offset;
             dst_idx += dst_binding_layout->dynamic_offset_offset;
 
@@ -1010,18 +993,6 @@ tu_update_descriptor_sets(struct tu_device *device,
             memcpy(dst_dynamic, src_dynamic, A6XX_TEX_CONST_DWORDS * 4);
             break;
          }
-         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
-            unsigned src_idx = copyset->srcArrayElement + j;
-            unsigned dst_idx = copyset->dstArrayElement + j;
-            src_idx += src_binding_layout->input_attachment_offset;
-            dst_idx += dst_binding_layout->input_attachment_offset;
-
-            uint32_t *src_dynamic, *dst_dynamic;
-            src_dynamic = src_set->dynamic_descriptors + src_idx * A6XX_TEX_CONST_DWORDS;
-            dst_dynamic = dst_set->dynamic_descriptors + dst_idx * A6XX_TEX_CONST_DWORDS;
-            memcpy(dst_dynamic, src_dynamic, A6XX_TEX_CONST_DWORDS * 4);
-            break;
-         }
          default:
             memcpy(dst_ptr, src_ptr, src_binding_layout->size);
          }
@@ -1099,13 +1070,7 @@ tu_CreateDescriptorUpdateTemplate(
       switch (entry->descriptorType) {
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-         dst_offset = (set_layout->input_attachment_count +
-            binding_layout->dynamic_offset_offset +
-            entry->dstArrayElement) * A6XX_TEX_CONST_DWORDS;
-         dst_stride = A6XX_TEX_CONST_DWORDS;
-         break;
-      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-         dst_offset = (binding_layout->input_attachment_offset +
+         dst_offset = (binding_layout->dynamic_offset_offset +
             entry->dstArrayElement) * A6XX_TEX_CONST_DWORDS;
          dst_stride = A6XX_TEX_CONST_DWORDS;
          break;
@@ -1197,16 +1162,11 @@ tu_update_descriptor_set_with_template(
             break;
          case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
             write_image_descriptor(device, cmd_buffer, ptr, buffer_list,
                                    templ->entry[i].descriptor_type,
                                    src);
             break;
-         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
-            write_image_descriptor(device, cmd_buffer,
-                                    set->dynamic_descriptors + dst_offset,
-                                    buffer_list, templ->entry[i].descriptor_type,
-                                    src);
-            break;
          }
          case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
             write_combined_image_sampler_descriptor(device, cmd_buffer,
src/freedreno/vulkan/tu_descriptor_set.h
index 22b5eb039707f1e86c09a49e4590fcb2493d9316..8673729f0d83a52967f81f34542ef8306405ffc5 100644
@@ -54,9 +54,6 @@ struct tu_descriptor_set_binding_layout
     */
    uint32_t dynamic_offset_offset;
 
-   /* Index into the array of dynamic input attachment descriptors */
-   uint32_t input_attachment_offset;
-
    /* Offset in the tu_descriptor_set_layout of the immutable samplers, or 0
     * if there are no immutable samplers. */
    uint32_t immutable_samplers_offset;
@@ -86,9 +83,6 @@ struct tu_descriptor_set_layout
    /* Number of dynamic offsets used by this descriptor set */
    uint16_t dynamic_offset_count;
 
-   /* Number of input attachments used by the descriptor set */
-   uint16_t input_attachment_count;
-
    /* A bitfield of which dynamic buffers are ubo's, to make the
     * descriptor-binding-time patching easier.
     */
@@ -110,13 +104,11 @@ struct tu_pipeline_layout
       struct tu_descriptor_set_layout *layout;
       uint32_t size;
       uint32_t dynamic_offset_start;
-      uint32_t input_attachment_start;
    } set[MAX_SETS];
 
    uint32_t num_sets;
    uint32_t push_constant_size;
    uint32_t dynamic_offset_count;
-   uint32_t input_attachment_count;
 
    unsigned char sha1[20];
 };
src/freedreno/vulkan/tu_pipeline.c
index e22b301099b8cd5ceeb2ab71afa6b0b1b0ddb232..e556596c854c518a9097a466090c210365b5835f 100644
@@ -183,8 +183,7 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
          switch (binding->type) {
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
             base = MAX_SETS;
-            offset = (layout->input_attachment_count +
-                      layout->set[i].dynamic_offset_start +
+            offset = (layout->set[i].dynamic_offset_start +
                       binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
             /* fallthrough */
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -201,9 +200,8 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
             }
             break;
          case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-            base = MAX_SETS;
-            offset = (layout->set[i].input_attachment_start +
-                      binding->input_attachment_offset) * A6XX_TEX_CONST_DWORDS;
+            /* nothing - input attachment doesn't use bindless */
+            break;
          case VK_DESCRIPTOR_TYPE_SAMPLER:
          case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
@@ -217,8 +215,7 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
          }
          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
             base = MAX_SETS;
-            offset = (layout->input_attachment_count +
-                      layout->set[i].dynamic_offset_start +
+            offset = (layout->set[i].dynamic_offset_start +
                       binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
             /* fallthrough */
          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
@@ -2055,12 +2052,6 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
       desc_sets |= builder->shaders[i]->active_desc_sets;
    }
    pipeline->active_desc_sets = desc_sets;
-
-   if (builder->shaders[MESA_SHADER_FRAGMENT]) {
-      memcpy(pipeline->program.input_attachment_idx,
-             builder->shaders[MESA_SHADER_FRAGMENT]->attachment_idx,
-             sizeof(pipeline->program.input_attachment_idx));
-   }
 }
 
 static void
src/freedreno/vulkan/tu_private.h
index ddb25677b48f5a3190ed62f2270e0b8368a28978..213e4bd48437755442fd4116c54872a053d090b4 100644
@@ -436,8 +436,6 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_GS_CONST,
    TU_DRAW_STATE_FS_CONST,
    TU_DRAW_STATE_DESC_SETS,
-   TU_DRAW_STATE_DESC_SETS_GMEM,
-   TU_DRAW_STATE_DESC_SETS_SYSMEM,
    TU_DRAW_STATE_DESC_SETS_LOAD,
    TU_DRAW_STATE_VS_PARAMS,
 
@@ -630,11 +628,7 @@ tu_get_perftest_option_name(int id);
 struct tu_descriptor_state
 {
    struct tu_descriptor_set *sets[MAX_SETS];
-   uint32_t valid;
-   struct tu_push_descriptor_set push_set;
-   bool push_dirty;
    uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
-   uint32_t input_attachments[MAX_RTS * A6XX_TEX_CONST_DWORDS];
 };
 
 struct tu_tile
@@ -821,7 +815,7 @@ struct tu_cmd_state
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
    struct tu_cs_entry vertex_buffers_ib;
    struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
-   struct tu_cs_entry desc_sets_ib, desc_sets_gmem_ib, desc_sets_sysmem_ib, desc_sets_load_ib;
+   struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
 
    /* Stream output buffers */
    struct
@@ -1055,7 +1049,6 @@ struct tu_shader
    struct ir3_shader *ir3_shader;
 
    struct tu_push_constant_range push_consts;
-   unsigned attachment_idx[MAX_RTS];
    uint8_t active_desc_sets;
 };
 
@@ -1109,7 +1102,6 @@ struct tu_pipeline
       struct tu_cs_entry binning_state_ib;
 
       struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
-      unsigned input_attachment_idx[MAX_RTS];
    } program;
 
    struct
src/freedreno/vulkan/tu_shader.c
index 54ea0b86d2198929fa8cf1be7737d405c90f7d3f..1a0772726f553d92ba87b489a700e9588e4451f2 100644
@@ -141,8 +141,7 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
       base = layout->set[set].dynamic_offset_start +
-         binding_layout->dynamic_offset_offset +
-         layout->input_attachment_count;
+         binding_layout->dynamic_offset_offset;
       set = MAX_SETS;
       break;
    default:
@@ -177,31 +176,42 @@ build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
    const struct tu_descriptor_set_binding_layout *bind_layout =
       &layout->set[set].layout->binding[binding];
 
+   /* input attachments use the non-bindless workaround */
+   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
+      const struct glsl_type *glsl_type = glsl_without_array(var->type);
+      uint32_t idx = var->data.index * 2;
+
+      b->shader->info.textures_used |=
+         ((1ull << (bind_layout->array_size * 2)) - 1) << (idx * 2);
+
+      /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
+      if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
+         idx += 1;
+
+      if (deref->deref_type == nir_deref_type_var)
+         return nir_imm_int(b, idx);
+
+      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
+      return nir_iadd(b, nir_imm_int(b, idx),
+                      nir_imul_imm(b, arr_index, 2));
+   }
+
    shader->active_desc_sets |= 1u << set;
 
    nir_ssa_def *desc_offset;
    unsigned descriptor_stride;
-   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
-      unsigned offset =
-         layout->set[set].input_attachment_start +
-         bind_layout->input_attachment_offset;
-      desc_offset = nir_imm_int(b, offset);
-      set = MAX_SETS;
-      descriptor_stride = 1;
-   } else {
-      unsigned offset = 0;
-      /* Samplers come second in combined image/sampler descriptors, see
-       * write_combined_image_sampler_descriptor().
-       */
-      if (is_sampler && bind_layout->type ==
-          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
-         offset = 1;
-      }
-      desc_offset =
-         nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
-                     offset);
-      descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
+   unsigned offset = 0;
+   /* Samplers come second in combined image/sampler descriptors, see
+    * write_combined_image_sampler_descriptor().
+    */
+   if (is_sampler && bind_layout->type ==
+         VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+      offset = 1;
    }
+   desc_offset =
+      nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
+                  offset);
+   descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
 
    if (deref->deref_type != nir_deref_type_var) {
       assert(deref->deref_type == nir_deref_type_array);
@@ -356,6 +366,10 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
       nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                             nir_src_for_ssa(bindless));
       tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
+
+      /* for the input attachment case: */
+      if (bindless->parent_instr->type != nir_instr_type_intrinsic)
+         tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
    }
 
    return true;
@@ -435,38 +449,6 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
       align(max, 16) / 16 - tu_shader->push_consts.lo;
 }
 
-/* Gather the InputAttachmentIndex for each input attachment from the NIR
- * shader and organize the info in a way so that draw-time patching is easy.
- */
-static void
-gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader,
-                         const struct tu_pipeline_layout *layout)
-{
-   nir_foreach_variable(var, &shader->uniforms) {
-      const struct glsl_type *glsl_type = glsl_without_array(var->type);
-
-      if (!glsl_type_is_image(glsl_type))
-         continue;
-
-      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);
-
-      const uint32_t set = var->data.descriptor_set;
-      const uint32_t binding = var->data.binding;
-      const struct tu_descriptor_set_binding_layout *bind_layout =
-            &layout->set[set].layout->binding[binding];
-      const uint32_t array_size = bind_layout->array_size;
-
-      if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
-          dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
-         unsigned offset =
-            layout->set[set].input_attachment_start +
-            bind_layout->input_attachment_offset;
-         for (unsigned i = 0; i < array_size; i++)
-            tu_shader->attachment_idx[offset + i] = var->data.index + i;
-      }
-   }
-}
-
 static bool
 tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
             const struct tu_pipeline_layout *layout)
@@ -474,7 +456,6 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
    bool progress = false;
 
    gather_push_constants(shader, tu_shader);
-   gather_input_attachments(shader, tu_shader, layout);
 
    nir_foreach_function(function, shader) {
       if (function->impl)