turnip: fix huge scissor min/max case
[mesa.git] / src / freedreno / vulkan / tu_pipeline.c
index 59170cbea160f7e20543e0c3f52e595ff70f9617..54d011b002ad2644b205221f30e41b8b467edb72 100644 (file)
@@ -243,9 +243,8 @@ struct tu_pipeline_builder
    struct tu_shader *shaders[MESA_SHADER_STAGES];
    struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
    struct ir3_shader_variant *binning_variant;
-   uint32_t shader_offsets[MESA_SHADER_STAGES];
-   uint32_t binning_vs_offset;
-   uint32_t shader_total_size;
+   uint64_t shader_iova[MESA_SHADER_STAGES];
+   uint64_t binning_vs_iova;
 
    bool rasterizer_discard;
    /* these states are affected by rasterizer_discard */
@@ -413,7 +412,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
    tu_cs_emit(cs, xs->instrlen);
 
    tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
-   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(xs->constlen, 4)) |
+   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
                   A6XX_HLSQ_VS_CNTL_ENABLED);
 
    /* emit program binary
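The dropped align() implies constlen now arrives pre-aligned from ir3. A minimal sketch of the invariant the unaligned write relies on (the assert is illustrative, not part of the patch):

    /* assumed invariant: ir3 hands back a constlen already aligned to 4,
     * so the CONSTLEN field can take it directly */
    assert(xs->constlen % 4 == 0);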
@@ -1210,7 +1209,8 @@ static void
 tu6_emit_fs_outputs(struct tu_cs *cs,
                     const struct ir3_shader_variant *fs,
                     uint32_t mrt_count, bool dual_src_blend,
-                    uint32_t render_components)
+                    uint32_t render_components,
+                    bool is_s8_uint)
 {
    uint32_t smask_regid, posz_regid;
 
@@ -1256,7 +1256,7 @@ tu6_emit_fs_outputs(struct tu_cs *cs,
 
    enum a6xx_ztest_mode zmode;
 
-   if (fs->no_earlyz || fs->has_kill || fs->writes_pos) {
+   if (fs->no_earlyz || fs->has_kill || fs->writes_pos || is_s8_uint) {
       zmode = A6XX_LATE_Z;
    } else {
       zmode = A6XX_EARLY_Z;
@@ -1334,7 +1334,6 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
 static void
 tu6_emit_program(struct tu_cs *cs,
                  struct tu_pipeline_builder *builder,
-                 const struct tu_bo *binary_bo,
                  bool binning_pass)
 {
    const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
@@ -1355,8 +1354,7 @@ tu6_emit_program(struct tu_cs *cs,
    */
    if (binning_pass && !gs) {
       vs = bs;
-      tu6_emit_xs_config(cs, stage, bs,
-                         binary_bo->iova + builder->binning_vs_offset);
+      tu6_emit_xs_config(cs, stage, bs, builder->binning_vs_iova);
       stage++;
    }
 
@@ -1366,8 +1364,7 @@ tu6_emit_program(struct tu_cs *cs,
       if (stage == MESA_SHADER_FRAGMENT && binning_pass)
          fs = xs = NULL;
 
-      tu6_emit_xs_config(cs, stage, xs,
-                         binary_bo->iova + builder->shader_offsets[stage]);
+      tu6_emit_xs_config(cs, stage, xs, builder->shader_iova[stage]);
    }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
@@ -1380,14 +1377,16 @@ tu6_emit_program(struct tu_cs *cs,
       tu6_emit_fs_inputs(cs, fs);
       tu6_emit_fs_outputs(cs, fs, builder->color_attachment_count,
                           builder->use_dual_src_blend,
-                          builder->render_components);
+                          builder->render_components,
+                          builder->depth_attachment_format == VK_FORMAT_S8_UINT);
    } else {
       /* TODO: check if these can be skipped if fs is disabled */
       struct ir3_shader_variant dummy_variant = {};
       tu6_emit_fs_inputs(cs, &dummy_variant);
       tu6_emit_fs_outputs(cs, &dummy_variant, builder->color_attachment_count,
                           builder->use_dual_src_blend,
-                          builder->render_components);
+                          builder->render_components,
+                          builder->depth_attachment_format == VK_FORMAT_S8_UINT);
    }
 
    if (gs || hs) {
@@ -1405,6 +1404,7 @@ tu6_emit_vertex_input(struct tu_cs *cs,
 {
    uint32_t vfd_decode_idx = 0;
    uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
+   uint32_t step_rate[MAX_VBS];
 
    for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
       const VkVertexInputBindingDescription *binding =
@@ -1417,6 +1417,17 @@ tu6_emit_vertex_input(struct tu_cs *cs,
          binding_instanced |= 1 << binding->binding;
 
       *bindings_used |= 1 << binding->binding;
+      step_rate[binding->binding] = 1;
+   }
+
+   const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state =
+      vk_find_struct_const(info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
+   if (div_state) {
+      for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) {
+         const VkVertexInputBindingDivisorDescriptionEXT *desc =
+            &div_state->pVertexBindingDivisors[i];
+         step_rate[desc->binding] = desc->divisor;
+      }
    }
 
    /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
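For reference, the new step_rate parsing consumes VK_EXT_vertex_attribute_divisor state chained by the application; an app-side sketch with made-up values:

    VkVertexInputBindingDivisorDescriptionEXT divisor = {
       .binding = 1, /* an instanced binding */
       .divisor = 4, /* advance the binding once every 4 instances */
    };
    VkPipelineVertexInputDivisorStateCreateInfoEXT divisor_info = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
       .vertexBindingDivisorCount = 1,
       .pVertexBindingDivisors = &divisor,
    };
    /* chained into VkPipelineVertexInputStateCreateInfo::pNext, where the
     * vk_find_struct_const() call above picks it up */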
@@ -1426,6 +1437,8 @@ tu6_emit_vertex_input(struct tu_cs *cs,
          &info->pVertexAttributeDescriptions[i];
       uint32_t input_idx;
 
+      assert(*bindings_used & BIT(attr->binding));
+
       for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
          if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
             break;
@@ -1445,7 +1458,7 @@ tu6_emit_vertex_input(struct tu_cs *cs,
                         .swap = format.swap,
                         .unk30 = 1,
                         ._float = !vk_format_is_int(attr->format)),
-                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, step_rate[attr->binding]));
 
       tu_cs_emit_regs(cs,
                       A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
@@ -1537,17 +1550,30 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
 void
 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
 {
-   const VkOffset2D min = scissor->offset;
-   const VkOffset2D max = {
+   VkOffset2D min = scissor->offset;
+   VkOffset2D max = {
       scissor->offset.x + scissor->extent.width,
       scissor->offset.y + scissor->extent.height,
    };
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
-   tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) |
-                     A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y));
-   tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) |
-                     A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
+   /* special case for empty scissor with max == 0, so that max - 1 below doesn't wrap */
+   if (max.x == 0)
+      min.x = max.x = 1;
+   if (max.y == 0)
+      min.y = max.y = 1;
+
+   /* avoid overflow with a large scissor; note that a clamped max can end up
+    * at min - 1 after the BR adjustment, which still encodes an empty scissor
+    */
+   uint32_t scissor_max = BITFIELD_MASK(15);
+   min.x = MIN2(scissor_max, min.x);
+   min.y = MIN2(scissor_max, min.y);
+   max.x = MIN2(scissor_max, max.x);
+   max.y = MIN2(scissor_max, max.y);
+
+   tu_cs_emit_regs(cs,
+                   A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = min.x, .y = min.y),
+                   A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = max.x - 1, .y = max.y - 1));
 }
 
 void
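Worked arithmetic for the two cases the clamping handles (input values hypothetical):

    /* huge scissor: offset = (0, 0), extent.width = 0x7fffffff
     *   max.x = 0x7fffffff -> clamps to 0x7fff, BR.x = 0x7ffe,
     *   which fits the 15-bit register field
     *
     * empty scissor beyond the clamp range: offset.x = 0x10000, width = 0
     *   min.x = max.x = 0x10000 -> both clamp to 0x7fff,
     *   TL.x = 0x7fff, BR.x = 0x7ffe = TL.x - 1; an inclusive BR below TL
     *   still rejects everything, so the scissor stays empty */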
@@ -1633,14 +1659,12 @@ tu6_emit_depth_control(struct tu_cs *cs,
                        const VkPipelineDepthStencilStateCreateInfo *ds_info,
                        const VkPipelineRasterizationStateCreateInfo *rast_info)
 {
-   assert(!ds_info->depthBoundsTestEnable);
-
    uint32_t rb_depth_cntl = 0;
    if (ds_info->depthTestEnable) {
       rb_depth_cntl |=
          A6XX_RB_DEPTH_CNTL_Z_ENABLE |
          A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
-         A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+         A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
 
       if (rast_info->depthClampEnable)
          rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
@@ -1649,6 +1673,9 @@ tu6_emit_depth_control(struct tu_cs *cs,
          rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
    }
 
+   if (ds_info->depthBoundsTestEnable)
+      rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1);
    tu_cs_emit(cs, rb_depth_cntl);
 }
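With the assert gone, applications can enable the depth bounds test at pipeline creation; an app-side sketch (bounds values arbitrary):

    VkPipelineDepthStencilStateCreateInfo ds_info = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
       .depthBoundsTestEnable = VK_TRUE,
       .minDepthBounds = 0.25f, /* fragments with Z outside [0.25, 0.75] fail */
       .maxDepthBounds = 0.75f,
    };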
@@ -1800,34 +1827,32 @@ tu6_emit_blend_control(struct tu_cs *cs,
 }
 
 static VkResult
-tu_pipeline_create(struct tu_device *dev,
-                   struct tu_pipeline_layout *layout,
-                   bool compute,
-                   const VkAllocationCallbacks *pAllocator,
-                   struct tu_pipeline **out_pipeline)
+tu_pipeline_allocate_cs(struct tu_device *dev,
+                        struct tu_pipeline *pipeline,
+                        struct tu_pipeline_builder *builder,
+                        struct ir3_shader_variant *compute)
 {
-   struct tu_pipeline *pipeline =
-      vk_zalloc2(&dev->alloc, pAllocator, sizeof(*pipeline), 8,
-                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (!pipeline)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   uint32_t size = 2048 + tu6_load_state_size(pipeline->layout, compute);
 
-   tu_cs_init(&pipeline->cs, dev, TU_CS_MODE_SUB_STREAM, 2048);
+   /* graphics case: */
+   if (builder) {
+      for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+         if (builder->variants[i])
+            size += builder->variants[i]->info.sizedwords;
+      }
+
+      size += builder->binning_variant->info.sizedwords;
+   } else {
+      size += compute->info.sizedwords;
+   }
+
+   tu_cs_init(&pipeline->cs, dev, TU_CS_MODE_SUB_STREAM, size);
 
    /* Reserve the space now such that tu_cs_begin_sub_stream never fails. Note
     * that LOAD_STATE can potentially take up a large amount of space so we
     * calculate its size explicitly.
    */
-   unsigned load_state_size = tu6_load_state_size(layout, compute);
-   VkResult result = tu_cs_reserve_space(&pipeline->cs, 2048 + load_state_size);
-   if (result != VK_SUCCESS) {
-      vk_free2(&dev->alloc, pAllocator, pipeline);
-      return result;
-   }
-
-   *out_pipeline = pipeline;
-
-   return VK_SUCCESS;
+   return tu_cs_reserve_space(&pipeline->cs, size);
 }
 
 static void
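The reservation is now sized from the actual shader binaries instead of a fixed 2048 dwords; a hypothetical budget (all numbers invented) to show the arithmetic:

    /*   2048                fixed pipeline state
     * +  128                tu6_load_state_size(layout, compute)
     * +  100 + 200          VS and FS sizedwords
     * +   50                binning VS sizedwords
     * = 2526 dwords, so later tu_cs_alloc/tu_cs_begin_sub_stream calls
     *   cannot fail */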
@@ -1880,10 +1905,30 @@ tu6_get_tessmode(struct tu_shader* shader)
    }
 }
 
+static uint64_t
+tu_upload_variant(struct tu_pipeline *pipeline,
+                  const struct ir3_shader_variant *variant)
+{
+   struct tu_cs_memory memory;
+
+   if (!variant)
+      return 0;
+
+   /* this expects to get enough alignment because shaders are allocated first
+    * and sizedwords is always aligned correctly
+    * note: an assert in tu6_emit_xs_config validates the alignment
+    */
+   tu_cs_alloc(&pipeline->cs, variant->info.sizedwords, 1, &memory);
+
+   memcpy(memory.map, variant->bin, sizeof(uint32_t) * variant->info.sizedwords);
+   return memory.iova;
+}
+
 static VkResult
 tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
                                     struct tu_pipeline *pipeline)
 {
+   const struct ir3_compiler *compiler = builder->device->compiler;
    const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
       NULL
    };
@@ -1921,8 +1966,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
 
    pipeline->tess.patch_type = key.tessellation;
 
-   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
-        stage > MESA_SHADER_NONE; stage--) {
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+        stage < MESA_SHADER_STAGES; stage++) {
       if (!builder->shaders[stage])
          continue;
       
@@ -1932,10 +1977,25 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
                                 &key, false, &created);
       if (!builder->variants[stage])
          return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler);
 
-      builder->shader_offsets[stage] = builder->shader_total_size;
-      builder->shader_total_size +=
-         sizeof(uint32_t) * builder->variants[stage]->info.sizedwords;
+   key.safe_constlen = true;
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+        stage < MESA_SHADER_STAGES; stage++) {
+      if (!builder->shaders[stage])
+         continue;
+
+      if (safe_constlens & (1 << stage)) {
+         bool created;
+         builder->variants[stage] =
+            ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
+                                   &key, false, &created);
+         if (!builder->variants[stage])
+            return VK_ERROR_OUT_OF_HOST_MEMORY;
+      }
    }
 
    const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX];
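The two-pass scheme: compile every stage with the default key, let ir3_trim_constlen() report (as a stage bitmask) which variants must shrink to fit the shared const file, then recompile just those with key.safe_constlen set. A conceptual sketch of the trimming decision, not the ir3 source (names and budget illustrative):

    uint32_t
    trim_constlen_sketch(struct ir3_shader_variant **variants, uint32_t budget)
    {
       uint32_t total = 0, mask = 0;
       for (int s = 0; s < MESA_SHADER_STAGES; s++)
          total += variants[s] ? variants[s]->constlen : 0;
       /* flag stages for a safe (smaller) recompile until the sum fits */
       for (int s = MESA_SHADER_STAGES - 1; s >= 0 && total > budget; s--) {
          if (variants[s]) {
             total -= variants[s]->constlen;
             mask |= 1 << s;
          }
       }
       return mask;
    }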
@@ -1945,15 +2005,13 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       variant = builder->variants[MESA_SHADER_VERTEX];
    } else {
       bool created;
+      key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
       variant = ir3_shader_get_variant(vs->ir3_shader, &key,
                                        true, &created);
       if (!variant)
          return VK_ERROR_OUT_OF_HOST_MEMORY;
    }
 
-   builder->binning_vs_offset = builder->shader_total_size;
-   builder->shader_total_size +=
-      sizeof(uint32_t) * variant->info.sizedwords;
    builder->binning_variant = variant;
 
    if (builder->shaders[MESA_SHADER_TESS_CTRL]) {
@@ -1983,39 +2041,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    return VK_SUCCESS;
 }
 
-static VkResult
-tu_pipeline_builder_upload_shaders(struct tu_pipeline_builder *builder,
-                                   struct tu_pipeline *pipeline)
-{
-   struct tu_bo *bo = &pipeline->program.binary_bo;
-
-   VkResult result =
-      tu_bo_init_new(builder->device, bo, builder->shader_total_size);
-   if (result != VK_SUCCESS)
-      return result;
-
-   result = tu_bo_map(builder->device, bo);
-   if (result != VK_SUCCESS)
-      return result;
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct ir3_shader_variant *variant = builder->variants[i];
-      if (!variant)
-         continue;
-
-      memcpy(bo->map + builder->shader_offsets[i], variant->bin,
-             sizeof(uint32_t) * variant->info.sizedwords);
-   }
-
-   if (builder->binning_variant) {
-      const struct ir3_shader_variant *variant = builder->binning_variant;
-      memcpy(bo->map + builder->binning_vs_offset, variant->bin,
-             sizeof(uint32_t) * variant->info.sizedwords);
-   }
-
-   return VK_SUCCESS;
-}
-
 static void
 tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
                                   struct tu_pipeline *pipeline)
@@ -2058,11 +2083,11 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
 {
    struct tu_cs prog_cs;
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
-   tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, false);
+   tu6_emit_program(&prog_cs, builder, false);
    pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
 
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
-   tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, true);
+   tu6_emit_program(&prog_cs, builder, true);
    pipeline->program.binning_state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
 
    VkShaderStageFlags stages = 0;
@@ -2199,10 +2224,10 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
    const VkPipelineRasterizationStateCreateInfo *rast_info =
       builder->create_info->pRasterizationState;
 
-   assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
+   enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
 
    struct tu_cs cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, 7, &cs);
+   tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs);
 
    tu_cs_emit_regs(&cs,
                    A6XX_GRAS_CL_CNTL(
@@ -2211,6 +2236,13 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
                      .unk5 = rast_info->depthClampEnable,
                      .zero_gb_scale_z = 1,
                      .vp_clip_code_ignore = 1));
+
+   tu_cs_emit_regs(&cs,
+                   A6XX_VPC_POLYGON_MODE(.mode = mode));
+
+   tu_cs_emit_regs(&cs,
+                   A6XX_PC_POLYGON_MODE(.mode = mode));
+
    /* move to hw ctx init? */
    tu_cs_emit_regs(&cs, A6XX_GRAS_UNKNOWN_8001());
    tu_cs_emit_regs(&cs,
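tu6_polygon_mode() is introduced elsewhere in this series; a plausible shape of the mapping, assuming the a6xx POLYMODE6_* enums (a sketch, not the actual helper):

    static enum a6xx_polygon_mode
    tu6_polygon_mode(VkPolygonMode mode)
    {
       switch (mode) {
       case VK_POLYGON_MODE_POINT: return POLYMODE6_POINTS;
       case VK_POLYGON_MODE_LINE:  return POLYMODE6_LINES;
       default:                    return POLYMODE6_TRIANGLES;
       }
    }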
@@ -2272,6 +2304,12 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
 
    pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
 
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
+      tu_cs_emit_regs(&cs,
+                      A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds),
+                      A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds));
+   }
+
    if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
       tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
                                                .bfmask = ds_info->back.compareMask & 0xff));
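When VK_DYNAMIC_STATE_DEPTH_BOUNDS is listed in pDynamicStates, tu_pipeline_static_state() skips the static emit above and the bounds are supplied per command buffer instead:

    /* app-side, values arbitrary */
    vkCmdSetDepthBounds(cmd_buffer, 0.25f, 0.75f);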
@@ -2355,34 +2393,41 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
                    const VkAllocationCallbacks *alloc)
 {
    tu_cs_finish(&pipeline->cs);
-
-   if (pipeline->program.binary_bo.gem_handle)
-      tu_bo_finish(dev, &pipeline->program.binary_bo);
 }
 
 static VkResult
 tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
                           struct tu_pipeline **pipeline)
 {
-   VkResult result = tu_pipeline_create(builder->device, builder->layout,
-                                        false, builder->alloc, pipeline);
-   if (result != VK_SUCCESS)
-      return result;
+   VkResult result;
+
+   *pipeline =
+      vk_zalloc2(&builder->device->alloc, builder->alloc, sizeof(**pipeline),
+                 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!*pipeline)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
 
    (*pipeline)->layout = builder->layout;
 
    /* compile and upload shaders */
    result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
-   if (result == VK_SUCCESS)
-      result = tu_pipeline_builder_upload_shaders(builder, *pipeline);
    if (result != VK_SUCCESS) {
-      tu_pipeline_finish(*pipeline, builder->device, builder->alloc);
       vk_free2(&builder->device->alloc, builder->alloc, *pipeline);
-      *pipeline = VK_NULL_HANDLE;
+      return result;
+   }
 
+   result = tu_pipeline_allocate_cs(builder->device, *pipeline, builder, NULL);
+   if (result != VK_SUCCESS) {
+      vk_free2(&builder->device->alloc, builder->alloc, *pipeline);
       return result;
    }
 
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++)
+      builder->shader_iova[i] = tu_upload_variant(*pipeline, builder->variants[i]);
+
+   builder->binning_vs_iova =
+      tu_upload_variant(*pipeline, builder->binning_variant);
+
    tu_pipeline_builder_parse_dynamic(builder, *pipeline);
    tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
    tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
@@ -2519,30 +2564,6 @@ tu_CreateGraphicsPipelines(VkDevice device,
    return final_result;
 }
 
-static VkResult
-tu_compute_upload_shader(VkDevice device,
-                         struct tu_pipeline *pipeline,
-                         struct ir3_shader_variant *v)
-{
-   TU_FROM_HANDLE(tu_device, dev, device);
-   struct tu_bo *bo = &pipeline->program.binary_bo;
-
-   uint32_t shader_size = sizeof(uint32_t) * v->info.sizedwords;
-   VkResult result =
-      tu_bo_init_new(dev, bo, shader_size);
-   if (result != VK_SUCCESS)
-      return result;
-
-   result = tu_bo_map(dev, bo);
-   if (result != VK_SUCCESS)
-      return result;
-
-   memcpy(bo->map, v->bin, shader_size);
-
-   return VK_SUCCESS;
-}
-
-
 static VkResult
 tu_compute_pipeline_create(VkDevice device,
                            VkPipelineCache _cache,
@@ -2559,9 +2580,11 @@ tu_compute_pipeline_create(VkDevice device,
 
    *pPipeline = VK_NULL_HANDLE;
 
-   result = tu_pipeline_create(dev, layout, true, pAllocator, &pipeline);
-   if (result != VK_SUCCESS)
-      return result;
+   pipeline =
+      vk_zalloc2(&dev->alloc, pAllocator, sizeof(*pipeline), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!pipeline)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
 
    pipeline->layout = layout;
 
@@ -2577,22 +2600,26 @@ tu_compute_pipeline_create(VkDevice device,
    bool created;
    struct ir3_shader_variant *v =
       ir3_shader_get_variant(shader->ir3_shader, &key, false, &created);
-   if (!v)
+   if (!v) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto fail;
+   }
 
    tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE],
                            shader, v);
 
-   result = tu_compute_upload_shader(device, pipeline, v);
+   result = tu_pipeline_allocate_cs(dev, pipeline, NULL, v);
    if (result != VK_SUCCESS)
       goto fail;
 
+   uint64_t shader_iova = tu_upload_variant(pipeline, v);
+
    for (int i = 0; i < 3; i++)
       pipeline->compute.local_size[i] = v->shader->nir->info.cs.local_size[i];
 
    struct tu_cs prog_cs;
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
-   tu6_emit_cs_config(&prog_cs, shader, v, pipeline->program.binary_bo.iova);
+   tu6_emit_cs_config(&prog_cs, shader, v, shader_iova);
    pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
 
    tu6_emit_load_state(pipeline, true);
@@ -2604,7 +2631,6 @@ fail:
    if (shader)
       tu_shader_destroy(dev, shader, pAllocator);
 
-   tu_pipeline_finish(pipeline, dev, pAllocator);
    vk_free2(&dev->alloc, pAllocator, pipeline);
 
    return result;