anv: Use different BOs for different scratch sizes and stages
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 16 Jun 2016 22:26:54 +0000 (15:26 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 22 Jun 2016 19:39:45 +0000 (12:39 -0700)
This solves a race condition where we can end up having different stages
stomp on each other because they're all trying to scratch in the same BO
but they have different views of its layout.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/gen7_pipeline.c
src/intel/vulkan/gen8_pipeline.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_pipeline.c

index 97300c37ea56923986d17e96022d3d222aee0151..ea8e875ad1e566e0defa5a630b3b67d6d8c58ea3 100644 (file)
@@ -878,7 +878,7 @@ VkResult anv_CreateDevice(
 
    anv_bo_init_new(&device->workaround_bo, device, 1024);
 
-   anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
+   anv_scratch_pool_init(device, &device->scratch_pool);
 
    anv_queue_init(device, &device->queue);
 
@@ -947,7 +947,7 @@ void anv_DestroyDevice(
    anv_block_pool_finish(&device->instruction_block_pool);
    anv_state_pool_finish(&device->surface_state_pool);
    anv_block_pool_finish(&device->surface_state_block_pool);
-   anv_block_pool_finish(&device->scratch_block_pool);
+   anv_scratch_pool_finish(device, &device->scratch_pool);
 
    close(device->fd);
 
index 295b48cf49222b4fc9f170d2adc4d10f05d43e49..29747cf6c4a43488df274ca69675c144e806bc73 100644 (file)
@@ -397,22 +397,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 const struct brw_stage_prog_data *prog_data,
                                 struct anv_pipeline_bind_map *map)
 {
-   struct brw_device_info *devinfo = &pipeline->device->info;
-   uint32_t max_threads[] = {
-      [MESA_SHADER_VERTEX]                  = devinfo->max_vs_threads,
-      [MESA_SHADER_TESS_CTRL]               = devinfo->max_hs_threads,
-      [MESA_SHADER_TESS_EVAL]               = devinfo->max_ds_threads,
-      [MESA_SHADER_GEOMETRY]                = devinfo->max_gs_threads,
-      [MESA_SHADER_FRAGMENT]                = devinfo->max_wm_threads,
-      [MESA_SHADER_COMPUTE]                 = devinfo->max_cs_threads,
-   };
-
    pipeline->prog_data[stage] = prog_data;
    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->scratch_start[stage] = pipeline->total_scratch;
-   pipeline->total_scratch =
-      align_u32(pipeline->total_scratch, 1024) +
-      prog_data->total_scratch * max_threads[stage];
    pipeline->bindings[stage] = *map;
 }
 
@@ -1176,7 +1162,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
     * of various prog_data pointers.  Make them NULL by default.
     */
    memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
    memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
 
    pipeline->vs_simd8 = NO_KERNEL;
@@ -1185,7 +1170,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    pipeline->ps_ksp0 = NO_KERNEL;
 
    pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;
 
    const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
    struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
@@ -1278,10 +1262,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    if (extra && extra->use_rectlist)
       pipeline->topology = _3DPRIM_RECTLIST;
 
-   while (anv_block_pool_size(&device->scratch_block_pool) <
-          pipeline->total_scratch)
-      anv_block_pool_alloc(&device->scratch_block_pool);
-
    return VK_SUCCESS;
 }
 
index ebbf2bc48324402fd4864b6a6e2fa8800b62e198..50b860ca581e8a41626edf425e07c195cb3e7523 100644 (file)
@@ -711,7 +711,7 @@ struct anv_device {
 
     struct anv_queue                            queue;
 
-    struct anv_block_pool                       scratch_block_pool;
+    struct anv_scratch_pool                     scratch_pool;
 
     uint32_t                                    default_mocs;
 
@@ -1471,8 +1471,6 @@ struct anv_pipeline {
    bool                                         needs_data_cache;
 
    const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
-   uint32_t                                     scratch_start[MESA_SHADER_STAGES];
-   uint32_t                                     total_scratch;
    struct {
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
index 56e59a4bab568c3b501a0468efd68277c0b0ab36..89cb51f891689db7050e9bc9cf465260831e8c31 100644 (file)
@@ -252,8 +252,10 @@ genX(graphics_pipeline_create)(
          vs.KernelStartPointer         = pipeline->vs_vec4;
 
          vs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_VERTEX,
+                                         vs_prog_data->base.base.total_scratch),
+            .offset = 0,
          };
          vs.PerThreadScratchSpace      = scratch_space(&vs_prog_data->base.base);
 
@@ -276,8 +278,10 @@ genX(graphics_pipeline_create)(
          gs.KernelStartPointer         = pipeline->gs_kernel;
 
          gs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_GEOMETRY,
+                                         gs_prog_data->base.base.total_scratch),
+            .offset = 0,
          };
          gs.PerThreadScratchSpace      = scratch_space(&gs_prog_data->base.base);
 
@@ -338,8 +342,10 @@ genX(graphics_pipeline_create)(
          ps.KernelStartPointer0           = pipeline->ps_ksp0;
 
          ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_FRAGMENT,
+                                         wm_prog_data->base.total_scratch),
+            .offset = 0,
          };
          ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
          ps.MaximumNumberofThreads        = device->info.max_wm_threads - 1;
index 53fca2b46e83d1037d9936d2e367d0d2b079d9f6..6d70df6921c7a8cb199d459e9a5e151f4af25ee7 100644 (file)
@@ -361,8 +361,10 @@ genX(graphics_pipeline_create)(
          gs.ExpectedVertexCount     = gs_prog_data->vertices_in;
 
          gs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_GEOMETRY,
+                                         gs_prog_data->base.base.total_scratch),
+            .offset = 0,
          };
          gs.PerThreadScratchSpace   = scratch_space(&gs_prog_data->base.base);
          gs.OutputVertexSize        = gs_prog_data->output_vertex_size_hwords * 2 - 1;
@@ -431,8 +433,10 @@ genX(graphics_pipeline_create)(
          vs.SoftwareExceptionEnable       = false;
 
          vs.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_VERTEX,
+                                         vs_prog_data->base.base.total_scratch),
+            .offset = 0,
          };
          vs.PerThreadScratchSpace   = scratch_space(&vs_prog_data->base.base);
 
@@ -483,8 +487,10 @@ genX(graphics_pipeline_create)(
          ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
 
          ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = NULL,
-            .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
+            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                         MESA_SHADER_FRAGMENT,
+                                         wm_prog_data->base.total_scratch),
+            .offset = 0,
          };
          ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);
 
index 95f9b48201d64e777f91bc0e73523235cbb3f766..741d5bfd602a85f144a663570e2b09f92b71756b 100644 (file)
@@ -33,12 +33,6 @@ void
 genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_device *device = cmd_buffer->device;
-   struct anv_bo *scratch_bo = NULL;
-
-   cmd_buffer->state.scratch_size =
-      anv_block_pool_size(&device->scratch_block_pool);
-   if (cmd_buffer->state.scratch_size > 0)
-      scratch_bo = &device->scratch_block_pool.bo;
 
 /* XXX: Do we need this on more than just BDW? */
 #if (GEN_GEN >= 8)
@@ -55,7 +49,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 #endif
 
    anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
-      sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 };
+      sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
       sba.GeneralStateMemoryObjectControlState = GENX(MOCS);
       sba.GeneralStateBaseAddressModifyEnable = true;
 
@@ -503,13 +497,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.vb_dirty &= ~vb_emit;
 
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
-      /* If somebody compiled a pipeline after starting a command buffer the
-       * scratch bo may have grown since we started this cmd buffer (and
-       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
-       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
-      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
-         anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
 
       /* The exact descriptor layout is pulled from the pipeline, so we need
index 736044889b8350102fc63aef85becb9646f96d76..5cbcfd2e48ae649dd7b791d6e6cbf2f56e59abab 100644 (file)
@@ -64,7 +64,6 @@ genX(compute_pipeline_create)(
     * of various prog_data pointers.  Make them NULL by default.
     */
    memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
    memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
 
    pipeline->vs_simd8 = NO_KERNEL;
@@ -72,7 +71,6 @@ genX(compute_pipeline_create)(
    pipeline->gs_kernel = NO_KERNEL;
 
    pipeline->active_stages = 0;
-   pipeline->total_scratch = 0;
 
    pipeline->needs_data_cache = false;
 
@@ -103,8 +101,10 @@ genX(compute_pipeline_create)(
 
    anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
       vfe.ScratchSpaceBasePointer = (struct anv_address) {
-         .bo = NULL,
-         .offset = pipeline->scratch_start[MESA_SHADER_COMPUTE],
+         .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
+                                      MESA_SHADER_COMPUTE,
+                                      cs_prog_data->base.total_scratch),
+         .offset = 0,
       };
       vfe.PerThreadScratchSpace  = ffs(cs_prog_data->base.total_scratch / 2048);
 #if GEN_GEN > 7