vk: Implement scratch buffers to make spilling work

author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>

Fri, 19 Jun 2015 22:41:30 +0000 (15:41 -0700)

committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>

Fri, 19 Jun 2015 22:42:15 +0000 (15:42 -0700)
author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Fri, 19 Jun 2015 22:41:30 +0000 (15:41 -0700)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Fri, 19 Jun 2015 22:42:15 +0000 (15:42 -0700)
diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp

index 5f5dacb54069a5fef93681d7f7357a1a3f57cf07..19a403aa1c1ab97232b81865395b6a569390bb21 100644 (file)
--- a/src/vulkan/compiler.cpp
+++ b/src/vulkan/compiler.cpp
@@ -248,13 +248,6 @@ really_do_vs_prog(struct brw_context *brw,
  
     ralloc_free(mem_ctx);
  
-   if (stage_prog_data->total_scratch > 0)
-      if (!anv_bo_init_new(&pipeline->vs_scratch_bo,
-                           pipeline->device,
-                           stage_prog_data->total_scratch))
-         return false;
-
-
     return true;
  }
  
@@ -535,12 +528,6 @@ really_do_wm_prog(struct brw_context *brw,
  
     ralloc_free(mem_ctx);
  
-   if (prog_data->base.total_scratch > 0)
-      if (!anv_bo_init_new(&pipeline->ps_scratch_bo,
-                           pipeline->device,
-                           prog_data->base.total_scratch))
-         return false;
-
     return true;
  }
  
@@ -594,13 +581,6 @@ really_do_gs_prog(struct brw_context *brw,
  
     ralloc_free(output.mem_ctx);
  
-   if (output.prog_data.base.base.total_scratch) {
-      if (!anv_bo_init_new(&pipeline->gs_scratch_bo,
-                           pipeline->device,
-                           output.prog_data.base.base.total_scratch))
-         return false;
-   }
-
     return true;
  }
  
@@ -684,6 +664,7 @@ fail_on_compile_error(int status, const char *msg)
  }
  
  struct anv_compiler {
+   struct anv_device *device;
     struct intel_screen *screen;
     struct brw_context *brw;
     struct gl_pipeline_object pipeline;
@@ -710,6 +691,8 @@ anv_compiler_create(struct anv_device *device)
     if (compiler->brw == NULL)
        goto fail;
  
+   compiler->device = device;
+
     compiler->brw->optionCache.info = NULL;
     compiler->brw->bufmgr = NULL;
     compiler->brw->gen = devinfo->gen;
@@ -967,6 +950,28 @@ anv_compile_shader_spirv(struct anv_compiler *compiler,
     unreachable("SPIR-V is not supported yet!");
  }
  
+static void
+add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
+                   struct brw_stage_prog_data *prog_data)
+{
+   struct brw_device_info *devinfo = &pipeline->device->info;
+   uint32_t max_threads[] = {
+      [VK_SHADER_STAGE_VERTEX]                  = devinfo->max_vs_threads,
+      [VK_SHADER_STAGE_TESS_CONTROL]            = 0,
+      [VK_SHADER_STAGE_TESS_EVALUATION]         = 0,
+      [VK_SHADER_STAGE_GEOMETRY]                = devinfo->max_gs_threads,
+      [VK_SHADER_STAGE_FRAGMENT]                = devinfo->max_wm_threads,
+      [VK_SHADER_STAGE_COMPUTE]                 = devinfo->max_cs_threads,
+   };
+
+   pipeline->prog_data[stage] = prog_data;
+   pipeline->active_stages |= 1 << stage;
+   pipeline->scratch_start[stage] = pipeline->total_scratch;
+   pipeline->total_scratch =
+      ALIGN_U32(pipeline->total_scratch, 1024) +
+      prog_data->total_scratch * max_threads[stage];
+}
+
  int
  anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  {
@@ -978,6 +983,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
      * of various prog_data pointers.  Make them NULL by default.
      */
     memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
  
     brw->use_rep_send = pipeline->use_repclear;
     brw->no_simd8 = pipeline->use_repclear;
@@ -1024,6 +1030,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  
     bool success;
     pipeline->active_stages = 0;
+   pipeline->total_scratch = 0;
  
     if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
        struct brw_vs_prog_key vs_key;
@@ -1035,8 +1042,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  
        success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
        fail_if(!success, "do_wm_prog failed\n");
-      pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base;
-      pipeline->active_stages |= VK_SHADER_STAGE_VERTEX_BIT;;
+      add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
+                         &pipeline->vs_prog_data.base.base);
     } else {
        memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
        pipeline->vs_simd8 = NO_KERNEL;
@@ -1053,8 +1060,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  
        success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
        fail_if(!success, "do_gs_prog failed\n");
-      pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
-      pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base;
+      add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
+                         &pipeline->gs_prog_data.base.base);
     } else {
        pipeline->gs_vec4 = NO_KERNEL;
     }
@@ -1069,8 +1076,8 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  
        success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
        fail_if(!success, "do_wm_prog failed\n");
-      pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base;
-      pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
+      add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
+                         &pipeline->wm_prog_data.base);
     }
  
     if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
@@ -1083,12 +1090,16 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
  
        success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
        fail_if(!success, "brw_codegen_cs_prog failed\n");
-      pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base;
-      pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+      add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
+                         &pipeline->cs_prog_data.base);
     }
  
     brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
  
+   struct anv_device *device = compiler->device;
+   while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
+      anv_block_pool_alloc(&device->scratch_block_pool);
+
     gen7_compute_urb_partition(pipeline);
  
     return 0;
diff --git a/src/vulkan/device.c b/src/vulkan/device.c

index e55e66fd74f30f2b10bb08a83275f2f6041a983d..c9e66743def94d245bdc738cf799f7570c512385 100644 (file)
--- a/src/vulkan/device.c
+++ b/src/vulkan/device.c
@@ -404,6 +404,8 @@ VkResult anv_CreateDevice(
     anv_state_pool_init(&device->surface_state_pool,
                         &device->surface_state_block_pool);
  
+   anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
+
     device->info = *physicalDevice->info;
  
     device->compiler = anv_compiler_create(device);
@@ -2387,9 +2389,14 @@ static void
  anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
  {
     struct anv_device *device = cmd_buffer->device;
+   struct anv_bo *scratch_bo = NULL;
+
+   cmd_buffer->scratch_size = device->scratch_block_pool.size;
+   if (cmd_buffer->scratch_size > 0)
+      scratch_bo = &device->scratch_block_pool.bo;
  
     anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
-                  .GeneralStateBaseAddress = { NULL, 0 },
+                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                    .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                    .GeneralStateBaseAddressModifyEnable = true,
                    .GeneralStateBufferSize = 0xfffff,
@@ -3213,8 +3220,16 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
        }
     }
  
-   if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+   if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
+      /* If somebody compiled a pipeline after starting a command buffer the
+       * scratch bo may have grown since we started this cmd buffer (and
+       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
+       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
+      if (cmd_buffer->scratch_size < pipeline->total_scratch)
+         anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
        anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+   }
  
     if (cmd_buffer->descriptors_dirty)
        flush_descriptor_sets(cmd_buffer);
diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c

index aa24ad4300441eb48fff1c0e0605ae1493c48ba9..8be47c91205471843106b3c3fcc37954c2ab9dbf 100644 (file)
--- a/src/vulkan/pipeline.c
+++ b/src/vulkan/pipeline.c
@@ -573,8 +573,8 @@ anv_pipeline_create(
                       .BindingTableEntryCount = 0,
                       .ExpectedVertexCount = pipeline->gs_vertex_count,
                          
-                     .PerThreadScratchSpace = 0,
-                     .ScratchSpaceBasePointer = 0,
+                     .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY],
+                     .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048),
  
                       .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
                       .OutputTopology = gs_prog_data->output_topology,
@@ -628,11 +628,8 @@ anv_pipeline_create(
                       .AccessesUAV = false,
                       .SoftwareExceptionEnable = false,
  
-                     /* FIXME: pointer needs to be assigned outside as it aliases
-                      * PerThreadScratchSpace.
-                      */
-                     .ScratchSpaceBasePointer = 0,
-                     .PerThreadScratchSpace = 0,
+                     .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX],
+                     .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048),
  
                       .DispatchGRFStartRegisterForURBData =
                       vue_prog_data->base.dispatch_grf_start_reg,
@@ -676,8 +673,8 @@ anv_pipeline_create(
                    .VectorMaskEnable = true,
                    .SamplerCount = 1,
  
-                  .ScratchSpaceBasePointer = 0,
-                  .PerThreadScratchSpace = 0,
+                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
+                  .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
                    
                    .MaximumNumberofThreadsPerPSD = 64 - 2,
                    .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
@@ -757,11 +754,13 @@ VkResult anv_CreateComputePipeline(
  
     anv_compiler_run(device->compiler, pipeline);
  
+   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+
     anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
-                  .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? */
-                  .StackSize = 0,
-                  .PerThreadScratchSpace = 0,
+                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
+                  .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
                    .ScratchSpaceBasePointerHigh = 0,
+                  .StackSize = 0,
  
                    .MaximumNumberofThreads = device->info.max_cs_threads - 1,
                    .NumberofURBEntries = 2,
diff --git a/src/vulkan/private.h b/src/vulkan/private.h

index 08dea1526e875485b06e32b191e293f97589d538..abc0cfb8ec2f320952a311c7ea6fcd3548f16441 100644 (file)
--- a/src/vulkan/private.h
+++ b/src/vulkan/private.h
@@ -384,6 +384,8 @@ struct anv_device {
  
      struct anv_queue                            queue;
  
+    struct anv_block_pool                       scratch_block_pool;
+
      struct anv_compiler *                       compiler;
      struct anv_aub_writer *                     aub_writer;
      pthread_mutex_t                             mutex;
@@ -655,6 +657,7 @@ struct anv_cmd_buffer {
     uint32_t                                     dirty;
     uint32_t                                     compute_dirty;
     uint32_t                                     descriptors_dirty;
+   uint32_t                                     scratch_size;
     struct anv_pipeline *                        pipeline;
     struct anv_pipeline *                        compute_pipeline;
     struct anv_framebuffer *                     framebuffer;
@@ -696,6 +699,8 @@ struct anv_pipeline {
     struct brw_gs_prog_data                      gs_prog_data;
     struct brw_cs_prog_data                      cs_prog_data;
     struct brw_stage_prog_data *                 prog_data[VK_NUM_SHADER_STAGE];
+   uint32_t                                     scratch_start[VK_NUM_SHADER_STAGE];
+   uint32_t                                     total_scratch;
     struct {
        uint32_t                                  vs_start;
        uint32_t                                  vs_size;
@@ -705,11 +710,6 @@ struct anv_pipeline {
        uint32_t                                  nr_gs_entries;
     } urb;
  
-   struct anv_bo                                vs_scratch_bo;
-   struct anv_bo                                ps_scratch_bo;
-   struct anv_bo                                gs_scratch_bo;
-   struct anv_bo                                cs_scratch_bo;
-
     uint32_t                                     active_stages;
     struct anv_state_stream                      program_stream;
     struct anv_state                             blend_state;
author	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
	Fri, 19 Jun 2015 22:41:30 +0000 (15:41 -0700)
committer	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
	Fri, 19 Jun 2015 22:42:15 +0000 (15:42 -0700)
src/vulkan/compiler.cpp		patch \| blob \| history
src/vulkan/device.c		patch \| blob \| history
src/vulkan/pipeline.c		patch \| blob \| history
src/vulkan/private.h		patch \| blob \| history