#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/gen_l3_config.h"
+#include "common/gen_disasm.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "anv_nir.h"
uint32_t num_stats;
struct brw_compile_stats stats[3];
+ char *disasm[3];
VkPipelineCreationFeedbackEXT feedback;
}
}
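+/* Record a single pipeline executable for the given stage. If the pipeline
+ * was created with VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR,
+ * this also captures the GEN disassembly of the binary starting at
+ * code_offset; the text is kept in the pipeline's ralloc context so it lives
+ * as long as the pipeline itself.
+ */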
+static void
+anv_pipeline_add_executable(struct anv_pipeline *pipeline,
+ struct anv_pipeline_stage *stage,
+ struct brw_compile_stats *stats,
+ uint32_t code_offset)
+{
+ char *disasm = NULL;
+ if (stage->code &&
+ (pipeline->flags &
+ VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
+ char *stream_data = NULL;
+ size_t stream_size = 0;
+ FILE *stream = open_memstream(&stream_data, &stream_size);
+
+ /* Creating this is far cheaper than it looks. It's perfectly fine to
+ * do it for every binary.
+ */
+ struct gen_disasm *d = gen_disasm_create(&pipeline->device->info);
+ gen_disasm_disassemble(d, stage->code, code_offset, stream);
+ gen_disasm_destroy(d);
+
+ fclose(stream);
+
+ /* Copy it into a ralloc'd buffer so it lives as long as the pipeline */
+ disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
+ memcpy(disasm, stream_data, stream_size);
+ disasm[stream_size] = 0;
+
+ free(stream_data);
+ }
+
+ pipeline->executables[pipeline->num_executables++] =
+ (struct anv_pipeline_executable) {
+ .stage = stage->stage,
+ .stats = *stats,
+ .disasm = disasm,
+ };
+}
+
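+/* Record the executables for a stage. Fragment shaders are special: the
+ * backend can emit up to three dispatch widths (SIMD8, SIMD16 and SIMD32),
+ * and each one is exposed as its own executable with its own stats entry.
+ * Every other stage maps to exactly one executable.
+ */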
+static void
+anv_pipeline_add_executables(struct anv_pipeline *pipeline,
+ struct anv_pipeline_stage *stage,
+ struct anv_shader_bin *bin)
+{
+ if (stage->stage == MESA_SHADER_FRAGMENT) {
+ /* We pull the prog data and stats out of the anv_shader_bin because
+ * the anv_pipeline_stage may not be fully populated if we successfully
+ * looked up the shader in a cache.
+ */
+ const struct brw_wm_prog_data *wm_prog_data =
+ (const struct brw_wm_prog_data *)bin->prog_data;
+ struct brw_compile_stats *stats = bin->stats;
+
+ if (wm_prog_data->dispatch_8) {
+ anv_pipeline_add_executable(pipeline, stage, stats++, 0);
+ }
+
+ if (wm_prog_data->dispatch_16) {
+ anv_pipeline_add_executable(pipeline, stage, stats++,
+ wm_prog_data->prog_offset_16);
+ }
+
+ if (wm_prog_data->dispatch_32) {
+ anv_pipeline_add_executable(pipeline, stage, stats++,
+ wm_prog_data->prog_offset_32);
+ }
+ } else {
+ anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
+ }
+}
+
static VkResult
anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
struct anv_pipeline_cache *cache,
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
}
/* We found all our shaders in the cache. We're done. */
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (!stages[s].entrypoint)
+ continue;
+
+ anv_pipeline_add_executables(pipeline, &stages[s],
+ pipeline->shaders[s]);
+ }
goto done;
} else if (found > 0) {
/* We found some but not all of our shaders. This shouldn't happen
goto fail;
}
+ anv_pipeline_add_executables(pipeline, &stages[s], bin);
+
pipeline->shaders[s] = bin;
ralloc_free(stage_ctx);
&cache_hit);
}
+ void *mem_ctx = ralloc_context(NULL);
if (bin == NULL) {
int64_t stage_start = os_time_get_nano();
.set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
};
- void *mem_ctx = ralloc_context(NULL);
-
stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
if (stage.nir == NULL) {
ralloc_free(mem_ctx);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- ralloc_free(mem_ctx);
-
stage.feedback.duration = os_time_get_nano() - stage_start;
}
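+ /* stage.code is allocated out of mem_ctx and is read below to capture
+ * the disassembly, so mem_ctx must stay alive until after this call.
+ */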
+ anv_pipeline_add_executables(pipeline, &stage, bin);
+
+ ralloc_free(mem_ctx);
+
if (cache_hit) {
stage.feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+ pipeline->num_executables = 0;
result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
if (result != VK_SUCCESS) {
return VK_SUCCESS;
}
+
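+/* Helper for filling the fixed-size name/description fields of the Vulkan
+ * property structs below; asserts that the formatted string fits, including
+ * the NUL terminator.
+ */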
+#define WRITE_STR(field, ...) ({ \
+ memset(field, 0, sizeof(field)); \
+ UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
+ assert(i > 0 && i < sizeof(field)); \
+})
+
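+/* Illustrative application-side usage (hypothetical, not part of this
+ * patch): the standard Vulkan two-call idiom, assuming the extension is
+ * enabled and the entry point was loaded into pfnGetProps via
+ * vkGetDeviceProcAddr:
+ *
+ *    VkPipelineInfoKHR info = {
+ *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
+ *       .pipeline = pipeline,
+ *    };
+ *    uint32_t count = 0;
+ *    pfnGetProps(device, &info, &count, NULL);
+ *    VkPipelineExecutablePropertiesKHR *props = calloc(count, sizeof(*props));
+ *    for (uint32_t i = 0; i < count; i++)
+ *       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
+ *    pfnGetProps(device, &info, &count, props);
+ */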
+VkResult anv_GetPipelineExecutablePropertiesKHR(
+ VkDevice device,
+ const VkPipelineInfoKHR* pPipelineInfo,
+ uint32_t* pExecutableCount,
+ VkPipelineExecutablePropertiesKHR* pProperties)
+{
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
+ VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
+
+ for (uint32_t i = 0; i < pipeline->num_executables; i++) {
+ vk_outarray_append(&out, props) {
+ gl_shader_stage stage = pipeline->executables[i].stage;
+ props->stages = mesa_to_vk_shader_stage(stage);
+
+ unsigned simd_width = pipeline->executables[i].stats.dispatch_width;
+ if (stage == MESA_SHADER_FRAGMENT) {
+ WRITE_STR(props->name, "%s%d %s",
+ simd_width ? "SIMD" : "vec",
+ simd_width ? simd_width : 4,
+ _mesa_shader_stage_to_string(stage));
+ } else {
+ WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
+ }
+ WRITE_STR(props->description, "%s%d %s shader",
+ simd_width ? "SIMD" : "vec",
+ simd_width ? simd_width : 4,
+ _mesa_shader_stage_to_string(stage));
+
+ /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
+ * wants a subgroup size of 1.
+ */
+ props->subgroupSize = MAX2(simd_width, 1);
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
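+/* Report the statistics the backend recorded in brw_compile_stats at compile
+ * time. vk_outarray implements the two-call idiom: when pStatistics is NULL
+ * only *pStatisticCount is written; otherwise at most that many entries are
+ * filled and vk_outarray_status() returns VK_INCOMPLETE if any were dropped.
+ */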
+VkResult anv_GetPipelineExecutableStatisticsKHR(
+ VkDevice device,
+ const VkPipelineExecutableInfoKHR* pExecutableInfo,
+ uint32_t* pStatisticCount,
+ VkPipelineExecutableStatisticKHR* pStatistics)
+{
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
+ VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
+
+ assert(pExecutableInfo->executableIndex < pipeline->num_executables);
+ const struct anv_pipeline_executable *exe =
+ &pipeline->executables[pExecutableInfo->executableIndex];
+ const struct brw_stage_prog_data *prog_data =
+ pipeline->shaders[exe->stage]->prog_data;
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Instruction Count");
+ WRITE_STR(stat->description,
+ "Number of GEN instructions in the final generated "
+ "shader executable.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.instructions;
+ }
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Loop Count");
+ WRITE_STR(stat->description,
+ "Number of loops (not unrolled) in the final generated "
+ "shader executable.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.loops;
+ }
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Cycle Count");
+ WRITE_STR(stat->description,
+ "Estimate of the number of EU cycles required to execute "
+ "the final generated executable. This is an estimate only "
+ "and may vary greatly from actual run-time performance.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.cycles;
+ }
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Spill Count");
+ WRITE_STR(stat->description,
+ "Number of scratch spill operations. This gives a rough "
+ "estimate of the cost incurred due to spilling temporary "
+ "values to memory. If this is non-zero, you may want to "
+ "adjust your shader to reduce register pressure.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.spills;
+ }
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Fill Count");
+ WRITE_STR(stat->description,
+ "Number of scratch fill operations. This gives a rough "
+ "estimate of the cost incurred due to spilling temporary "
+ "values to memory. If this is non-zero, you may want to "
+ "adjust your shader to reduce register pressure.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.fills;
+ }
+
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Scratch Memory Size");
+ WRITE_STR(stat->description,
+ "Number of bytes of scratch memory required by the "
+ "generated shader executable. If this is non-zero, you "
+ "may want to adjust your shader to reduce register "
+ "pressure.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = prog_data->total_scratch;
+ }
+
+ if (exe->stage == MESA_SHADER_COMPUTE) {
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "Workgroup Memory Size");
+ WRITE_STR(stat->description,
+ "Number of bytes of workgroup shared memory used by this "
+ "compute shader including any padding.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = prog_data->total_shared;
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
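+/* Copy a NUL-terminated IR string into the client-provided buffer, following
+ * the Vulkan size-negotiation contract: if pData is NULL we only report the
+ * required size (including the terminator); otherwise we copy as much as
+ * fits and return false on truncation so the caller can return
+ * VK_INCOMPLETE. Note that strncpy does not NUL-terminate a truncated copy.
+ */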
+static bool
+write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
+ const char *data)
+{
+ ir->isText = VK_TRUE;
+
+ size_t data_len = strlen(data) + 1;
+
+ if (ir->pData == NULL) {
+ ir->dataSize = data_len;
+ return true;
+ }
+
+ strncpy(ir->pData, data, ir->dataSize);
+ if (ir->dataSize < data_len)
+ return false;
+
+ ir->dataSize = data_len;
+ return true;
+}
+
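+/* Expose the captured internal representations. Disassembly is only
+ * available if the pipeline was created with
+ * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; otherwise
+ * exe->disasm is NULL and no representation is reported.
+ */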
+VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
+ VkDevice device,
+ const VkPipelineExecutableInfoKHR* pExecutableInfo,
+ uint32_t* pInternalRepresentationCount,
+ VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
+{
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
+ VK_OUTARRAY_MAKE(out, pInternalRepresentations,
+ pInternalRepresentationCount);
+ bool incomplete_text = false;
+
+ assert(pExecutableInfo->executableIndex < pipeline->num_executables);
+ const struct anv_pipeline_executable *exe =
+ &pipeline->executables[pExecutableInfo->executableIndex];
+
+ if (exe->disasm) {
+ vk_outarray_append(&out, ir) {
+ WRITE_STR(ir->name, "GEN Assembly");
+ WRITE_STR(ir->description,
+ "Final GEN assembly for the generated shader binary");
+
+ if (!write_ir_text(ir, exe->disasm))
+ incomplete_text = true;
+ }
+ }
+
+ return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
+}