spirv: Change spirv_to_nir() to return a nir_shader
[mesa.git] src/intel/vulkan/anv_pipeline.c
index 53500fd65d8b1281b5db201ba9783c50ecc2ca9e..f244b0c991b09ad7da9e873855bd4b254a9bcf3c 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -28,6 +28,7 @@
 #include <fcntl.h>
 
 #include "util/mesa-sha1.h"
+#include "util/os_time.h"
 #include "common/gen_l3_config.h"
 #include "anv_private.h"
 #include "compiler/brw_nir.h"
@@ -136,18 +137,25 @@ anv_shader_compile_to_nir(struct anv_device *device,
    struct spirv_to_nir_options spirv_options = {
       .lower_workgroup_access_to_offsets = true,
       .caps = {
+         .derivative_group = true,
+         .descriptor_array_dynamic_indexing = true,
+         .descriptor_array_non_uniform_indexing = true,
+         .descriptor_indexing = true,
          .device_group = true,
          .draw_parameters = true,
+         .float16 = pdevice->info.gen >= 8,
          .float64 = pdevice->info.gen >= 8,
          .geometry_streams = true,
          .image_write_without_format = true,
+         .int8 = pdevice->info.gen >= 8,
          .int16 = pdevice->info.gen >= 8,
          .int64 = pdevice->info.gen >= 8,
+         .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
          .min_lod = true,
          .multiview = true,
-         .physical_storage_buffer_address = pdevice->info.gen >= 8 &&
-                                            pdevice->use_softpin,
+         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
          .post_depth_coverage = pdevice->info.gen >= 9,
+         .runtime_descriptor_array = true,
          .shader_viewport_index_layer = true,
          .stencil_export = pdevice->info.gen >= 9,
          .storage_8bit = pdevice->info.gen >= 8,
@@ -162,18 +170,24 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .transform_feedback = pdevice->info.gen >= 8,
          .variable_pointers = true,
       },
-      .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
-      .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
-      .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
-      .push_const_ptr_type = glsl_uint_type(),
-      .shared_ptr_type = glsl_uint_type(),
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format =
+          anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
+      .phys_ssbo_addr_format = nir_address_format_64bit_global,
+      .push_const_addr_format = nir_address_format_logical,
+
+      /* TODO: Consider changing this to an address format in which the
+       * NULL pointer equals 0.  That might play nicer with certain
+       * code / code generators.
+       */
+      .shared_addr_format = nir_address_format_32bit_offset,
    };
 
-   nir_function *entry_point =
+
+   nir_shader *nir =
       spirv_to_nir(spirv, module->size / 4,
                    spec_entries, num_spec_entries,
                    stage, entrypoint_name, &spirv_options, nir_options);
-   nir_shader *nir = entry_point->shader;
    assert(nir->info.stage == stage);
    nir_validate_shader(nir, "after spirv_to_nir");
    ralloc_steal(mem_ctx, nir);
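
The options struct above swaps the old explicit GLSL pointer types for
nir_address_format enums and delegates the SSBO choice to
anv_nir_ssbo_addr_format().  A plausible shape for that helper, inferred
from the formats this diff selects at its call sites (a sketch assuming the
anv_private.h declarations, not the actual header contents):

```c
static inline nir_address_format
anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
                         bool robust_buffer_access)
{
   /* Bounds-checked 64-bit pointers when robustBufferAccess is enabled,
    * raw 64-bit pointers otherwise; hardware without A64 buffer access
    * falls back to (surface index, byte offset) pairs. */
   if (pdevice->has_a64_buffer_access) {
      return robust_buffer_access ? nir_address_format_64bit_bounded_global
                                  : nir_address_format_64bit_global;
   }
   return nir_address_format_32bit_index_offset;
}
```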
@@ -197,7 +211,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
    /* Pick off the single entrypoint that we want */
    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
-      if (func != entry_point)
+      if (!func->is_entrypoint)
          exec_node_remove(&func->node);
    }
    assert(exec_list_length(&nir->functions) == 1);
@@ -223,7 +237,9 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
    NIR_PASS_V(nir, nir_propagate_invariant);
    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
-              entry_point->impl, true, false);
+              nir_shader_get_entrypoint(nir), true, false);
+
+   NIR_PASS_V(nir, nir_lower_frexp);
 
    /* Vulkan uses the separate-shader linking model */
    nir->info.separate_shader = true;
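
nir_lower_frexp open-codes the frexp intrinsics into arithmetic and bit
operations.  A standalone C illustration of the contract any such lowering
must preserve (the libc frexp semantics):

```c
#include <assert.h>
#include <math.h>

int main(void)
{
   int exp;
   double sig = frexp(48.0, &exp);      /* 48 = 0.75 * 2^6 */
   assert(sig == 0.75 && exp == 6);
   assert(sig * pow(2.0, exp) == 48.0); /* x == sig * 2^exp, 0.5 <= |sig| < 1 */
   return 0;
}
```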
@@ -372,20 +388,27 @@ populate_wm_prog_key(const struct gen_device_info *devinfo,
 
    key->nr_color_regions = util_bitcount(key->color_outputs_valid);
 
-   key->replicate_alpha = key->nr_color_regions > 1 &&
-                          ms_info && ms_info->alphaToCoverageEnable;
+   /* The hardware disables alpha-to-coverage whenever the shader writes
+    * a SampleMask output.  To reduce possible shader recompilations we
+    * would need to know up front whether the shader has a SampleMask
+    * output, so we could decide whether to emit the workaround code.
+    */
+   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
+
+   /* Vulkan doesn't support fixed-function alpha test */
+   key->alpha_test_replicate_alpha = false;
 
    if (ms_info) {
       /* We should probably pull this out of the shader, but it's fairly
        * harmless to compute it and then let dead-code take care of it.
        */
       if (ms_info->rasterizationSamples > 1) {
-         key->persample_interp =
+         key->persample_interp = ms_info->sampleShadingEnable &&
             (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
          key->multisample_fbo = true;
       }
 
-      key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
+      key->frag_coord_adds_sample_pos = key->persample_interp;
    }
 }
 
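
The reworked key now follows the Vulkan sample-shading rule: per-sample
interpolation is requested only when sampleShadingEnable is set and
minSampleShading * rasterizationSamples asks for more than one shading
sample, and gl_FragCoord picks up the sample position only in that same
case.  A hypothetical helper restating the rule (illustration only, not
driver code):

```c
#include <stdbool.h>
#include <vulkan/vulkan.h>

static bool
wants_persample_interp(const VkPipelineMultisampleStateCreateInfo *ms)
{
   return ms && ms->rasterizationSamples > 1 &&
          ms->sampleShadingEnable &&
          ms->minSampleShading * (float)ms->rasterizationSamples > 1.0f;
}
```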
@@ -421,6 +444,8 @@ struct anv_pipeline_stage {
    struct anv_pipeline_bind_map bind_map;
 
    union brw_any_prog_data prog_data;
+
+   VkPipelineCreationFeedbackEXT feedback;
 };
 
 static void
@@ -539,8 +564,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        struct anv_pipeline_stage *stage,
                        struct anv_pipeline_layout *layout)
 {
-   const struct brw_compiler *compiler =
-      pipeline->device->instance->physicalDevice.compiler;
+   const struct anv_physical_device *pdevice =
+      &pipeline->device->instance->physicalDevice;
+   const struct brw_compiler *compiler = pdevice->compiler;
 
    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
    nir_shader *nir = stage->nir;
@@ -593,16 +619,25 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
-      anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
+      anv_nir_apply_pipeline_layout(pdevice,
                                     pipeline->device->robust_buffer_access,
                                     layout, nir, prog_data,
                                     &stage->bind_map);
 
-      NIR_PASS_V(nir, nir_lower_explicit_io,
-                 nir_var_mem_ubo | nir_var_mem_ssbo,
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
                  nir_address_format_32bit_index_offset);
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+                 anv_nir_ssbo_addr_format(pdevice,
+                    pipeline->device->robust_buffer_access));
 
       NIR_PASS_V(nir, nir_opt_constant_folding);
+
+      /* We don't support non-uniform UBO access, and non-uniform SSBO
+       * access is handled naturally by falling back to A64 messages.
+       */
+      NIR_PASS_V(nir, nir_lower_non_uniform_access,
+                 nir_lower_non_uniform_texture_access |
+                 nir_lower_non_uniform_image_access);
    }
 
    if (nir->info.stage != MESA_SHADER_COMPUTE)
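
Under nir_address_format_32bit_index_offset a UBO "pointer" is not an
address at all but a (surface index, byte offset) pair.  A minimal sketch
of what a lowered 32-bit UBO load means in that model (the types and the
surfaces parameter are invented for illustration):

```c
#include <stdint.h>
#include <string.h>

struct index_offset_ptr {
   uint32_t index;   /* binding-table surface index */
   uint32_t offset;  /* byte offset into that buffer */
};

static uint32_t
load_ubo_u32(const uint8_t *const *surfaces, struct index_offset_ptr p)
{
   uint32_t v;
   memcpy(&v, surfaces[p.index] + p.offset, sizeof(v));
   return v;
}
```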
@@ -793,14 +828,24 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      /* Unused or out-of-bounds */
-      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
+      /* Out-of-bounds */
+      if (rt >= MAX_RTS)
          continue;
 
       const unsigned array_len =
          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
       assert(rt + array_len <= max_rt);
 
+      /* Unused */
+      if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) {
+         /* If this is the RT at location 0 and alpha to coverage is
+          * enabled, we will have to create a null RT for it, so mark it
+          * as used.
+          */
+         if (rt > 0 || !stage->key.wm.alpha_to_coverage)
+            continue;
+      }
+
       for (unsigned i = 0; i < array_len; i++)
          rt_used[rt + i] = true;
    }
@@ -811,11 +856,22 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       rt_to_bindings[i] = num_rts;
-      rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
-         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
-         .binding = 0,
-         .index = i,
-      };
+
+      if (stage->key.wm.color_outputs_valid & (1 << i)) {
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = i,
+         };
+      } else {
+         /* Setup a null render target */
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = UINT32_MAX,
+         };
+      }
+
       num_rts++;
    }
 
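
A binding with .index = UINT32_MAX stands for a null render target: it
keeps the RT0 slot alive so the alpha component the shader writes can feed
alpha-to-coverage even though no real attachment consumes it.  A
hypothetical consumer-side sketch of resolving such a binding (all types
invented for illustration):

```c
#include <stdint.h>

struct surface { int dummy; };
struct render_targets {
   const struct surface *null_surface;
   const struct surface *color[8];
};

static const struct surface *
resolve_color_binding(uint32_t index, const struct render_targets *rts)
{
   return index == UINT32_MAX ? rts->null_surface : rts->color[index];
}
```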
@@ -825,9 +881,11 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      if (rt >= MAX_RTS ||
-          !(stage->key.wm.color_outputs_valid & (1 << rt))) {
-         /* Unused or out-of-bounds, throw it away */
+
+      if (rt >= MAX_RTS || !rt_used[rt]) {
+         /* Unused or out-of-bounds, throw it away, unless it is the first
+          * RT and we have alpha to coverage enabled.
+          */
          deleted_output = true;
          var->data.mode = nir_var_function_temp;
          exec_node_remove(&var->node);
@@ -906,6 +964,11 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
                               struct anv_pipeline_cache *cache,
                               const VkGraphicsPipelineCreateInfo *info)
 {
+   VkPipelineCreationFeedbackEXT pipeline_feedback = {
+      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+   };
+   int64_t pipeline_start = os_time_get_nano();
+
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
    struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
@@ -919,6 +982,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
 
       pipeline->active_stages |= sinfo->stage;
 
+      int64_t stage_start = os_time_get_nano();
+
       stages[stage].stage = stage;
       stages[stage].module = anv_shader_module_from_handle(sinfo->module);
       stages[stage].entrypoint = sinfo->pName;
@@ -953,6 +1018,9 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
       default:
          unreachable("Invalid graphics shader stage");
       }
+
+      stages[stage].feedback.duration += os_time_get_nano() - stage_start;
+      stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
    }
 
    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
@@ -966,24 +1034,39 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
 
    unsigned found = 0;
+   unsigned cache_hits = 0;
    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
       if (!stages[s].entrypoint)
          continue;
 
+      int64_t stage_start = os_time_get_nano();
+
       stages[s].cache_key.stage = s;
       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
 
+      bool cache_hit;
       struct anv_shader_bin *bin =
          anv_device_search_for_kernel(pipeline->device, cache,
                                       &stages[s].cache_key,
-                                      sizeof(stages[s].cache_key));
+                                      sizeof(stages[s].cache_key), &cache_hit);
       if (bin) {
          found++;
          pipeline->shaders[s] = bin;
       }
+
+      if (cache_hit) {
+         cache_hits++;
+         stages[s].feedback.flags |=
+            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+      }
+      stages[s].feedback.duration += os_time_get_nano() - stage_start;
    }
 
    if (found == __builtin_popcount(pipeline->active_stages)) {
+      if (cache_hits == found) {
+         pipeline_feedback.flags |=
+            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+      }
       /* We found all our shaders in the cache.  We're done. */
       goto done;
    } else if (found > 0) {
@@ -1008,6 +1091,7 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
        * cache again as part of the compilation process.
        */
       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+         stages[s].feedback.flags = 0;
          if (pipeline->shaders[s]) {
             anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
             pipeline->shaders[s] = NULL;
@@ -1021,6 +1105,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
       if (!stages[s].entrypoint)
          continue;
 
+      int64_t stage_start = os_time_get_nano();
+
       assert(stages[s].stage == s);
       assert(pipeline->shaders[s] == NULL);
 
@@ -1036,6 +1122,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
          goto fail;
       }
+
+      stages[s].feedback.duration += os_time_get_nano() - stage_start;
    }
 
    /* Walk backwards to link */
@@ -1072,6 +1160,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
       if (!stages[s].entrypoint)
          continue;
 
+      int64_t stage_start = os_time_get_nano();
+
       void *stage_ctx = ralloc_context(NULL);
 
       nir_xfb_info *xfb_info = NULL;
@@ -1132,6 +1222,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
       pipeline->shaders[s] = bin;
       ralloc_free(stage_ctx);
 
+      stages[s].feedback.duration += os_time_get_nano() - stage_start;
+
       prev_stage = &stages[s];
    }
 
@@ -1150,6 +1242,20 @@ done:
       pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
    }
 
+   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+
+   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
+      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+   if (create_feedback) {
+      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
+
+      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
+      for (uint32_t i = 0; i < info->stageCount; i++) {
+         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
+         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
+      }
+   }
+
    return VK_SUCCESS;
 
 fail:
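
On the application side, the feedback filled in above is requested by
chaining VkPipelineCreationFeedbackCreateInfoEXT into the pipeline create
info.  A hedged sketch of such a caller (assumes
VK_EXT_pipeline_creation_feedback is enabled and a two-stage VS+FS
pipeline, so the stage-feedback count matches the assert above):

```c
#include <vulkan/vulkan.h>

static VkResult
create_with_feedback(VkDevice dev, VkPipelineCache cache,
                     VkGraphicsPipelineCreateInfo *info, VkPipeline *out)
{
   VkPipelineCreationFeedbackEXT pipeline_fb = {0};
   VkPipelineCreationFeedbackEXT stage_fb[2] = {{0}};

   VkPipelineCreationFeedbackCreateInfoEXT fb_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT,
      .pNext = info->pNext,
      .pPipelineCreationFeedback = &pipeline_fb,
      .pipelineStageCreationFeedbackCount = 2,  /* must equal stageCount */
      .pPipelineStageCreationFeedbacks = stage_fb,
   };
   info->pNext = &fb_info;

   VkResult res = vkCreateGraphicsPipelines(dev, cache, 1, info, NULL, out);

   if (res == VK_SUCCESS &&
       (pipeline_fb.flags &
        VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT)) {
      /* every stage was found in the supplied pipeline cache;
       * pipeline_fb.duration is in nanoseconds */
   }
   return res;
}
```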
@@ -1171,6 +1277,11 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
                         const char *entrypoint,
                         const VkSpecializationInfo *spec_info)
 {
+   VkPipelineCreationFeedbackEXT pipeline_feedback = {
+      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+   };
+   int64_t pipeline_start = os_time_get_nano();
+
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
 
@@ -1181,7 +1292,10 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
       .spec_info = spec_info,
       .cache_key = {
          .stage = MESA_SHADER_COMPUTE,
-      }
+      },
+      .feedback = {
+         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+      },
    };
    anv_pipeline_hash_shader(stage.module,
                             stage.entrypoint,
@@ -1196,10 +1310,13 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
+   bool cache_hit;
    bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
-                                      sizeof(stage.cache_key));
+                                      sizeof(stage.cache_key), &cache_hit);
 
    if (bin == NULL) {
+      int64_t stage_start = os_time_get_nano();
+
       stage.bind_map = (struct anv_pipeline_bind_map) {
          .surface_to_descriptor = stage.surface_to_descriptor,
          .sampler_to_descriptor = stage.sampler_to_descriptor
@@ -1247,6 +1364,25 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
       }
 
       ralloc_free(mem_ctx);
+
+      stage.feedback.duration = os_time_get_nano() - stage_start;
+   }
+
+   if (cache_hit) {
+      stage.feedback.flags |=
+         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+      pipeline_feedback.flags |=
+         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+   }
+   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+
+   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
+      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+   if (create_feedback) {
+      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
+
+      assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
+      create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
    }
 
    pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;