diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 90942a4524a81c1a09cf19aa7989a683c368d3d3..c2a164cc802af972f94edb4838937b3ed1905f7e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -95,6 +95,33 @@ static const uint64_t stage_to_debug[] = {
    [MESA_SHADER_COMPUTE] = DEBUG_CS,
 };
 
+struct anv_spirv_debug_data {
+   struct anv_device *device;
+   const struct anv_shader_module *module;
+};
+
+static void anv_spirv_nir_debug(void *private_data,
+                                enum nir_spirv_debug_level level,
+                                size_t spirv_offset,
+                                const char *message)
+{
+   struct anv_spirv_debug_data *debug_data = private_data;
+   static const VkDebugReportFlagsEXT vk_flags[] = {
+      [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
+      [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
+      [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
+   };
+   char buffer[256];
+
+   snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
+
+   vk_debug_report(&debug_data->device->instance->debug_report_callbacks,
+                   vk_flags[level],
+                   VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
+                   (uint64_t) (uintptr_t) debug_data->module,
+                   0, 0, "anv", buffer);
+}
+
 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
  * we can't do that yet because we don't have the ability to copy nir.
  */
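
Note: anv_spirv_nir_debug only forwards spirv_to_nir diagnostics into the VK_EXT_debug_report machinery, so nothing is reported unless the application has registered a callback on an instance with that extension enabled. As a rough, illustrative sketch of the application side (names such as report_cb and register_report_cb are placeholders, not part of this patch):

    #include <stdio.h>
    #include <vulkan/vulkan.h>

    static VKAPI_ATTR VkBool32 VKAPI_CALL
    report_cb(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT obj_type,
              uint64_t object, size_t location, int32_t code,
              const char *layer_prefix, const char *message, void *user_data)
    {
       /* Messages from anv_spirv_nir_debug arrive with layer_prefix "anv". */
       fprintf(stderr, "[%s] %s\n", layer_prefix, message);
       return VK_FALSE; /* do not abort the call that triggered the report */
    }

    static VkDebugReportCallbackEXT
    register_report_cb(VkInstance instance)
    {
       const VkDebugReportCallbackCreateInfoEXT info = {
          .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
          .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
                   VK_DEBUG_REPORT_WARNING_BIT_EXT |
                   VK_DEBUG_REPORT_ERROR_BIT_EXT,
          .pfnCallback = report_cb,
       };
       PFN_vkCreateDebugReportCallbackEXT create_cb =
          (PFN_vkCreateDebugReportCallbackEXT)
             vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT");
       VkDebugReportCallbackEXT callback = VK_NULL_HANDLE;
       create_cb(instance, &info, NULL, &callback);
       return callback;
    }
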
@@ -134,21 +161,35 @@ anv_shader_compile_to_nir(struct anv_device *device,
       }
    }
 
+   struct anv_spirv_debug_data spirv_debug_data = {
+      .device = device,
+      .module = module,
+   };
    struct spirv_to_nir_options spirv_options = {
       .lower_workgroup_access_to_offsets = true,
       .caps = {
+         .demote_to_helper_invocation = true,
+         .derivative_group = true,
+         .descriptor_array_dynamic_indexing = true,
+         .descriptor_array_non_uniform_indexing = true,
+         .descriptor_indexing = true,
          .device_group = true,
          .draw_parameters = true,
+         .float16 = pdevice->info.gen >= 8,
          .float64 = pdevice->info.gen >= 8,
+         .fragment_shader_sample_interlock = pdevice->info.gen >= 9,
+         .fragment_shader_pixel_interlock = pdevice->info.gen >= 9,
          .geometry_streams = true,
          .image_write_without_format = true,
+         .int8 = pdevice->info.gen >= 8,
          .int16 = pdevice->info.gen >= 8,
          .int64 = pdevice->info.gen >= 8,
+         .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
          .min_lod = true,
          .multiview = true,
-         .physical_storage_buffer_address = pdevice->info.gen >= 8 &&
-                                            pdevice->use_softpin,
+         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
          .post_depth_coverage = pdevice->info.gen >= 9,
+         .runtime_descriptor_array = true,
          .shader_viewport_index_layer = true,
          .stencil_export = pdevice->info.gen >= 9,
          .storage_8bit = pdevice->info.gen >= 8,
@@ -163,18 +204,28 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .transform_feedback = pdevice->info.gen >= 8,
          .variable_pointers = true,
       },
-      .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
-      .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
-      .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
-      .push_const_ptr_type = glsl_uint_type(),
-      .shared_ptr_type = glsl_uint_type(),
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format =
+          anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
+      .phys_ssbo_addr_format = nir_address_format_64bit_global,
+      .push_const_addr_format = nir_address_format_logical,
+
+      /* TODO: Consider changing this to an address format where the NULL
+       * pointer is 0.  That might be a better format for playing nicely
+       * with certain code / code generators.
+       */
+      .shared_addr_format = nir_address_format_32bit_offset,
+      .debug = {
+         .func = anv_spirv_nir_debug,
+         .private_data = &spirv_debug_data,
+      },
    };
 
-   nir_function *entry_point =
+
+   nir_shader *nir =
       spirv_to_nir(spirv, module->size / 4,
                    spec_entries, num_spec_entries,
                    stage, entrypoint_name, &spirv_options, nir_options);
-   nir_shader *nir = entry_point->shader;
    assert(nir->info.stage == stage);
    nir_validate_shader(nir, "after spirv_to_nir");
    ralloc_steal(mem_ctx, nir);
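
The .ssbo_addr_format value now comes from anv_nir_ssbo_addr_format(), which is defined outside this diff. A plausible sketch of such a helper, assuming it selects bounds-checked 64-bit global addressing when robust buffer access is enabled on A64-capable devices and otherwise keeps binding-table (index, offset) addressing (illustrative, not the verbatim driver code):

    /* Sketch only: the real helper is declared elsewhere in the driver. */
    static inline enum nir_address_format
    anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
                             bool robust_buffer_access)
    {
       if (pdevice->has_a64_buffer_access) {
          /* A64 access: raw 64-bit pointers, or the bounds-checked variant
           * when robustBufferAccess is enabled.
           */
          return robust_buffer_access ? nir_address_format_64bit_bounded_global
                                      : nir_address_format_64bit_global;
       }
       /* Otherwise keep binding-table style (index, offset) addressing. */
       return nir_address_format_32bit_index_offset;
    }
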
@@ -198,7 +249,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
    /* Pick off the single entrypoint that we want */
    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
-      if (func != entry_point)
+      if (!func->is_entrypoint)
          exec_node_remove(&func->node);
    }
    assert(exec_list_length(&nir->functions) == 1);
@@ -219,19 +270,16 @@ anv_shader_compile_to_nir(struct anv_device *device,
    NIR_PASS_V(nir, nir_remove_dead_variables,
               nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
 
-   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
-              nir_address_format_64bit_global);
-
    NIR_PASS_V(nir, nir_propagate_invariant);
    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
-              entry_point->impl, true, false);
+              nir_shader_get_entrypoint(nir), true, false);
 
    NIR_PASS_V(nir, nir_lower_frexp);
 
    /* Vulkan uses the separate-shader linking model */
    nir->info.separate_shader = true;
 
-   nir = brw_preprocess_nir(compiler, nir, NULL);
+   brw_preprocess_nir(compiler, nir, NULL);
 
    return nir;
 }
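
Since spirv_to_nir() now returns the nir_shader rather than a nir_function, callers query the entry point on demand via nir_shader_get_entrypoint(). A condensed, illustrative view of the resulting calling convention (the "main" entry-point name and error handling are placeholders):

    /* Illustrative only: mirrors the call pattern used above. */
    static nir_shader *
    spirv_to_entry_shader(const uint32_t *words, size_t word_count,
                          gl_shader_stage stage,
                          const struct spirv_to_nir_options *spirv_options,
                          const nir_shader_compiler_options *nir_options)
    {
       nir_shader *nir = spirv_to_nir(words, word_count, NULL, 0,
                                      stage, "main", spirv_options, nir_options);
       if (nir == NULL)
          return NULL;

       /* The entry point is no longer returned; look it up when a pass
        * needs it, as nir_lower_io_to_temporaries does above.
        */
       nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
       nir_lower_io_to_temporaries(nir, entrypoint, true, false);

       return nir;
    }
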
@@ -303,13 +351,27 @@ populate_sampler_prog_key(const struct gen_device_info *devinfo,
    }
 }
 
+static void
+populate_base_prog_key(const struct gen_device_info *devinfo,
+                       VkPipelineShaderStageCreateFlags flags,
+                       struct brw_base_prog_key *key)
+{
+   if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+      key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
+   else
+      key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+}
+
 static void
 populate_vs_prog_key(const struct gen_device_info *devinfo,
+                     VkPipelineShaderStageCreateFlags flags,
                      struct brw_vs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
 
    /* XXX: Handle vertex input work-arounds */
 
@@ -318,43 +380,47 @@ populate_vs_prog_key(const struct gen_device_info *devinfo,
 
 static void
 populate_tcs_prog_key(const struct gen_device_info *devinfo,
+                      VkPipelineShaderStageCreateFlags flags,
                       unsigned input_vertices,
                       struct brw_tcs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
 
    key->input_vertices = input_vertices;
 }
 
 static void
 populate_tes_prog_key(const struct gen_device_info *devinfo,
+                      VkPipelineShaderStageCreateFlags flags,
                       struct brw_tes_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
 }
 
 static void
 populate_gs_prog_key(const struct gen_device_info *devinfo,
+                     VkPipelineShaderStageCreateFlags flags,
                      struct brw_gs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
 }
 
 static void
 populate_wm_prog_key(const struct gen_device_info *devinfo,
+                     VkPipelineShaderStageCreateFlags flags,
                      const struct anv_subpass *subpass,
                      const VkPipelineMultisampleStateCreateInfo *ms_info,
                      struct brw_wm_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
 
    /* We set this to 0 here and set to the actual value before we call
     * brw_compile_fs.
@@ -375,30 +441,63 @@ populate_wm_prog_key(const struct gen_device_info *devinfo,
 
    key->nr_color_regions = util_bitcount(key->color_outputs_valid);
 
-   key->replicate_alpha = key->nr_color_regions > 1 &&
-                          ms_info && ms_info->alphaToCoverageEnable;
+   /* To reduce possible shader recompilations we would need to know if
+    * there is a SampleMask output variable, in order to decide whether to
+    * emit code working around the fact that the hardware disables alpha to
+    * coverage whenever the shader writes SampleMask.
+    */
+   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
+
+   /* Vulkan doesn't support fixed-function alpha test */
+   key->alpha_test_replicate_alpha = false;
 
    if (ms_info) {
       /* We should probably pull this out of the shader, but it's fairly
        * harmless to compute it and then let dead-code take care of it.
        */
       if (ms_info->rasterizationSamples > 1) {
-         key->persample_interp =
+         key->persample_interp = ms_info->sampleShadingEnable &&
             (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
          key->multisample_fbo = true;
       }
 
-      key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
+      key->frag_coord_adds_sample_pos = key->persample_interp;
    }
 }
 
 static void
 populate_cs_prog_key(const struct gen_device_info *devinfo,
+                     VkPipelineShaderStageCreateFlags flags,
+                     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
                      struct brw_cs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_base_prog_key(devinfo, flags, &key->base);
+
+   if (rss_info) {
+      assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
+
+      /* These enum values are expressly chosen to be equal to the subgroup
+       * size that they require.
+       */
+      assert(rss_info->requiredSubgroupSize == 8 ||
+             rss_info->requiredSubgroupSize == 16 ||
+             rss_info->requiredSubgroupSize == 32);
+      key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
+   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
+      /* If the client expressly requests full subgroups and they don't
+       * specify a subgroup size, we need to pick one.  If they've requested
+       * varying subgroup sizes, we set it to UNIFORM and let the back-end
+       * compiler pick.  Otherwise, we specify the API value of 32.
+       * Performance will likely be terrible in this case but there's nothing
+       * we can do about that.  The client should have chosen a size.
+       */
+      if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
+      else
+         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
+   }
 }
 
 struct anv_pipeline_stage {
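
Assigning requiredSubgroupSize directly into key->base.subgroup_size_type only works because the backend enum reserves the values 8/16/32 for the "require exactly N" cases. For reference, a sketch of the enum shape this relies on (the authoritative definition lives in brw_compiler.h):

    /* Sketch of the enum that the assert and assignment above rely on:
     * the REQUIRE_* values are numerically equal to the subgroup size they
     * request, so the Vulkan requiredSubgroupSize (8, 16 or 32) can be
     * stored directly.
     */
    enum brw_subgroup_size_type {
       BRW_SUBGROUP_SIZE_API_CONSTANT,  /* default: fixed size reported by the API */
       BRW_SUBGROUP_SIZE_UNIFORM,       /* any size, but uniform across the stage  */
       BRW_SUBGROUP_SIZE_VARYING,       /* backend is free to vary the size        */

       BRW_SUBGROUP_SIZE_REQUIRE_8  = 8,
       BRW_SUBGROUP_SIZE_REQUIRE_16 = 16,
       BRW_SUBGROUP_SIZE_REQUIRE_32 = 32,
    };
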
@@ -544,15 +643,16 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        struct anv_pipeline_stage *stage,
                        struct anv_pipeline_layout *layout)
 {
-   const struct brw_compiler *compiler =
-      pipeline->device->instance->physicalDevice.compiler;
+   const struct anv_physical_device *pdevice =
+      &pipeline->device->instance->physicalDevice;
+   const struct brw_compiler *compiler = pdevice->compiler;
 
    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
    nir_shader *nir = stage->nir;
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
-      NIR_PASS_V(nir, anv_nir_lower_input_attachments);
+      NIR_PASS_V(nir, nir_lower_input_attachments, false);
    }
 
    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
@@ -596,18 +696,30 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 
    NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
 
+   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
+              nir_address_format_64bit_global);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
-      anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
+      anv_nir_apply_pipeline_layout(pdevice,
                                     pipeline->device->robust_buffer_access,
                                     layout, nir, prog_data,
                                     &stage->bind_map);
 
-      NIR_PASS_V(nir, nir_lower_explicit_io,
-                 nir_var_mem_ubo | nir_var_mem_ssbo,
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
                  nir_address_format_32bit_index_offset);
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+                 anv_nir_ssbo_addr_format(pdevice,
+                    pipeline->device->robust_buffer_access));
 
       NIR_PASS_V(nir, nir_opt_constant_folding);
+
+      /* We don't support non-uniform UBOs, and non-uniform SSBO access is
+       * handled naturally by falling back to A64 messages.
+       */
+      NIR_PASS_V(nir, nir_lower_non_uniform_access,
+                 nir_lower_non_uniform_texture_access |
+                 nir_lower_non_uniform_image_access);
    }
 
    if (nir->info.stage != MESA_SHADER_COMPUTE)
@@ -624,7 +736,7 @@ anv_pipeline_link_vs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *next_stage)
 {
    if (next_stage)
-      brw_nir_link_shaders(compiler, &vs_stage->nir, &next_stage->nir);
+      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
 }
 
 static const unsigned *
@@ -688,7 +800,7 @@ anv_pipeline_link_tcs(const struct brw_compiler *compiler,
 {
    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
 
-   brw_nir_link_shaders(compiler, &tcs_stage->nir, &tes_stage->nir);
+   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
 
    nir_lower_patch_vertices(tes_stage->nir,
                             tcs_stage->nir->info.tess.tcs_vertices_out,
@@ -732,7 +844,7 @@ anv_pipeline_link_tes(const struct brw_compiler *compiler,
                       struct anv_pipeline_stage *next_stage)
 {
    if (next_stage)
-      brw_nir_link_shaders(compiler, &tes_stage->nir, &next_stage->nir);
+      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
 }
 
 static const unsigned *
@@ -759,7 +871,7 @@ anv_pipeline_link_gs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *next_stage)
 {
    if (next_stage)
-      brw_nir_link_shaders(compiler, &gs_stage->nir, &next_stage->nir);
+      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
 }
 
 static const unsigned *
@@ -798,14 +910,24 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      /* Unused or out-of-bounds */
-      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
+      /* Out-of-bounds */
+      if (rt >= MAX_RTS)
          continue;
 
       const unsigned array_len =
          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
       assert(rt + array_len <= max_rt);
 
+      /* Unused */
+      if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) {
+         /* If this is the RT at location 0 and we have alpha to coverage
+          * enabled we will have to create a null RT for it, so mark it as
+          * used.
+          */
+         if (rt > 0 || !stage->key.wm.alpha_to_coverage)
+            continue;
+      }
+
       for (unsigned i = 0; i < array_len; i++)
          rt_used[rt + i] = true;
    }
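
The unused-output check now masks an entire output array at once via BITFIELD_RANGE. A self-contained illustration of the mask arithmetic, using a local stand-in for Mesa's BITFIELD_RANGE macro from util/macros.h:

    #include <assert.h>
    #include <stdint.h>

    /* Local stand-in for util/macros.h BITFIELD_RANGE(b, count): a mask of
     * `count` consecutive bits starting at bit `b`.
     */
    #define RANGE_MASK(b, count) \
       (uint32_t)((((uint64_t)1 << (count)) - 1) << (b))

    int main(void)
    {
       /* A fragment shader writes an array of 2 outputs starting at RT 1. */
       const unsigned rt = 1, array_len = 2;
       const uint32_t mask = RANGE_MASK(rt, array_len);   /* 0b0110 */

       /* The subpass only has color attachments 0 and 3 wired up. */
       const uint32_t color_outputs_valid = 0x9;          /* 0b1001 */

       /* No overlap, so the whole output array is unused and may be thrown
        * away -- unless it is RT 0 and alpha-to-coverage is enabled, which
        * is the special case handled in the hunk above.
        */
       assert((color_outputs_valid & mask) == 0);
       return 0;
    }
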
@@ -816,11 +938,22 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       rt_to_bindings[i] = num_rts;
-      rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
-         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
-         .binding = 0,
-         .index = i,
-      };
+
+      if (stage->key.wm.color_outputs_valid & (1 << i)) {
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = i,
+         };
+      } else {
+         /* Setup a null render target */
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = UINT32_MAX,
+         };
+      }
+
       num_rts++;
    }
 
@@ -830,9 +963,11 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      if (rt >= MAX_RTS ||
-          !(stage->key.wm.color_outputs_valid & (1 << rt))) {
-         /* Unused or out-of-bounds, throw it away */
+
+      if (rt >= MAX_RTS || !rt_used[rt]) {
+         /* Unused or out-of-bounds, throw it away, unless it is the first
+          * RT and we have alpha to coverage enabled.
+          */
          deleted_output = true;
          var->data.mode = nir_var_function_temp;
          exec_node_remove(&var->node);
@@ -944,21 +1079,22 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
       const struct gen_device_info *devinfo = &pipeline->device->info;
       switch (stage) {
       case MESA_SHADER_VERTEX:
-         populate_vs_prog_key(devinfo, &stages[stage].key.vs);
+         populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
          break;
       case MESA_SHADER_TESS_CTRL:
-         populate_tcs_prog_key(devinfo,
+         populate_tcs_prog_key(devinfo, sinfo->flags,
                                info->pTessellationState->patchControlPoints,
                                &stages[stage].key.tcs);
          break;
       case MESA_SHADER_TESS_EVAL:
-         populate_tes_prog_key(devinfo, &stages[stage].key.tes);
+         populate_tes_prog_key(devinfo, sinfo->flags, &stages[stage].key.tes);
          break;
       case MESA_SHADER_GEOMETRY:
-         populate_gs_prog_key(devinfo, &stages[stage].key.gs);
+         populate_gs_prog_key(devinfo, sinfo->flags, &stages[stage].key.gs);
          break;
       case MESA_SHADER_FRAGMENT:
-         populate_wm_prog_key(devinfo, pipeline->subpass,
+         populate_wm_prog_key(devinfo, sinfo->flags,
+                              pipeline->subpass,
                               info->pMultisampleState,
                               &stages[stage].key.wm);
          break;
@@ -1252,7 +1388,12 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
    struct anv_shader_bin *bin = NULL;
 
-   populate_cs_prog_key(&pipeline->device->info, &stage.key.cs);
+   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
+      vk_find_struct_const(info->stage.pNext,
+                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
+   populate_cs_prog_key(&pipeline->device->info, info->stage.flags,
+                        rss_info, &stage.key.cs);
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
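
populate_cs_prog_key() now pulls VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT out of the stage's pNext chain (VK_EXT_subgroup_size_control). For context, an illustrative application-side snippet showing how that struct reaches vk_find_struct_const() (the shader module handle and helper name are placeholders):

    #include <vulkan/vulkan.h>

    /* Illustrative application-side usage of VK_EXT_subgroup_size_control:
     * the extension struct is chained into the stage's pNext, which is what
     * vk_find_struct_const() extracts above.
     */
    static VkPipelineShaderStageCreateInfo
    make_cs_stage(VkShaderModule cs_module,
                  const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss)
    {
       const VkPipelineShaderStageCreateInfo stage = {
          .sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
          .pNext  = rss,               /* e.g. requiredSubgroupSize = 8, 16 or 32 */
          .stage  = VK_SHADER_STAGE_COMPUTE_BIT,
          .module = cs_module,
          .pName  = "main",
       };
       return stage;
    }
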