src/intel/vulkan/anv_pipeline.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28 #include <fcntl.h>
  29
  30 #include "util/mesa-sha1.h"
  31 #include "util/os_time.h"
  32 #include "common/gen_l3_config.h"
  33 #include "common/gen_disasm.h"
  34 #include "anv_private.h"
  35 #include "compiler/brw_nir.h"
  36 #include "anv_nir.h"
  37 #include "nir/nir_xfb_info.h"
  38 #include "spirv/nir_spirv.h"
  39 #include "vk_util.h"
  40
  41 /* Needed for SWIZZLE macros */
  42 #include "program/prog_instruction.h"
  43
  44 // Shader functions
  45
  46 VkResult anv_CreateShaderModule(
  47     VkDevice                                    _device,
  48     const VkShaderModuleCreateInfo*             pCreateInfo,
  49     const VkAllocationCallbacks*                pAllocator,
  50     VkShaderModule*                             pShaderModule)
  51 {
  52    ANV_FROM_HANDLE(anv_device, device, _device);
  53    struct anv_shader_module *module;
  54
  55    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
  56    assert(pCreateInfo->flags == 0);
  57
  58    module = vk_alloc2(&device->vk.alloc, pAllocator,
  59                        sizeof(*module) + pCreateInfo->codeSize, 8,
  60                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  61    if (module == NULL)
  62       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  63
  64    vk_object_base_init(&device->vk, &module->base,
  65                        VK_OBJECT_TYPE_SHADER_MODULE);
  66    module->size = pCreateInfo->codeSize;
  67    memcpy(module->data, pCreateInfo->pCode, module->size);
  68
  69    _mesa_sha1_compute(module->data, module->size, module->sha1);
  70
  71    *pShaderModule = anv_shader_module_to_handle(module);
  72
  73    return VK_SUCCESS;
  74 }
  75
  76 void anv_DestroyShaderModule(
  77     VkDevice                                    _device,
  78     VkShaderModule                              _module,
  79     const VkAllocationCallbacks*                pAllocator)
  80 {
  81    ANV_FROM_HANDLE(anv_device, device, _device);
  82    ANV_FROM_HANDLE(anv_shader_module, module, _module);
  83
  84    if (!module)
  85       return;
  86
  87    vk_object_base_finish(&module->base);
  88    vk_free2(&device->vk.alloc, pAllocator, module);
  89 }
  90
  91 #define SPIR_V_MAGIC_NUMBER 0x07230203
  92
  93 struct anv_spirv_debug_data {
  94    struct anv_device *device;
  95    const struct anv_shader_module *module;
  96 };
  97
  98 static void anv_spirv_nir_debug(void *private_data,
  99                                 enum nir_spirv_debug_level level,
 100                                 size_t spirv_offset,
 101                                 const char *message)
 102 {
 103    struct anv_spirv_debug_data *debug_data = private_data;
 104    struct anv_instance *instance = debug_data->device->physical->instance;
 105
 106    static const VkDebugReportFlagsEXT vk_flags[] = {
 107       [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
 108       [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
 109       [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
 110    };
 111    char buffer[256];
 112
 113    snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
 114
 115    vk_debug_report(&instance->debug_report_callbacks,
 116                    vk_flags[level],
 117                    VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
 118                    (uint64_t) (uintptr_t) debug_data->module,
 119                    0, 0, "anv", buffer);
 120 }
 121
 122 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 123  * we can't do that yet because we don't have the ability to copy nir.
 124  */
 125 static nir_shader *
 126 anv_shader_compile_to_nir(struct anv_device *device,
 127                           void *mem_ctx,
 128                           const struct anv_shader_module *module,
 129                           const char *entrypoint_name,
 130                           gl_shader_stage stage,
 131                           const VkSpecializationInfo *spec_info)
 132 {
 133    const struct anv_physical_device *pdevice = device->physical;
 134    const struct brw_compiler *compiler = pdevice->compiler;
 135    const nir_shader_compiler_options *nir_options =
 136       compiler->glsl_compiler_options[stage].NirOptions;
 137
 138    uint32_t *spirv = (uint32_t *) module->data;
 139    assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
 140    assert(module->size % 4 == 0);
 141
 142    uint32_t num_spec_entries = 0;
 143    struct nir_spirv_specialization *spec_entries = NULL;
 144    if (spec_info && spec_info->mapEntryCount > 0) {
 145       num_spec_entries = spec_info->mapEntryCount;
 146       spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
 147       for (uint32_t i = 0; i < num_spec_entries; i++) {
 148          VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
 149          const void *data = spec_info->pData + entry.offset;
 150          assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
 151
 152          spec_entries[i].id = spec_info->pMapEntries[i].constantID;
 153          switch (entry.size) {
 154          case 8:
 155             spec_entries[i].value.u64 = *(const uint64_t *)data;
 156             break;
 157          case 4:
 158             spec_entries[i].value.u32 = *(const uint32_t *)data;
 159             break;
 160          case 2:
 161             spec_entries[i].value.u16 = *(const uint16_t *)data;
 162             break;
 163          case 1:
 164             spec_entries[i].value.u8 = *(const uint8_t *)data;
 165             break;
 166          default:
 167             assert(!"Invalid spec constant size");
 168             break;
 169          }
 170       }
 171    }
 172
 173    struct anv_spirv_debug_data spirv_debug_data = {
 174       .device = device,
 175       .module = module,
 176    };
 177    struct spirv_to_nir_options spirv_options = {
 178       .frag_coord_is_sysval = true,
 179       .caps = {
 180          .demote_to_helper_invocation = true,
 181          .derivative_group = true,
 182          .descriptor_array_dynamic_indexing = true,
 183          .descriptor_array_non_uniform_indexing = true,
 184          .descriptor_indexing = true,
 185          .device_group = true,
 186          .draw_parameters = true,
 187          .float16 = pdevice->info.gen >= 8,
 188          .float64 = pdevice->info.gen >= 8,
 189          .fragment_shader_sample_interlock = pdevice->info.gen >= 9,
 190          .fragment_shader_pixel_interlock = pdevice->info.gen >= 9,
 191          .geometry_streams = true,
 192          .image_write_without_format = true,
 193          .int8 = pdevice->info.gen >= 8,
 194          .int16 = pdevice->info.gen >= 8,
 195          .int64 = pdevice->info.gen >= 8,
 196          .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
 197          .integer_functions2 = pdevice->info.gen >= 8,
 198          .min_lod = true,
 199          .multiview = true,
 200          .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
 201          .post_depth_coverage = pdevice->info.gen >= 9,
 202          .runtime_descriptor_array = true,
 203          .float_controls = pdevice->info.gen >= 8,
 204          .shader_clock = true,
 205          .shader_viewport_index_layer = true,
 206          .stencil_export = pdevice->info.gen >= 9,
 207          .storage_8bit = pdevice->info.gen >= 8,
 208          .storage_16bit = pdevice->info.gen >= 8,
 209          .subgroup_arithmetic = true,
 210          .subgroup_basic = true,
 211          .subgroup_ballot = true,
 212          .subgroup_quad = true,
 213          .subgroup_shuffle = true,
 214          .subgroup_vote = true,
 215          .tessellation = true,
 216          .transform_feedback = pdevice->info.gen >= 8,
 217          .variable_pointers = true,
 218          .vk_memory_model = true,
 219          .vk_memory_model_device_scope = true,
 220       },
 221       .ubo_addr_format = nir_address_format_32bit_index_offset,
 222       .ssbo_addr_format =
 223           anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
 224       .phys_ssbo_addr_format = nir_address_format_64bit_global,
 225       .push_const_addr_format = nir_address_format_logical,
 226
 227       /* TODO: Consider changing this to an address format that has the NULL
 228        * pointer equals to 0.  That might be a better format to play nice
 229        * with certain code / code generators.
 230        */
 231       .shared_addr_format = nir_address_format_32bit_offset,
 232       .debug = {
 233          .func = anv_spirv_nir_debug,
 234          .private_data = &spirv_debug_data,
 235       },
 236    };
 237
 238
 239    nir_shader *nir =
 240       spirv_to_nir(spirv, module->size / 4,
 241                    spec_entries, num_spec_entries,
 242                    stage, entrypoint_name, &spirv_options, nir_options);
 243    assert(nir->info.stage == stage);
 244    nir_validate_shader(nir, "after spirv_to_nir");
 245    ralloc_steal(mem_ctx, nir);
 246
 247    free(spec_entries);
 248
 249    if (unlikely(INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage))) {
 250       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
 251               gl_shader_stage_name(stage));
 252       nir_print_shader(nir, stderr);
 253    }
 254
 255    /* We have to lower away local constant initializers right before we
 256     * inline functions.  That way they get properly initialized at the top
 257     * of the function and not at the top of its caller.
 258     */
 259    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
 260    NIR_PASS_V(nir, nir_lower_returns);
 261    NIR_PASS_V(nir, nir_inline_functions);
 262    NIR_PASS_V(nir, nir_opt_deref);
 263
 264    /* Pick off the single entrypoint that we want */
 265    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
 266       if (!func->is_entrypoint)
 267          exec_node_remove(&func->node);
 268    }
 269    assert(exec_list_length(&nir->functions) == 1);
 270
 271    /* Now that we've deleted all but the main function, we can go ahead and
 272     * lower the rest of the constant initializers.  We do this here so that
 273     * nir_remove_dead_variables and split_per_member_structs below see the
 274     * corresponding stores.
 275     */
 276    NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
 277
 278    /* Split member structs.  We do this before lower_io_to_temporaries so that
 279     * it doesn't lower system values to temporaries by accident.
 280     */
 281    NIR_PASS_V(nir, nir_split_var_copies);
 282    NIR_PASS_V(nir, nir_split_per_member_structs);
 283
 284    NIR_PASS_V(nir, nir_remove_dead_variables,
 285               nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
 286               NULL);
 287
 288    NIR_PASS_V(nir, nir_propagate_invariant);
 289    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 290               nir_shader_get_entrypoint(nir), true, false);
 291
 292    NIR_PASS_V(nir, nir_lower_frexp);
 293
 294    /* Vulkan uses the separate-shader linking model */
 295    nir->info.separate_shader = true;
 296
 297    brw_preprocess_nir(compiler, nir, NULL);
 298
 299    return nir;
 300 }
 301
 302 void anv_DestroyPipeline(
 303     VkDevice                                    _device,
 304     VkPipeline                                  _pipeline,
 305     const VkAllocationCallbacks*                pAllocator)
 306 {
 307    ANV_FROM_HANDLE(anv_device, device, _device);
 308    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 309
 310    if (!pipeline)
 311       return;
 312
 313    anv_reloc_list_finish(&pipeline->batch_relocs,
 314                          pAllocator ? pAllocator : &device->vk.alloc);
 315
 316    ralloc_free(pipeline->mem_ctx);
 317
 318    switch (pipeline->type) {
 319    case ANV_PIPELINE_GRAPHICS: {
 320       struct anv_graphics_pipeline *gfx_pipeline =
 321          anv_pipeline_to_graphics(pipeline);
 322
 323       if (gfx_pipeline->blend_state.map)
 324          anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
 325
 326       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 327          if (gfx_pipeline->shaders[s])
 328             anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
 329       }
 330       break;
 331    }
 332
 333    case ANV_PIPELINE_COMPUTE: {
 334       struct anv_compute_pipeline *compute_pipeline =
 335          anv_pipeline_to_compute(pipeline);
 336
 337       if (compute_pipeline->cs)
 338          anv_shader_bin_unref(device, compute_pipeline->cs);
 339
 340       break;
 341    }
 342
 343    default:
 344       unreachable("invalid pipeline type");
 345    }
 346
 347    vk_object_base_finish(&pipeline->base);
 348    vk_free2(&device->vk.alloc, pAllocator, pipeline);
 349 }
 350
 351 static const uint32_t vk_to_gen_primitive_type[] = {
 352    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
 353    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
 354    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
 355    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
 356    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
 357    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
 358    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
 359    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
 360    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
 361    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
 362 };
 363
 364 static void
 365 populate_sampler_prog_key(const struct gen_device_info *devinfo,
 366                           struct brw_sampler_prog_key_data *key)
 367 {
 368    /* Almost all multisampled textures are compressed.  The only time when we
 369     * don't compress a multisampled texture is for 16x MSAA with a surface
 370     * width greater than 8k which is a bit of an edge case.  Since the sampler
 371     * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
 372     * to tell the compiler to always assume compression.
 373     */
 374    key->compressed_multisample_layout_mask = ~0;
 375
 376    /* SkyLake added support for 16x MSAA.  With this came a new message for
 377     * reading from a 16x MSAA surface with compression.  The new message was
 378     * needed because now the MCS data is 64 bits instead of 32 or lower as is
 379     * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
 380     * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
 381     * so we can just use it unconditionally.  This may not be quite as
 382     * efficient but it saves us from recompiling.
 383     */
 384    if (devinfo->gen >= 9)
 385       key->msaa_16 = ~0;
 386
 387    /* XXX: Handle texture swizzle on HSW- */
 388    for (int i = 0; i < MAX_SAMPLERS; i++) {
 389       /* Assume color sampler, no swizzling. (Works for BDW+) */
 390       key->swizzles[i] = SWIZZLE_XYZW;
 391    }
 392 }
 393
 394 static void
 395 populate_base_prog_key(const struct gen_device_info *devinfo,
 396                        VkPipelineShaderStageCreateFlags flags,
 397                        struct brw_base_prog_key *key)
 398 {
 399    if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
 400       key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
 401    else
 402       key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
 403
 404    populate_sampler_prog_key(devinfo, &key->tex);
 405 }
 406
 407 static void
 408 populate_vs_prog_key(const struct gen_device_info *devinfo,
 409                      VkPipelineShaderStageCreateFlags flags,
 410                      struct brw_vs_prog_key *key)
 411 {
 412    memset(key, 0, sizeof(*key));
 413
 414    populate_base_prog_key(devinfo, flags, &key->base);
 415
 416    /* XXX: Handle vertex input work-arounds */
 417
 418    /* XXX: Handle sampler_prog_key */
 419 }
 420
 421 static void
 422 populate_tcs_prog_key(const struct gen_device_info *devinfo,
 423                       VkPipelineShaderStageCreateFlags flags,
 424                       unsigned input_vertices,
 425                       struct brw_tcs_prog_key *key)
 426 {
 427    memset(key, 0, sizeof(*key));
 428
 429    populate_base_prog_key(devinfo, flags, &key->base);
 430
 431    key->input_vertices = input_vertices;
 432 }
 433
 434 static void
 435 populate_tes_prog_key(const struct gen_device_info *devinfo,
 436                       VkPipelineShaderStageCreateFlags flags,
 437                       struct brw_tes_prog_key *key)
 438 {
 439    memset(key, 0, sizeof(*key));
 440
 441    populate_base_prog_key(devinfo, flags, &key->base);
 442 }
 443
 444 static void
 445 populate_gs_prog_key(const struct gen_device_info *devinfo,
 446                      VkPipelineShaderStageCreateFlags flags,
 447                      struct brw_gs_prog_key *key)
 448 {
 449    memset(key, 0, sizeof(*key));
 450
 451    populate_base_prog_key(devinfo, flags, &key->base);
 452 }
 453
 454 static void
 455 populate_wm_prog_key(const struct gen_device_info *devinfo,
 456                      VkPipelineShaderStageCreateFlags flags,
 457                      const struct anv_subpass *subpass,
 458                      const VkPipelineMultisampleStateCreateInfo *ms_info,
 459                      struct brw_wm_prog_key *key)
 460 {
 461    memset(key, 0, sizeof(*key));
 462
 463    populate_base_prog_key(devinfo, flags, &key->base);
 464
 465    /* We set this to 0 here and set to the actual value before we call
 466     * brw_compile_fs.
 467     */
 468    key->input_slots_valid = 0;
 469
 470    /* Vulkan doesn't specify a default */
 471    key->high_quality_derivatives = false;
 472
 473    /* XXX Vulkan doesn't appear to specify */
 474    key->clamp_fragment_color = false;
 475
 476    assert(subpass->color_count <= MAX_RTS);
 477    for (uint32_t i = 0; i < subpass->color_count; i++) {
 478       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
 479          key->color_outputs_valid |= (1 << i);
 480    }
 481
 482    key->nr_color_regions = subpass->color_count;
 483
 484    /* To reduce possible shader recompilations we would need to know if
 485     * there is a SampleMask output variable to compute if we should emit
 486     * code to workaround the issue that hardware disables alpha to coverage
 487     * when there is SampleMask output.
 488     */
 489    key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
 490
 491    /* Vulkan doesn't support fixed-function alpha test */
 492    key->alpha_test_replicate_alpha = false;
 493
 494    if (ms_info) {
 495       /* We should probably pull this out of the shader, but it's fairly
 496        * harmless to compute it and then let dead-code take care of it.
 497        */
 498       if (ms_info->rasterizationSamples > 1) {
 499          key->persample_interp = ms_info->sampleShadingEnable &&
 500             (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
 501          key->multisample_fbo = true;
 502       }
 503
 504       key->frag_coord_adds_sample_pos = key->persample_interp;
 505    }
 506 }
 507
 508 static void
 509 populate_cs_prog_key(const struct gen_device_info *devinfo,
 510                      VkPipelineShaderStageCreateFlags flags,
 511                      const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
 512                      struct brw_cs_prog_key *key)
 513 {
 514    memset(key, 0, sizeof(*key));
 515
 516    populate_base_prog_key(devinfo, flags, &key->base);
 517
 518    if (rss_info) {
 519       assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
 520
 521       /* These enum values are expressly chosen to be equal to the subgroup
 522        * size that they require.
 523        */
 524       assert(rss_info->requiredSubgroupSize == 8 ||
 525              rss_info->requiredSubgroupSize == 16 ||
 526              rss_info->requiredSubgroupSize == 32);
 527       key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
 528    } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
 529       /* If the client expressly requests full subgroups and they don't
 530        * specify a subgroup size, we need to pick one.  If they're requested
 531        * varying subgroup sizes, we set it to UNIFORM and let the back-end
 532        * compiler pick.  Otherwise, we specify the API value of 32.
 533        * Performance will likely be terrible in this case but there's nothing
 534        * we can do about that.  The client should have chosen a size.
 535        */
 536       if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
 537          key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
 538       else
 539          key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
 540    }
 541 }
 542
 543 struct anv_pipeline_stage {
 544    gl_shader_stage stage;
 545
 546    const struct anv_shader_module *module;
 547    const char *entrypoint;
 548    const VkSpecializationInfo *spec_info;
 549
 550    unsigned char shader_sha1[20];
 551
 552    union brw_any_prog_key key;
 553
 554    struct {
 555       gl_shader_stage stage;
 556       unsigned char sha1[20];
 557    } cache_key;
 558
 559    nir_shader *nir;
 560
 561    struct anv_pipeline_binding surface_to_descriptor[256];
 562    struct anv_pipeline_binding sampler_to_descriptor[256];
 563    struct anv_pipeline_bind_map bind_map;
 564
 565    union brw_any_prog_data prog_data;
 566
 567    uint32_t num_stats;
 568    struct brw_compile_stats stats[3];
 569    char *disasm[3];
 570
 571    VkPipelineCreationFeedbackEXT feedback;
 572
 573    const unsigned *code;
 574 };
 575
 576 static void
 577 anv_pipeline_hash_shader(const struct anv_shader_module *module,
 578                          const char *entrypoint,
 579                          gl_shader_stage stage,
 580                          const VkSpecializationInfo *spec_info,
 581                          unsigned char *sha1_out)
 582 {
 583    struct mesa_sha1 ctx;
 584    _mesa_sha1_init(&ctx);
 585
 586    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
 587    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
 588    _mesa_sha1_update(&ctx, &stage, sizeof(stage));
 589    if (spec_info) {
 590       _mesa_sha1_update(&ctx, spec_info->pMapEntries,
 591                         spec_info->mapEntryCount *
 592                         sizeof(*spec_info->pMapEntries));
 593       _mesa_sha1_update(&ctx, spec_info->pData,
 594                         spec_info->dataSize);
 595    }
 596
 597    _mesa_sha1_final(&ctx, sha1_out);
 598 }
 599
 600 static void
 601 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
 602                            struct anv_pipeline_layout *layout,
 603                            struct anv_pipeline_stage *stages,
 604                            unsigned char *sha1_out)
 605 {
 606    struct mesa_sha1 ctx;
 607    _mesa_sha1_init(&ctx);
 608
 609    _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
 610                      sizeof(pipeline->subpass->view_mask));
 611
 612    if (layout)
 613       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 614
 615    const bool rba = pipeline->base.device->robust_buffer_access;
 616    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
 617
 618    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 619       if (stages[s].entrypoint) {
 620          _mesa_sha1_update(&ctx, stages[s].shader_sha1,
 621                            sizeof(stages[s].shader_sha1));
 622          _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
 623       }
 624    }
 625
 626    _mesa_sha1_final(&ctx, sha1_out);
 627 }
 628
 629 static void
 630 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
 631                           struct anv_pipeline_layout *layout,
 632                           struct anv_pipeline_stage *stage,
 633                           unsigned char *sha1_out)
 634 {
 635    struct mesa_sha1 ctx;
 636    _mesa_sha1_init(&ctx);
 637
 638    if (layout)
 639       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 640
 641    const bool rba = pipeline->base.device->robust_buffer_access;
 642    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
 643
 644    _mesa_sha1_update(&ctx, stage->shader_sha1,
 645                      sizeof(stage->shader_sha1));
 646    _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
 647
 648    _mesa_sha1_final(&ctx, sha1_out);
 649 }
 650
 651 static nir_shader *
 652 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
 653                            struct anv_pipeline_cache *cache,
 654                            void *mem_ctx,
 655                            struct anv_pipeline_stage *stage)
 656 {
 657    const struct brw_compiler *compiler =
 658       pipeline->device->physical->compiler;
 659    const nir_shader_compiler_options *nir_options =
 660       compiler->glsl_compiler_options[stage->stage].NirOptions;
 661    nir_shader *nir;
 662
 663    nir = anv_device_search_for_nir(pipeline->device, cache,
 664                                    nir_options,
 665                                    stage->shader_sha1,
 666                                    mem_ctx);
 667    if (nir) {
 668       assert(nir->info.stage == stage->stage);
 669       return nir;
 670    }
 671
 672    nir = anv_shader_compile_to_nir(pipeline->device,
 673                                    mem_ctx,
 674                                    stage->module,
 675                                    stage->entrypoint,
 676                                    stage->stage,
 677                                    stage->spec_info);
 678    if (nir) {
 679       anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
 680       return nir;
 681    }
 682
 683    return NULL;
 684 }
 685
 686 static void
 687 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 688                        void *mem_ctx,
 689                        struct anv_pipeline_stage *stage,
 690                        struct anv_pipeline_layout *layout)
 691 {
 692    const struct anv_physical_device *pdevice = pipeline->device->physical;
 693    const struct brw_compiler *compiler = pdevice->compiler;
 694
 695    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
 696    nir_shader *nir = stage->nir;
 697
 698    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 699       NIR_PASS_V(nir, nir_lower_wpos_center,
 700                  anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
 701       NIR_PASS_V(nir, nir_lower_input_attachments, true);
 702    }
 703
 704    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
 705
 706    if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
 707       NIR_PASS_V(nir, anv_nir_lower_multiview,
 708                  anv_pipeline_to_graphics(pipeline));
 709    }
 710
 711    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 712
 713    NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL);
 714
 715    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
 716               nir_address_format_64bit_global);
 717
 718    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
 719    anv_nir_apply_pipeline_layout(pdevice,
 720                                  pipeline->device->robust_buffer_access,
 721                                  layout, nir, &stage->bind_map);
 722
 723    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
 724               nir_address_format_32bit_index_offset);
 725    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
 726               anv_nir_ssbo_addr_format(pdevice,
 727                  pipeline->device->robust_buffer_access));
 728
 729    NIR_PASS_V(nir, nir_opt_constant_folding);
 730
 731    /* We don't support non-uniform UBOs and non-uniform SSBO access is
 732     * handled naturally by falling back to A64 messages.
 733     */
 734    NIR_PASS_V(nir, nir_lower_non_uniform_access,
 735               nir_lower_non_uniform_texture_access |
 736               nir_lower_non_uniform_image_access);
 737
 738    anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
 739                                nir, prog_data, &stage->bind_map, mem_ctx);
 740
 741    stage->nir = nir;
 742 }
 743
 744 static void
 745 anv_pipeline_link_vs(const struct brw_compiler *compiler,
 746                      struct anv_pipeline_stage *vs_stage,
 747                      struct anv_pipeline_stage *next_stage)
 748 {
 749    if (next_stage)
 750       brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
 751 }
 752
 753 static void
 754 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
 755                         void *mem_ctx,
 756                         struct anv_graphics_pipeline *pipeline,
 757                         struct anv_pipeline_stage *vs_stage)
 758 {
 759    /* When using Primitive Replication for multiview, each view gets its own
 760     * position slot.
 761     */
 762    uint32_t pos_slots = pipeline->use_primitive_replication ?
 763       anv_subpass_view_count(pipeline->subpass) : 1;
 764
 765    brw_compute_vue_map(compiler->devinfo,
 766                        &vs_stage->prog_data.vs.base.vue_map,
 767                        vs_stage->nir->info.outputs_written,
 768                        vs_stage->nir->info.separate_shader,
 769                        pos_slots);
 770
 771    vs_stage->num_stats = 1;
 772    vs_stage->code = brw_compile_vs(compiler, pipeline->base.device, mem_ctx,
 773                                    &vs_stage->key.vs,
 774                                    &vs_stage->prog_data.vs,
 775                                    vs_stage->nir, -1,
 776                                    vs_stage->stats, NULL);
 777 }
 778
 779 static void
 780 merge_tess_info(struct shader_info *tes_info,
 781                 const struct shader_info *tcs_info)
 782 {
 783    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
 784     *
 785     *    "PointMode. Controls generation of points rather than triangles
 786     *     or lines. This functionality defaults to disabled, and is
 787     *     enabled if either shader stage includes the execution mode.
 788     *
 789     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
 790     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
 791     * and OutputVertices, it says:
 792     *
 793     *    "One mode must be set in at least one of the tessellation
 794     *     shader stages."
 795     *
 796     * So, the fields can be set in either the TCS or TES, but they must
 797     * agree if set in both.  Our backend looks at TES, so bitwise-or in
 798     * the values from the TCS.
 799     */
 800    assert(tcs_info->tess.tcs_vertices_out == 0 ||
 801           tes_info->tess.tcs_vertices_out == 0 ||
 802           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
 803    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
 804
 805    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
 806           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
 807           tcs_info->tess.spacing == tes_info->tess.spacing);
 808    tes_info->tess.spacing |= tcs_info->tess.spacing;
 809
 810    assert(tcs_info->tess.primitive_mode == 0 ||
 811           tes_info->tess.primitive_mode == 0 ||
 812           tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
 813    tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
 814    tes_info->tess.ccw |= tcs_info->tess.ccw;
 815    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
 816 }
 817
 818 static void
 819 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
 820                       struct anv_pipeline_stage *tcs_stage,
 821                       struct anv_pipeline_stage *tes_stage)
 822 {
 823    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
 824
 825    brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
 826
 827    nir_lower_patch_vertices(tes_stage->nir,
 828                             tcs_stage->nir->info.tess.tcs_vertices_out,
 829                             NULL);
 830
 831    /* Copy TCS info into the TES info */
 832    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
 833
 834    /* Whacking the key after cache lookup is a bit sketchy, but all of
 835     * this comes from the SPIR-V, which is part of the hash used for the
 836     * pipeline cache.  So it should be safe.
 837     */
 838    tcs_stage->key.tcs.tes_primitive_mode =
 839       tes_stage->nir->info.tess.primitive_mode;
 840    tcs_stage->key.tcs.quads_workaround =
 841       compiler->devinfo->gen < 9 &&
 842       tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
 843       tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
 844 }
 845
 846 static void
 847 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
 848                          void *mem_ctx,
 849                          struct anv_device *device,
 850                          struct anv_pipeline_stage *tcs_stage,
 851                          struct anv_pipeline_stage *prev_stage)
 852 {
 853    tcs_stage->key.tcs.outputs_written =
 854       tcs_stage->nir->info.outputs_written;
 855    tcs_stage->key.tcs.patch_outputs_written =
 856       tcs_stage->nir->info.patch_outputs_written;
 857
 858    tcs_stage->num_stats = 1;
 859    tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
 860                                      &tcs_stage->key.tcs,
 861                                      &tcs_stage->prog_data.tcs,
 862                                      tcs_stage->nir, -1,
 863                                      tcs_stage->stats, NULL);
 864 }
 865
 866 static void
 867 anv_pipeline_link_tes(const struct brw_compiler *compiler,
 868                       struct anv_pipeline_stage *tes_stage,
 869                       struct anv_pipeline_stage *next_stage)
 870 {
 871    if (next_stage)
 872       brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
 873 }
 874
 875 static void
 876 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
 877                          void *mem_ctx,
 878                          struct anv_device *device,
 879                          struct anv_pipeline_stage *tes_stage,
 880                          struct anv_pipeline_stage *tcs_stage)
 881 {
 882    tes_stage->key.tes.inputs_read =
 883       tcs_stage->nir->info.outputs_written;
 884    tes_stage->key.tes.patch_inputs_read =
 885       tcs_stage->nir->info.patch_outputs_written;
 886
 887    tes_stage->num_stats = 1;
 888    tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
 889                                      &tes_stage->key.tes,
 890                                      &tcs_stage->prog_data.tcs.base.vue_map,
 891                                      &tes_stage->prog_data.tes,
 892                                      tes_stage->nir, -1,
 893                                      tes_stage->stats, NULL);
 894 }
 895
 896 static void
 897 anv_pipeline_link_gs(const struct brw_compiler *compiler,
 898                      struct anv_pipeline_stage *gs_stage,
 899                      struct anv_pipeline_stage *next_stage)
 900 {
 901    if (next_stage)
 902       brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
 903 }
 904
 905 static void
 906 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
 907                         void *mem_ctx,
 908                         struct anv_device *device,
 909                         struct anv_pipeline_stage *gs_stage,
 910                         struct anv_pipeline_stage *prev_stage)
 911 {
 912    brw_compute_vue_map(compiler->devinfo,
 913                        &gs_stage->prog_data.gs.base.vue_map,
 914                        gs_stage->nir->info.outputs_written,
 915                        gs_stage->nir->info.separate_shader, 1);
 916
 917    gs_stage->num_stats = 1;
 918    gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
 919                                    &gs_stage->key.gs,
 920                                    &gs_stage->prog_data.gs,
 921                                    gs_stage->nir, NULL, -1,
 922                                    gs_stage->stats, NULL);
 923 }
 924
 925 static void
 926 anv_pipeline_link_fs(const struct brw_compiler *compiler,
 927                      struct anv_pipeline_stage *stage)
 928 {
 929    unsigned num_rt_bindings;
 930    struct anv_pipeline_binding rt_bindings[MAX_RTS];
 931    if (stage->key.wm.nr_color_regions > 0) {
 932       assert(stage->key.wm.nr_color_regions <= MAX_RTS);
 933       for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
 934          if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
 935             rt_bindings[rt] = (struct anv_pipeline_binding) {
 936                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 937                .index = rt,
 938             };
 939          } else {
 940             /* Setup a null render target */
 941             rt_bindings[rt] = (struct anv_pipeline_binding) {
 942                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 943                .index = UINT32_MAX,
 944             };
 945          }
 946       }
 947       num_rt_bindings = stage->key.wm.nr_color_regions;
 948    } else {
 949       /* Setup a null render target */
 950       rt_bindings[0] = (struct anv_pipeline_binding) {
 951          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 952          .index = UINT32_MAX,
 953       };
 954       num_rt_bindings = 1;
 955    }
 956
 957    assert(num_rt_bindings <= MAX_RTS);
 958    assert(stage->bind_map.surface_count == 0);
 959    typed_memcpy(stage->bind_map.surface_to_descriptor,
 960                 rt_bindings, num_rt_bindings);
 961    stage->bind_map.surface_count += num_rt_bindings;
 962
 963    /* Now that we've set up the color attachments, we can go through and
 964     * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
 965     * hopes that dead code can clean them up in this and any earlier shader
 966     * stages.
 967     */
 968    nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
 969    bool deleted_output = false;
 970    nir_foreach_variable_safe(var, &stage->nir->outputs) {
 971       /* TODO: We don't delete depth/stencil writes.  We probably could if the
 972        * subpass doesn't have a depth/stencil attachment.
 973        */
 974       if (var->data.location < FRAG_RESULT_DATA0)
 975          continue;
 976
 977       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
 978
 979       /* If this is the RT at location 0 and we have alpha to coverage
 980        * enabled we still need that write because it will affect the coverage
 981        * mask even if it's never written to a color target.
 982        */
 983       if (rt == 0 && stage->key.wm.alpha_to_coverage)
 984          continue;
 985
 986       const unsigned array_len =
 987          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
 988       assert(rt + array_len <= MAX_RTS);
 989
 990       if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
 991                              BITFIELD_RANGE(rt, array_len))) {
 992          deleted_output = true;
 993          var->data.mode = nir_var_function_temp;
 994          exec_node_remove(&var->node);
 995          exec_list_push_tail(&impl->locals, &var->node);
 996       }
 997    }
 998
 999    if (deleted_output)
1000       nir_fixup_deref_modes(stage->nir);
1001
1002    /* We stored the number of subpass color attachments in nr_color_regions
1003     * when calculating the key for caching.  Now that we've computed the bind
1004     * map, we can reduce this to the actual max before we go into the back-end
1005     * compiler.
1006     */
1007    stage->key.wm.nr_color_regions =
1008       util_last_bit(stage->key.wm.color_outputs_valid);
1009 }
1010
1011 static void
1012 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1013                         void *mem_ctx,
1014                         struct anv_device *device,
1015                         struct anv_pipeline_stage *fs_stage,
1016                         struct anv_pipeline_stage *prev_stage)
1017 {
1018    /* TODO: we could set this to 0 based on the information in nir_shader, but
1019     * we need this before we call spirv_to_nir.
1020     */
1021    assert(prev_stage);
1022    fs_stage->key.wm.input_slots_valid =
1023       prev_stage->prog_data.vue.vue_map.slots_valid;
1024
1025    fs_stage->code = brw_compile_fs(compiler, device, mem_ctx,
1026                                    &fs_stage->key.wm,
1027                                    &fs_stage->prog_data.wm,
1028                                    fs_stage->nir, -1, -1, -1,
1029                                    true, false, NULL,
1030                                    fs_stage->stats, NULL);
1031
1032    fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1033                          (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1034                          (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1035
1036    if (fs_stage->key.wm.color_outputs_valid == 0 &&
1037        !fs_stage->prog_data.wm.has_side_effects &&
1038        !fs_stage->prog_data.wm.uses_omask &&
1039        !fs_stage->key.wm.alpha_to_coverage &&
1040        !fs_stage->prog_data.wm.uses_kill &&
1041        fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1042        !fs_stage->prog_data.wm.computed_stencil) {
1043       /* This fragment shader has no outputs and no side effects.  Go ahead
1044        * and return the code pointer so we don't accidentally think the
1045        * compile failed but zero out prog_data which will set program_size to
1046        * zero and disable the stage.
1047        */
1048       memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1049    }
1050 }
1051
1052 static void
1053 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1054                             struct anv_pipeline_stage *stage,
1055                             struct brw_compile_stats *stats,
1056                             uint32_t code_offset)
1057 {
1058    char *nir = NULL;
1059    if (stage->nir &&
1060        (pipeline->flags &
1061         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1062       char *stream_data = NULL;
1063       size_t stream_size = 0;
1064       FILE *stream = open_memstream(&stream_data, &stream_size);
1065
1066       nir_print_shader(stage->nir, stream);
1067
1068       fclose(stream);
1069
1070       /* Copy it to a ralloc'd thing */
1071       nir = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1072       memcpy(nir, stream_data, stream_size);
1073       nir[stream_size] = 0;
1074
1075       free(stream_data);
1076    }
1077
1078    char *disasm = NULL;
1079    if (stage->code &&
1080        (pipeline->flags &
1081         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1082       char *stream_data = NULL;
1083       size_t stream_size = 0;
1084       FILE *stream = open_memstream(&stream_data, &stream_size);
1085
1086       uint32_t push_size = 0;
1087       for (unsigned i = 0; i < 4; i++)
1088          push_size += stage->bind_map.push_ranges[i].length;
1089       if (push_size > 0) {
1090          fprintf(stream, "Push constant ranges:\n");
1091          for (unsigned i = 0; i < 4; i++) {
1092             if (stage->bind_map.push_ranges[i].length == 0)
1093                continue;
1094
1095             fprintf(stream, "    RANGE%d (%dB): ", i,
1096                     stage->bind_map.push_ranges[i].length * 32);
1097
1098             switch (stage->bind_map.push_ranges[i].set) {
1099             case ANV_DESCRIPTOR_SET_NULL:
1100                fprintf(stream, "NULL");
1101                break;
1102
1103             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1104                fprintf(stream, "Vulkan push constants and API params");
1105                break;
1106
1107             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1108                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1109                        stage->bind_map.push_ranges[i].index,
1110                        stage->bind_map.push_ranges[i].start * 32);
1111                break;
1112
1113             case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1114                unreachable("gl_NumWorkgroups is never pushed");
1115
1116             case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1117                fprintf(stream, "Inline shader constant data (start=%dB)",
1118                        stage->bind_map.push_ranges[i].start * 32);
1119                break;
1120
1121             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1122                unreachable("Color attachments can't be pushed");
1123
1124             default:
1125                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1126                        stage->bind_map.push_ranges[i].set,
1127                        stage->bind_map.push_ranges[i].index,
1128                        stage->bind_map.push_ranges[i].start * 32);
1129                break;
1130             }
1131             fprintf(stream, "\n");
1132          }
1133          fprintf(stream, "\n");
1134       }
1135
1136       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1137        * do it for every binary.
1138        */
1139       struct gen_disasm *d = gen_disasm_create(&pipeline->device->info);
1140       gen_disasm_disassemble(d, stage->code, code_offset, stream);
1141       gen_disasm_destroy(d);
1142
1143       fclose(stream);
1144
1145       /* Copy it to a ralloc'd thing */
1146       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1147       memcpy(disasm, stream_data, stream_size);
1148       disasm[stream_size] = 0;
1149
1150       free(stream_data);
1151    }
1152
1153    const struct anv_pipeline_executable exe = {
1154       .stage = stage->stage,
1155       .stats = *stats,
1156       .nir = nir,
1157       .disasm = disasm,
1158    };
1159    util_dynarray_append(&pipeline->executables,
1160                         struct anv_pipeline_executable, exe);
1161 }
1162
1163 static void
1164 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1165                              struct anv_pipeline_stage *stage,
1166                              struct anv_shader_bin *bin)
1167 {
1168    if (stage->stage == MESA_SHADER_FRAGMENT) {
1169       /* We pull the prog data and stats out of the anv_shader_bin because
1170        * the anv_pipeline_stage may not be fully populated if we successfully
1171        * looked up the shader in a cache.
1172        */
1173       const struct brw_wm_prog_data *wm_prog_data =
1174          (const struct brw_wm_prog_data *)bin->prog_data;
1175       struct brw_compile_stats *stats = bin->stats;
1176
1177       if (wm_prog_data->dispatch_8) {
1178          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1179       }
1180
1181       if (wm_prog_data->dispatch_16) {
1182          anv_pipeline_add_executable(pipeline, stage, stats++,
1183                                      wm_prog_data->prog_offset_16);
1184       }
1185
1186       if (wm_prog_data->dispatch_32) {
1187          anv_pipeline_add_executable(pipeline, stage, stats++,
1188                                      wm_prog_data->prog_offset_32);
1189       }
1190    } else {
1191       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1192    }
1193 }
1194
1195 static void
1196 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1197 {
1198    /* TODO: Cache this pipeline-wide information. */
1199
1200    /* Primitive replication depends on information from all the shaders.
1201     * Recover this bit from the fact that we have more than one position slot
1202     * in the vertex shader when using it.
1203     */
1204    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1205    int pos_slots = 0;
1206    const struct brw_vue_prog_data *vue_prog_data =
1207       (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1208    const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1209    for (int i = 0; i < vue_map->num_slots; i++) {
1210       if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1211          pos_slots++;
1212    }
1213    pipeline->use_primitive_replication = pos_slots > 1;
1214 }
1215
1216 static VkResult
1217 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1218                               struct anv_pipeline_cache *cache,
1219                               const VkGraphicsPipelineCreateInfo *info)
1220 {
1221    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1222       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1223    };
1224    int64_t pipeline_start = os_time_get_nano();
1225
1226    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1227    struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1228
1229    pipeline->active_stages = 0;
1230
1231    VkResult result;
1232    for (uint32_t i = 0; i < info->stageCount; i++) {
1233       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1234       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1235
1236       pipeline->active_stages |= sinfo->stage;
1237
1238       int64_t stage_start = os_time_get_nano();
1239
1240       stages[stage].stage = stage;
1241       stages[stage].module = anv_shader_module_from_handle(sinfo->module);
1242       stages[stage].entrypoint = sinfo->pName;
1243       stages[stage].spec_info = sinfo->pSpecializationInfo;
1244       anv_pipeline_hash_shader(stages[stage].module,
1245                                stages[stage].entrypoint,
1246                                stage,
1247                                stages[stage].spec_info,
1248                                stages[stage].shader_sha1);
1249
1250       const struct gen_device_info *devinfo = &pipeline->base.device->info;
1251       switch (stage) {
1252       case MESA_SHADER_VERTEX:
1253          populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
1254          break;
1255       case MESA_SHADER_TESS_CTRL:
1256          populate_tcs_prog_key(devinfo, sinfo->flags,
1257                                info->pTessellationState->patchControlPoints,
1258                                &stages[stage].key.tcs);
1259          break;
1260       case MESA_SHADER_TESS_EVAL:
1261          populate_tes_prog_key(devinfo, sinfo->flags, &stages[stage].key.tes);
1262          break;
1263       case MESA_SHADER_GEOMETRY:
1264          populate_gs_prog_key(devinfo, sinfo->flags, &stages[stage].key.gs);
1265          break;
1266       case MESA_SHADER_FRAGMENT: {
1267          const bool raster_enabled =
1268             !info->pRasterizationState->rasterizerDiscardEnable;
1269          populate_wm_prog_key(devinfo, sinfo->flags,
1270                               pipeline->subpass,
1271                               raster_enabled ? info->pMultisampleState : NULL,
1272                               &stages[stage].key.wm);
1273          break;
1274       }
1275       default:
1276          unreachable("Invalid graphics shader stage");
1277       }
1278
1279       stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1280       stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1281    }
1282
1283    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1284       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1285
1286    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1287
1288    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1289
1290    unsigned char sha1[20];
1291    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1292
1293    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1294       if (!stages[s].entrypoint)
1295          continue;
1296
1297       stages[s].cache_key.stage = s;
1298       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1299    }
1300
1301    const bool skip_cache_lookup =
1302       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1303
1304    if (!skip_cache_lookup) {
1305       unsigned found = 0;
1306       unsigned cache_hits = 0;
1307       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1308          if (!stages[s].entrypoint)
1309             continue;
1310
1311          int64_t stage_start = os_time_get_nano();
1312
1313          bool cache_hit;
1314          struct anv_shader_bin *bin =
1315             anv_device_search_for_kernel(pipeline->base.device, cache,
1316                                          &stages[s].cache_key,
1317                                          sizeof(stages[s].cache_key), &cache_hit);
1318          if (bin) {
1319             found++;
1320             pipeline->shaders[s] = bin;
1321          }
1322
1323          if (cache_hit) {
1324             cache_hits++;
1325             stages[s].feedback.flags |=
1326                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1327          }
1328          stages[s].feedback.duration += os_time_get_nano() - stage_start;
1329       }
1330
1331       if (found == __builtin_popcount(pipeline->active_stages)) {
1332          if (cache_hits == found) {
1333             pipeline_feedback.flags |=
1334                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1335          }
1336          /* We found all our shaders in the cache.  We're done. */
1337          for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1338             if (!stages[s].entrypoint)
1339                continue;
1340
1341             anv_pipeline_add_executables(&pipeline->base, &stages[s],
1342                                          pipeline->shaders[s]);
1343          }
1344          anv_pipeline_init_from_cached_graphics(pipeline);
1345          goto done;
1346       } else if (found > 0) {
1347          /* We found some but not all of our shaders.  This shouldn't happen
1348           * most of the time but it can if we have a partially populated
1349           * pipeline cache.
1350           */
1351          assert(found < __builtin_popcount(pipeline->active_stages));
1352
1353          vk_debug_report(&pipeline->base.device->physical->instance->debug_report_callbacks,
1354                          VK_DEBUG_REPORT_WARNING_BIT_EXT |
1355                          VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1356                          VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1357                          (uint64_t)(uintptr_t)cache,
1358                          0, 0, "anv",
1359                          "Found a partial pipeline in the cache.  This is "
1360                          "most likely caused by an incomplete pipeline cache "
1361                          "import or export");
1362
1363          /* We're going to have to recompile anyway, so just throw away our
1364           * references to the shaders in the cache.  We'll get them out of the
1365           * cache again as part of the compilation process.
1366           */
1367          for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1368             stages[s].feedback.flags = 0;
1369             if (pipeline->shaders[s]) {
1370                anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1371                pipeline->shaders[s] = NULL;
1372             }
1373          }
1374       }
1375    }
1376
1377    void *pipeline_ctx = ralloc_context(NULL);
1378
1379    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1380       if (!stages[s].entrypoint)
1381          continue;
1382
1383       int64_t stage_start = os_time_get_nano();
1384
1385       assert(stages[s].stage == s);
1386       assert(pipeline->shaders[s] == NULL);
1387
1388       stages[s].bind_map = (struct anv_pipeline_bind_map) {
1389          .surface_to_descriptor = stages[s].surface_to_descriptor,
1390          .sampler_to_descriptor = stages[s].sampler_to_descriptor
1391       };
1392
1393       stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1394                                                  pipeline_ctx,
1395                                                  &stages[s]);
1396       if (stages[s].nir == NULL) {
1397          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1398          goto fail;
1399       }
1400
1401       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1402    }
1403
1404    /* Walk backwards to link */
1405    struct anv_pipeline_stage *next_stage = NULL;
1406    for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1407       if (!stages[s].entrypoint)
1408          continue;
1409
1410       switch (s) {
1411       case MESA_SHADER_VERTEX:
1412          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1413          break;
1414       case MESA_SHADER_TESS_CTRL:
1415          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1416          break;
1417       case MESA_SHADER_TESS_EVAL:
1418          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1419          break;
1420       case MESA_SHADER_GEOMETRY:
1421          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1422          break;
1423       case MESA_SHADER_FRAGMENT:
1424          anv_pipeline_link_fs(compiler, &stages[s]);
1425          break;
1426       default:
1427          unreachable("Invalid graphics shader stage");
1428       }
1429
1430       next_stage = &stages[s];
1431    }
1432
1433    if (pipeline->base.device->info.gen >= 12 &&
1434        pipeline->subpass->view_mask != 0) {
1435       /* For some pipelines HW Primitive Replication can be used instead of
1436        * instancing to implement Multiview.  This depend on how viewIndex is
1437        * used in all the active shaders, so this check can't be done per
1438        * individual shaders.
1439        */
1440       nir_shader *shaders[MESA_SHADER_STAGES] = {};
1441       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1442          shaders[s] = stages[s].nir;
1443
1444       pipeline->use_primitive_replication =
1445          anv_check_for_primitive_replication(shaders, pipeline);
1446    } else {
1447       pipeline->use_primitive_replication = false;
1448    }
1449
1450    struct anv_pipeline_stage *prev_stage = NULL;
1451    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1452       if (!stages[s].entrypoint)
1453          continue;
1454
1455       int64_t stage_start = os_time_get_nano();
1456
1457       void *stage_ctx = ralloc_context(NULL);
1458
1459       nir_xfb_info *xfb_info = NULL;
1460       if (s == MESA_SHADER_VERTEX ||
1461           s == MESA_SHADER_TESS_EVAL ||
1462           s == MESA_SHADER_GEOMETRY)
1463          xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1464
1465       anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1466
1467       switch (s) {
1468       case MESA_SHADER_VERTEX:
1469          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1470                                  &stages[s]);
1471          break;
1472       case MESA_SHADER_TESS_CTRL:
1473          anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1474                                   &stages[s], prev_stage);
1475          break;
1476       case MESA_SHADER_TESS_EVAL:
1477          anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1478                                   &stages[s], prev_stage);
1479          break;
1480       case MESA_SHADER_GEOMETRY:
1481          anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1482                                  &stages[s], prev_stage);
1483          break;
1484       case MESA_SHADER_FRAGMENT:
1485          anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1486                                  &stages[s], prev_stage);
1487          break;
1488       default:
1489          unreachable("Invalid graphics shader stage");
1490       }
1491       if (stages[s].code == NULL) {
1492          ralloc_free(stage_ctx);
1493          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1494          goto fail;
1495       }
1496
1497       anv_nir_validate_push_layout(&stages[s].prog_data.base,
1498                                    &stages[s].bind_map);
1499
1500       struct anv_shader_bin *bin =
1501          anv_device_upload_kernel(pipeline->base.device, cache, s,
1502                                   &stages[s].cache_key,
1503                                   sizeof(stages[s].cache_key),
1504                                   stages[s].code,
1505                                   stages[s].prog_data.base.program_size,
1506                                   stages[s].nir->constant_data,
1507                                   stages[s].nir->constant_data_size,
1508                                   &stages[s].prog_data.base,
1509                                   brw_prog_data_size(s),
1510                                   stages[s].stats, stages[s].num_stats,
1511                                   xfb_info, &stages[s].bind_map);
1512       if (!bin) {
1513          ralloc_free(stage_ctx);
1514          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1515          goto fail;
1516       }
1517
1518       anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1519
1520       pipeline->shaders[s] = bin;
1521       ralloc_free(stage_ctx);
1522
1523       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1524
1525       prev_stage = &stages[s];
1526    }
1527
1528    ralloc_free(pipeline_ctx);
1529
1530 done:
1531
1532    if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1533        pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1534       /* This can happen if we decided to implicitly disable the fragment
1535        * shader.  See anv_pipeline_compile_fs().
1536        */
1537       anv_shader_bin_unref(pipeline->base.device,
1538                            pipeline->shaders[MESA_SHADER_FRAGMENT]);
1539       pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1540       pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1541    }
1542
1543    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1544
1545    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1546       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1547    if (create_feedback) {
1548       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1549
1550       assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1551       for (uint32_t i = 0; i < info->stageCount; i++) {
1552          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1553          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1554       }
1555    }
1556
1557    return VK_SUCCESS;
1558
1559 fail:
1560    ralloc_free(pipeline_ctx);
1561
1562    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1563       if (pipeline->shaders[s])
1564          anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1565    }
1566
1567    return result;
1568 }
1569
1570 static void
1571 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
1572 {
1573    assert(glsl_type_is_vector_or_scalar(type));
1574
1575    uint32_t comp_size = glsl_type_is_boolean(type)
1576       ? 4 : glsl_get_bit_size(type) / 8;
1577    unsigned length = glsl_get_vector_elements(type);
1578    *size = comp_size * length,
1579    *align = comp_size * (length == 3 ? 4 : length);
1580 }
1581
1582 VkResult
1583 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1584                         struct anv_pipeline_cache *cache,
1585                         const VkComputePipelineCreateInfo *info,
1586                         const struct anv_shader_module *module,
1587                         const char *entrypoint,
1588                         const VkSpecializationInfo *spec_info)
1589 {
1590    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1591       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1592    };
1593    int64_t pipeline_start = os_time_get_nano();
1594
1595    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1596
1597    struct anv_pipeline_stage stage = {
1598       .stage = MESA_SHADER_COMPUTE,
1599       .module = module,
1600       .entrypoint = entrypoint,
1601       .spec_info = spec_info,
1602       .cache_key = {
1603          .stage = MESA_SHADER_COMPUTE,
1604       },
1605       .feedback = {
1606          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1607       },
1608    };
1609    anv_pipeline_hash_shader(stage.module,
1610                             stage.entrypoint,
1611                             MESA_SHADER_COMPUTE,
1612                             stage.spec_info,
1613                             stage.shader_sha1);
1614
1615    struct anv_shader_bin *bin = NULL;
1616
1617    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1618       vk_find_struct_const(info->stage.pNext,
1619                            PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1620
1621    populate_cs_prog_key(&pipeline->base.device->info, info->stage.flags,
1622                         rss_info, &stage.key.cs);
1623
1624    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1625
1626    const bool skip_cache_lookup =
1627       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1628
1629    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1630
1631    bool cache_hit = false;
1632    if (!skip_cache_lookup) {
1633       bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1634                                          &stage.cache_key,
1635                                          sizeof(stage.cache_key),
1636                                          &cache_hit);
1637    }
1638
1639    void *mem_ctx = ralloc_context(NULL);
1640    if (bin == NULL) {
1641       int64_t stage_start = os_time_get_nano();
1642
1643       stage.bind_map = (struct anv_pipeline_bind_map) {
1644          .surface_to_descriptor = stage.surface_to_descriptor,
1645          .sampler_to_descriptor = stage.sampler_to_descriptor
1646       };
1647
1648       /* Set up a binding for the gl_NumWorkGroups */
1649       stage.bind_map.surface_count = 1;
1650       stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1651          .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1652       };
1653
1654       stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1655       if (stage.nir == NULL) {
1656          ralloc_free(mem_ctx);
1657          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1658       }
1659
1660       NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1661
1662       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1663
1664       NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
1665                  nir_var_mem_shared, shared_type_info);
1666       NIR_PASS_V(stage.nir, nir_lower_explicit_io,
1667                  nir_var_mem_shared, nir_address_format_32bit_offset);
1668       NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1669
1670       stage.num_stats = 1;
1671       stage.code = brw_compile_cs(compiler, pipeline->base.device, mem_ctx,
1672                                   &stage.key.cs, &stage.prog_data.cs,
1673                                   stage.nir, -1, stage.stats, NULL);
1674       if (stage.code == NULL) {
1675          ralloc_free(mem_ctx);
1676          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1677       }
1678
1679       anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1680
1681       if (!stage.prog_data.cs.uses_num_work_groups) {
1682          assert(stage.bind_map.surface_to_descriptor[0].set ==
1683                 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1684          stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1685       }
1686
1687       const unsigned code_size = stage.prog_data.base.program_size;
1688       bin = anv_device_upload_kernel(pipeline->base.device, cache,
1689                                      MESA_SHADER_COMPUTE,
1690                                      &stage.cache_key, sizeof(stage.cache_key),
1691                                      stage.code, code_size,
1692                                      stage.nir->constant_data,
1693                                      stage.nir->constant_data_size,
1694                                      &stage.prog_data.base,
1695                                      sizeof(stage.prog_data.cs),
1696                                      stage.stats, stage.num_stats,
1697                                      NULL, &stage.bind_map);
1698       if (!bin) {
1699          ralloc_free(mem_ctx);
1700          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1701       }
1702
1703       stage.feedback.duration = os_time_get_nano() - stage_start;
1704    }
1705
1706    anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1707
1708    ralloc_free(mem_ctx);
1709
1710    if (cache_hit) {
1711       stage.feedback.flags |=
1712          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1713       pipeline_feedback.flags |=
1714          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1715    }
1716    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1717
1718    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1719       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1720    if (create_feedback) {
1721       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1722
1723       assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1724       create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1725    }
1726
1727    pipeline->cs = bin;
1728
1729    return VK_SUCCESS;
1730 }
1731
1732 struct anv_cs_parameters
1733 anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
1734 {
1735    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1736
1737    struct anv_cs_parameters cs_params = {};
1738
1739    cs_params.group_size = cs_prog_data->local_size[0] *
1740                           cs_prog_data->local_size[1] *
1741                           cs_prog_data->local_size[2];
1742    cs_params.simd_size =
1743       brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
1744                                       cs_prog_data, cs_params.group_size);
1745    cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
1746
1747    return cs_params;
1748 }
1749
1750 /**
1751  * Copy pipeline state not marked as dynamic.
1752  * Dynamic state is pipeline state which hasn't been provided at pipeline
1753  * creation time, but is dynamically provided afterwards using various
1754  * vkCmdSet* functions.
1755  *
1756  * The set of state considered "non_dynamic" is determined by the pieces of
1757  * state that have their corresponding VkDynamicState enums omitted from
1758  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1759  *
1760  * @param[out] pipeline    Destination non_dynamic state.
1761  * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
1762  */
1763 static void
1764 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
1765                        const VkGraphicsPipelineCreateInfo *pCreateInfo)
1766 {
1767    anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1768    struct anv_subpass *subpass = pipeline->subpass;
1769
1770    pipeline->dynamic_state = default_dynamic_state;
1771
1772    if (pCreateInfo->pDynamicState) {
1773       /* Remove all of the states that are marked as dynamic */
1774       uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1775       for (uint32_t s = 0; s < count; s++) {
1776          states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
1777             pCreateInfo->pDynamicState->pDynamicStates[s]);
1778       }
1779    }
1780
1781    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1782
1783    /* Section 9.2 of the Vulkan 1.0.15 spec says:
1784     *
1785     *    pViewportState is [...] NULL if the pipeline
1786     *    has rasterization disabled.
1787     */
1788    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1789       assert(pCreateInfo->pViewportState);
1790
1791       dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1792       if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
1793          typed_memcpy(dynamic->viewport.viewports,
1794                      pCreateInfo->pViewportState->pViewports,
1795                      pCreateInfo->pViewportState->viewportCount);
1796       }
1797
1798       dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1799       if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
1800          typed_memcpy(dynamic->scissor.scissors,
1801                      pCreateInfo->pViewportState->pScissors,
1802                      pCreateInfo->pViewportState->scissorCount);
1803       }
1804    }
1805
1806    if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
1807       assert(pCreateInfo->pRasterizationState);
1808       dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1809    }
1810
1811    if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
1812       assert(pCreateInfo->pRasterizationState);
1813       dynamic->depth_bias.bias =
1814          pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1815       dynamic->depth_bias.clamp =
1816          pCreateInfo->pRasterizationState->depthBiasClamp;
1817       dynamic->depth_bias.slope =
1818          pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1819    }
1820
1821    /* Section 9.2 of the Vulkan 1.0.15 spec says:
1822     *
1823     *    pColorBlendState is [...] NULL if the pipeline has rasterization
1824     *    disabled or if the subpass of the render pass the pipeline is
1825     *    created against does not use any color attachments.
1826     */
1827    bool uses_color_att = false;
1828    for (unsigned i = 0; i < subpass->color_count; ++i) {
1829       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1830          uses_color_att = true;
1831          break;
1832       }
1833    }
1834
1835    if (uses_color_att &&
1836        !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1837       assert(pCreateInfo->pColorBlendState);
1838
1839       if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
1840          typed_memcpy(dynamic->blend_constants,
1841                      pCreateInfo->pColorBlendState->blendConstants, 4);
1842    }
1843
1844    /* If there is no depthstencil attachment, then don't read
1845     * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1846     * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1847     * no need to override the depthstencil defaults in
1848     * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1849     *
1850     * Section 9.2 of the Vulkan 1.0.15 spec says:
1851     *
1852     *    pDepthStencilState is [...] NULL if the pipeline has rasterization
1853     *    disabled or if the subpass of the render pass the pipeline is created
1854     *    against does not use a depth/stencil attachment.
1855     */
1856    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1857        subpass->depth_stencil_attachment) {
1858       assert(pCreateInfo->pDepthStencilState);
1859
1860       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
1861          dynamic->depth_bounds.min =
1862             pCreateInfo->pDepthStencilState->minDepthBounds;
1863          dynamic->depth_bounds.max =
1864             pCreateInfo->pDepthStencilState->maxDepthBounds;
1865       }
1866
1867       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
1868          dynamic->stencil_compare_mask.front =
1869             pCreateInfo->pDepthStencilState->front.compareMask;
1870          dynamic->stencil_compare_mask.back =
1871             pCreateInfo->pDepthStencilState->back.compareMask;
1872       }
1873
1874       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
1875          dynamic->stencil_write_mask.front =
1876             pCreateInfo->pDepthStencilState->front.writeMask;
1877          dynamic->stencil_write_mask.back =
1878             pCreateInfo->pDepthStencilState->back.writeMask;
1879       }
1880
1881       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
1882          dynamic->stencil_reference.front =
1883             pCreateInfo->pDepthStencilState->front.reference;
1884          dynamic->stencil_reference.back =
1885             pCreateInfo->pDepthStencilState->back.reference;
1886       }
1887    }
1888
1889    const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
1890       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
1891                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
1892    if (line_state) {
1893       if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
1894          dynamic->line_stipple.factor = line_state->lineStippleFactor;
1895          dynamic->line_stipple.pattern = line_state->lineStipplePattern;
1896       }
1897    }
1898
1899    pipeline->dynamic_state_mask = states;
1900 }
1901
1902 static void
1903 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1904 {
1905 #ifdef DEBUG
1906    struct anv_render_pass *renderpass = NULL;
1907    struct anv_subpass *subpass = NULL;
1908
1909    /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1910     * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1911     */
1912    assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1913
1914    renderpass = anv_render_pass_from_handle(info->renderPass);
1915    assert(renderpass);
1916
1917    assert(info->subpass < renderpass->subpass_count);
1918    subpass = &renderpass->subpasses[info->subpass];
1919
1920    assert(info->stageCount >= 1);
1921    assert(info->pVertexInputState);
1922    assert(info->pInputAssemblyState);
1923    assert(info->pRasterizationState);
1924    if (!info->pRasterizationState->rasterizerDiscardEnable) {
1925       assert(info->pViewportState);
1926       assert(info->pMultisampleState);
1927
1928       if (subpass && subpass->depth_stencil_attachment)
1929          assert(info->pDepthStencilState);
1930
1931       if (subpass && subpass->color_count > 0) {
1932          bool all_color_unused = true;
1933          for (int i = 0; i < subpass->color_count; i++) {
1934             if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1935                all_color_unused = false;
1936          }
1937          /* pColorBlendState is ignored if the pipeline has rasterization
1938           * disabled or if the subpass of the render pass the pipeline is
1939           * created against does not use any color attachments.
1940           */
1941          assert(info->pColorBlendState || all_color_unused);
1942       }
1943    }
1944
1945    for (uint32_t i = 0; i < info->stageCount; ++i) {
1946       switch (info->pStages[i].stage) {
1947       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1948       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1949          assert(info->pTessellationState);
1950          break;
1951       default:
1952          break;
1953       }
1954    }
1955 #endif
1956 }
1957
1958 /**
1959  * Calculate the desired L3 partitioning based on the current state of the
1960  * pipeline.  For now this simply returns the conservative defaults calculated
1961  * by get_default_l3_weights(), but we could probably do better by gathering
1962  * more statistics from the pipeline state (e.g. guess of expected URB usage
1963  * and bound surfaces), or by using feed-back from performance counters.
1964  */
1965 void
1966 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1967 {
1968    const struct gen_device_info *devinfo = &pipeline->device->info;
1969
1970    const struct gen_l3_weights w =
1971       gen_get_default_l3_weights(devinfo, true, needs_slm);
1972
1973    pipeline->l3_config = gen_get_l3_config(devinfo, w);
1974 }
1975
1976 VkResult
1977 anv_pipeline_init(struct anv_graphics_pipeline *pipeline,
1978                   struct anv_device *device,
1979                   struct anv_pipeline_cache *cache,
1980                   const VkGraphicsPipelineCreateInfo *pCreateInfo,
1981                   const VkAllocationCallbacks *alloc)
1982 {
1983    VkResult result;
1984
1985    anv_pipeline_validate_create_info(pCreateInfo);
1986
1987    if (alloc == NULL)
1988       alloc = &device->vk.alloc;
1989
1990    vk_object_base_init(&device->vk, &pipeline->base.base,
1991                        VK_OBJECT_TYPE_PIPELINE);
1992    pipeline->base.device = device;
1993    pipeline->base.type = ANV_PIPELINE_GRAPHICS;
1994
1995    ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1996    assert(pCreateInfo->subpass < render_pass->subpass_count);
1997    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1998
1999    result = anv_reloc_list_init(&pipeline->base.batch_relocs, alloc);
2000    if (result != VK_SUCCESS)
2001       return result;
2002
2003    pipeline->base.batch.alloc = alloc;
2004    pipeline->base.batch.next = pipeline->base.batch.start = pipeline->batch_data;
2005    pipeline->base.batch.end = pipeline->base.batch.start + sizeof(pipeline->batch_data);
2006    pipeline->base.batch.relocs = &pipeline->base.batch_relocs;
2007    pipeline->base.batch.status = VK_SUCCESS;
2008
2009    pipeline->base.mem_ctx = ralloc_context(NULL);
2010    pipeline->base.flags = pCreateInfo->flags;
2011
2012    assert(pCreateInfo->pRasterizationState);
2013
2014    copy_non_dynamic_state(pipeline, pCreateInfo);
2015    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2016
2017    /* Previously we enabled depth clipping when !depthClampEnable.
2018     * DepthClipStateCreateInfo now makes depth clipping explicit so if the
2019     * clipping info is available, use its enable value to determine clipping,
2020     * otherwise fallback to the previous !depthClampEnable logic.
2021     */
2022    const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2023       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2024                            PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2025    pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2026
2027    pipeline->sample_shading_enable =
2028       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2029       pCreateInfo->pMultisampleState &&
2030       pCreateInfo->pMultisampleState->sampleShadingEnable;
2031
2032    /* When we free the pipeline, we detect stages based on the NULL status
2033     * of various prog_data pointers.  Make them NULL by default.
2034     */
2035    memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
2036
2037    util_dynarray_init(&pipeline->base.executables, pipeline->base.mem_ctx);
2038
2039    result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2040    if (result != VK_SUCCESS) {
2041       ralloc_free(pipeline->base.mem_ctx);
2042       anv_reloc_list_finish(&pipeline->base.batch_relocs, alloc);
2043       return result;
2044    }
2045
2046    assert(pipeline->shaders[MESA_SHADER_VERTEX]);
2047
2048    anv_pipeline_setup_l3_config(&pipeline->base, false);
2049
2050    const VkPipelineVertexInputStateCreateInfo *vi_info =
2051       pCreateInfo->pVertexInputState;
2052
2053    const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2054
2055    pipeline->vb_used = 0;
2056    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2057       const VkVertexInputAttributeDescription *desc =
2058          &vi_info->pVertexAttributeDescriptions[i];
2059
2060       if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2061          pipeline->vb_used |= 1 << desc->binding;
2062    }
2063
2064    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2065       const VkVertexInputBindingDescription *desc =
2066          &vi_info->pVertexBindingDescriptions[i];
2067
2068       pipeline->vb[desc->binding].stride = desc->stride;
2069
2070       /* Step rate is programmed per vertex element (attribute), not
2071        * binding. Set up a map of which bindings step per instance, for
2072        * reference by vertex element setup. */
2073       switch (desc->inputRate) {
2074       default:
2075       case VK_VERTEX_INPUT_RATE_VERTEX:
2076          pipeline->vb[desc->binding].instanced = false;
2077          break;
2078       case VK_VERTEX_INPUT_RATE_INSTANCE:
2079          pipeline->vb[desc->binding].instanced = true;
2080          break;
2081       }
2082
2083       pipeline->vb[desc->binding].instance_divisor = 1;
2084    }
2085
2086    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2087       vk_find_struct_const(vi_info->pNext,
2088                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2089    if (vi_div_state) {
2090       for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2091          const VkVertexInputBindingDivisorDescriptionEXT *desc =
2092             &vi_div_state->pVertexBindingDivisors[i];
2093
2094          pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2095       }
2096    }
2097
2098    /* Our implementation of VK_KHR_multiview uses instancing to draw the
2099     * different views.  If the client asks for instancing, we need to multiply
2100     * the instance divisor by the number of views ensure that we repeat the
2101     * client's per-instance data once for each view.
2102     */
2103    if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2104       const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2105       for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2106          if (pipeline->vb[vb].instanced)
2107             pipeline->vb[vb].instance_divisor *= view_count;
2108       }
2109    }
2110
2111    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2112       pCreateInfo->pInputAssemblyState;
2113    const VkPipelineTessellationStateCreateInfo *tess_info =
2114       pCreateInfo->pTessellationState;
2115    pipeline->primitive_restart = ia_info->primitiveRestartEnable;
2116
2117    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2118       pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2119    else
2120       pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
2121
2122    return VK_SUCCESS;
2123 }
2124
2125 #define WRITE_STR(field, ...) ({                               \
2126    memset(field, 0, sizeof(field));                            \
2127    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
2128    assert(i > 0 && i < sizeof(field));                         \
2129 })
2130
2131 VkResult anv_GetPipelineExecutablePropertiesKHR(
2132     VkDevice                                    device,
2133     const VkPipelineInfoKHR*                    pPipelineInfo,
2134     uint32_t*                                   pExecutableCount,
2135     VkPipelineExecutablePropertiesKHR*          pProperties)
2136 {
2137    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
2138    VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
2139
2140    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
2141       vk_outarray_append(&out, props) {
2142          gl_shader_stage stage = exe->stage;
2143          props->stages = mesa_to_vk_shader_stage(stage);
2144
2145          unsigned simd_width = exe->stats.dispatch_width;
2146          if (stage == MESA_SHADER_FRAGMENT) {
2147             WRITE_STR(props->name, "%s%d %s",
2148                       simd_width ? "SIMD" : "vec",
2149                       simd_width ? simd_width : 4,
2150                       _mesa_shader_stage_to_string(stage));
2151          } else {
2152             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
2153          }
2154          WRITE_STR(props->description, "%s%d %s shader",
2155                    simd_width ? "SIMD" : "vec",
2156                    simd_width ? simd_width : 4,
2157                    _mesa_shader_stage_to_string(stage));
2158
2159          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
2160           * wants a subgroup size of 1.
2161           */
2162          props->subgroupSize = MAX2(simd_width, 1);
2163       }
2164    }
2165
2166    return vk_outarray_status(&out);
2167 }
2168
2169 static const struct anv_pipeline_executable *
2170 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
2171 {
2172    assert(index < util_dynarray_num_elements(&pipeline->executables,
2173                                              struct anv_pipeline_executable));
2174    return util_dynarray_element(
2175       &pipeline->executables, struct anv_pipeline_executable, index);
2176 }
2177
2178 VkResult anv_GetPipelineExecutableStatisticsKHR(
2179     VkDevice                                    device,
2180     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
2181     uint32_t*                                   pStatisticCount,
2182     VkPipelineExecutableStatisticKHR*           pStatistics)
2183 {
2184    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2185    VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
2186
2187    const struct anv_pipeline_executable *exe =
2188       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2189
2190    const struct brw_stage_prog_data *prog_data;
2191    switch (pipeline->type) {
2192    case ANV_PIPELINE_GRAPHICS: {
2193       prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
2194       break;
2195    }
2196    case ANV_PIPELINE_COMPUTE: {
2197       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
2198       break;
2199    }
2200    default:
2201       unreachable("invalid pipeline type");
2202    }
2203
2204    vk_outarray_append(&out, stat) {
2205       WRITE_STR(stat->name, "Instruction Count");
2206       WRITE_STR(stat->description,
2207                 "Number of GEN instructions in the final generated "
2208                 "shader executable.");
2209       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2210       stat->value.u64 = exe->stats.instructions;
2211    }
2212
2213    vk_outarray_append(&out, stat) {
2214       WRITE_STR(stat->name, "SEND Count");
2215       WRITE_STR(stat->description,
2216                 "Number of instructions in the final generated shader "
2217                 "executable which access external units such as the "
2218                 "constant cache or the sampler.");
2219       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2220       stat->value.u64 = exe->stats.sends;
2221    }
2222
2223    vk_outarray_append(&out, stat) {
2224       WRITE_STR(stat->name, "Loop Count");
2225       WRITE_STR(stat->description,
2226                 "Number of loops (not unrolled) in the final generated "
2227                 "shader executable.");
2228       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2229       stat->value.u64 = exe->stats.loops;
2230    }
2231
2232    vk_outarray_append(&out, stat) {
2233       WRITE_STR(stat->name, "Cycle Count");
2234       WRITE_STR(stat->description,
2235                 "Estimate of the number of EU cycles required to execute "
2236                 "the final generated executable.  This is an estimate only "
2237                 "and may vary greatly from actual run-time performance.");
2238       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2239       stat->value.u64 = exe->stats.cycles;
2240    }
2241
2242    vk_outarray_append(&out, stat) {
2243       WRITE_STR(stat->name, "Spill Count");
2244       WRITE_STR(stat->description,
2245                 "Number of scratch spill operations.  This gives a rough "
2246                 "estimate of the cost incurred due to spilling temporary "
2247                 "values to memory.  If this is non-zero, you may want to "
2248                 "adjust your shader to reduce register pressure.");
2249       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2250       stat->value.u64 = exe->stats.spills;
2251    }
2252
2253    vk_outarray_append(&out, stat) {
2254       WRITE_STR(stat->name, "Fill Count");
2255       WRITE_STR(stat->description,
2256                 "Number of scratch fill operations.  This gives a rough "
2257                 "estimate of the cost incurred due to spilling temporary "
2258                 "values to memory.  If this is non-zero, you may want to "
2259                 "adjust your shader to reduce register pressure.");
2260       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2261       stat->value.u64 = exe->stats.fills;
2262    }
2263
2264    vk_outarray_append(&out, stat) {
2265       WRITE_STR(stat->name, "Scratch Memory Size");
2266       WRITE_STR(stat->description,
2267                 "Number of bytes of scratch memory required by the "
2268                 "generated shader executable.  If this is non-zero, you "
2269                 "may want to adjust your shader to reduce register "
2270                 "pressure.");
2271       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2272       stat->value.u64 = prog_data->total_scratch;
2273    }
2274
2275    if (exe->stage == MESA_SHADER_COMPUTE) {
2276       vk_outarray_append(&out, stat) {
2277          WRITE_STR(stat->name, "Workgroup Memory Size");
2278          WRITE_STR(stat->description,
2279                    "Number of bytes of workgroup shared memory used by this "
2280                    "compute shader including any padding.");
2281          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2282          stat->value.u64 = brw_cs_prog_data_const(prog_data)->slm_size;
2283       }
2284    }
2285
2286    return vk_outarray_status(&out);
2287 }
2288
2289 static bool
2290 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2291               const char *data)
2292 {
2293    ir->isText = VK_TRUE;
2294
2295    size_t data_len = strlen(data) + 1;
2296
2297    if (ir->pData == NULL) {
2298       ir->dataSize = data_len;
2299       return true;
2300    }
2301
2302    strncpy(ir->pData, data, ir->dataSize);
2303    if (ir->dataSize < data_len)
2304       return false;
2305
2306    ir->dataSize = data_len;
2307    return true;
2308 }
2309
2310 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2311     VkDevice                                    device,
2312     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
2313     uint32_t*                                   pInternalRepresentationCount,
2314     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2315 {
2316    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2317    VK_OUTARRAY_MAKE(out, pInternalRepresentations,
2318                     pInternalRepresentationCount);
2319    bool incomplete_text = false;
2320
2321    const struct anv_pipeline_executable *exe =
2322       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2323
2324    if (exe->nir) {
2325       vk_outarray_append(&out, ir) {
2326          WRITE_STR(ir->name, "Final NIR");
2327          WRITE_STR(ir->description,
2328                    "Final NIR before going into the back-end compiler");
2329
2330          if (!write_ir_text(ir, exe->nir))
2331             incomplete_text = true;
2332       }
2333    }
2334
2335    if (exe->disasm) {
2336       vk_outarray_append(&out, ir) {
2337          WRITE_STR(ir->name, "GEN Assembly");
2338          WRITE_STR(ir->description,
2339                    "Final GEN assembly for the generated shader binary");
2340
2341          if (!write_ir_text(ir, exe->disasm))
2342             incomplete_text = true;
2343       }
2344    }
2345
2346    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2347 }