src/intel/vulkan/anv_pipeline.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28 #include <fcntl.h>
  29
  30 #include "util/mesa-sha1.h"
  31 #include "common/gen_l3_config.h"
  32 #include "anv_private.h"
  33 #include "compiler/brw_nir.h"
  34 #include "anv_nir.h"
  35 #include "spirv/nir_spirv.h"
  36 #include "vk_util.h"
  37
  38 /* Needed for SWIZZLE macros */
  39 #include "program/prog_instruction.h"
  40
  41 // Shader functions
  42
  43 VkResult anv_CreateShaderModule(
  44     VkDevice                                    _device,
  45     const VkShaderModuleCreateInfo*             pCreateInfo,
  46     const VkAllocationCallbacks*                pAllocator,
  47     VkShaderModule*                             pShaderModule)
  48 {
  49    ANV_FROM_HANDLE(anv_device, device, _device);
  50    struct anv_shader_module *module;
  51
  52    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
  53    assert(pCreateInfo->flags == 0);
  54
  55    module = vk_alloc2(&device->alloc, pAllocator,
  56                        sizeof(*module) + pCreateInfo->codeSize, 8,
  57                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  58    if (module == NULL)
  59       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  60
  61    module->size = pCreateInfo->codeSize;
  62    memcpy(module->data, pCreateInfo->pCode, module->size);
  63
  64    _mesa_sha1_compute(module->data, module->size, module->sha1);
  65
  66    *pShaderModule = anv_shader_module_to_handle(module);
  67
  68    return VK_SUCCESS;
  69 }
  70
  71 void anv_DestroyShaderModule(
  72     VkDevice                                    _device,
  73     VkShaderModule                              _module,
  74     const VkAllocationCallbacks*                pAllocator)
  75 {
  76    ANV_FROM_HANDLE(anv_device, device, _device);
  77    ANV_FROM_HANDLE(anv_shader_module, module, _module);
  78
  79    if (!module)
  80       return;
  81
  82    vk_free2(&device->alloc, pAllocator, module);
  83 }
  84
  85 #define SPIR_V_MAGIC_NUMBER 0x07230203
  86
  87 static const uint64_t stage_to_debug[] = {
  88    [MESA_SHADER_VERTEX] = DEBUG_VS,
  89    [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
  90    [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
  91    [MESA_SHADER_GEOMETRY] = DEBUG_GS,
  92    [MESA_SHADER_FRAGMENT] = DEBUG_WM,
  93    [MESA_SHADER_COMPUTE] = DEBUG_CS,
  94 };
  95
  96 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
  97  * we can't do that yet because we don't have the ability to copy nir.
  98  */
  99 static nir_shader *
 100 anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
 101                           void *mem_ctx,
 102                           const struct anv_shader_module *module,
 103                           const char *entrypoint_name,
 104                           gl_shader_stage stage,
 105                           const VkSpecializationInfo *spec_info)
 106 {
 107    const struct anv_device *device = pipeline->device;
 108
 109    const struct brw_compiler *compiler =
 110       device->instance->physicalDevice.compiler;
 111    const nir_shader_compiler_options *nir_options =
 112       compiler->glsl_compiler_options[stage].NirOptions;
 113
 114    uint32_t *spirv = (uint32_t *) module->data;
 115    assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
 116    assert(module->size % 4 == 0);
 117
 118    uint32_t num_spec_entries = 0;
 119    struct nir_spirv_specialization *spec_entries = NULL;
 120    if (spec_info && spec_info->mapEntryCount > 0) {
 121       num_spec_entries = spec_info->mapEntryCount;
 122       spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
 123       for (uint32_t i = 0; i < num_spec_entries; i++) {
 124          VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
 125          const void *data = spec_info->pData + entry.offset;
 126          assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
 127
 128          spec_entries[i].id = spec_info->pMapEntries[i].constantID;
 129          if (spec_info->dataSize == 8)
 130             spec_entries[i].data64 = *(const uint64_t *)data;
 131          else
 132             spec_entries[i].data32 = *(const uint32_t *)data;
 133       }
 134    }
 135
 136    struct spirv_to_nir_options spirv_options = {
 137       .lower_workgroup_access_to_offsets = true,
 138       .caps = {
 139          .float64 = device->instance->physicalDevice.info.gen >= 8,
 140          .int64 = device->instance->physicalDevice.info.gen >= 8,
 141          .tessellation = true,
 142          .device_group = true,
 143          .draw_parameters = true,
 144          .image_write_without_format = true,
 145          .min_lod = true,
 146          .multiview = true,
 147          .variable_pointers = true,
 148          .storage_16bit = device->instance->physicalDevice.info.gen >= 8,
 149          .int16 = device->instance->physicalDevice.info.gen >= 8,
 150          .shader_viewport_index_layer = true,
 151          .subgroup_arithmetic = true,
 152          .subgroup_basic = true,
 153          .subgroup_ballot = true,
 154          .subgroup_quad = true,
 155          .subgroup_shuffle = true,
 156          .subgroup_vote = true,
 157          .stencil_export = device->instance->physicalDevice.info.gen >= 9,
 158          .storage_8bit = device->instance->physicalDevice.info.gen >= 8,
 159          .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9,
 160       },
 161       .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
 162       .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
 163       .push_const_ptr_type = glsl_uint_type(),
 164       .shared_ptr_type = glsl_uint_type(),
 165    };
 166
 167    nir_function *entry_point =
 168       spirv_to_nir(spirv, module->size / 4,
 169                    spec_entries, num_spec_entries,
 170                    stage, entrypoint_name, &spirv_options, nir_options);
 171    nir_shader *nir = entry_point->shader;
 172    assert(nir->info.stage == stage);
 173    nir_validate_shader(nir, "after spirv_to_nir");
 174    ralloc_steal(mem_ctx, nir);
 175
 176    free(spec_entries);
 177
 178    if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
 179       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
 180               gl_shader_stage_name(stage));
 181       nir_print_shader(nir, stderr);
 182    }
 183
 184    /* We have to lower away local constant initializers right before we
 185     * inline functions.  That way they get properly initialized at the top
 186     * of the function and not at the top of its caller.
 187     */
 188    NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
 189    NIR_PASS_V(nir, nir_lower_returns);
 190    NIR_PASS_V(nir, nir_inline_functions);
 191    NIR_PASS_V(nir, nir_opt_deref);
 192
 193    /* Pick off the single entrypoint that we want */
 194    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
 195       if (func != entry_point)
 196          exec_node_remove(&func->node);
 197    }
 198    assert(exec_list_length(&nir->functions) == 1);
 199
 200    /* Now that we've deleted all but the main function, we can go ahead and
 201     * lower the rest of the constant initializers.  We do this here so that
 202     * nir_remove_dead_variables and split_per_member_structs below see the
 203     * corresponding stores.
 204     */
 205    NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
 206
 207    /* Split member structs.  We do this before lower_io_to_temporaries so that
 208     * it doesn't lower system values to temporaries by accident.
 209     */
 210    NIR_PASS_V(nir, nir_split_var_copies);
 211    NIR_PASS_V(nir, nir_split_per_member_structs);
 212
 213    NIR_PASS_V(nir, nir_remove_dead_variables,
 214               nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
 215
 216    if (stage == MESA_SHADER_FRAGMENT)
 217       NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
 218
 219    NIR_PASS_V(nir, nir_propagate_invariant);
 220    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 221               entry_point->impl, true, false);
 222
 223    /* Vulkan uses the separate-shader linking model */
 224    nir->info.separate_shader = true;
 225
 226    nir = brw_preprocess_nir(compiler, nir);
 227
 228    if (stage == MESA_SHADER_FRAGMENT)
 229       NIR_PASS_V(nir, anv_nir_lower_input_attachments);
 230
 231    return nir;
 232 }
 233
 234 void anv_DestroyPipeline(
 235     VkDevice                                    _device,
 236     VkPipeline                                  _pipeline,
 237     const VkAllocationCallbacks*                pAllocator)
 238 {
 239    ANV_FROM_HANDLE(anv_device, device, _device);
 240    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 241
 242    if (!pipeline)
 243       return;
 244
 245    anv_reloc_list_finish(&pipeline->batch_relocs,
 246                          pAllocator ? pAllocator : &device->alloc);
 247    if (pipeline->blend_state.map)
 248       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
 249
 250    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 251       if (pipeline->shaders[s])
 252          anv_shader_bin_unref(device, pipeline->shaders[s]);
 253    }
 254
 255    vk_free2(&device->alloc, pAllocator, pipeline);
 256 }
 257
 258 static const uint32_t vk_to_gen_primitive_type[] = {
 259    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
 260    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
 261    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
 262    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
 263    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
 264    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
 265    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
 266    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
 267    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
 268    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
 269 };
 270
 271 static void
 272 populate_sampler_prog_key(const struct gen_device_info *devinfo,
 273                           struct brw_sampler_prog_key_data *key)
 274 {
 275    /* Almost all multisampled textures are compressed.  The only time when we
 276     * don't compress a multisampled texture is for 16x MSAA with a surface
 277     * width greater than 8k which is a bit of an edge case.  Since the sampler
 278     * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
 279     * to tell the compiler to always assume compression.
 280     */
 281    key->compressed_multisample_layout_mask = ~0;
 282
 283    /* SkyLake added support for 16x MSAA.  With this came a new message for
 284     * reading from a 16x MSAA surface with compression.  The new message was
 285     * needed because now the MCS data is 64 bits instead of 32 or lower as is
 286     * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
 287     * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
 288     * so we can just use it unconditionally.  This may not be quite as
 289     * efficient but it saves us from recompiling.
 290     */
 291    if (devinfo->gen >= 9)
 292       key->msaa_16 = ~0;
 293
 294    /* XXX: Handle texture swizzle on HSW- */
 295    for (int i = 0; i < MAX_SAMPLERS; i++) {
 296       /* Assume color sampler, no swizzling. (Works for BDW+) */
 297       key->swizzles[i] = SWIZZLE_XYZW;
 298    }
 299 }
 300
 301 static void
 302 populate_vs_prog_key(const struct gen_device_info *devinfo,
 303                      struct brw_vs_prog_key *key)
 304 {
 305    memset(key, 0, sizeof(*key));
 306
 307    populate_sampler_prog_key(devinfo, &key->tex);
 308
 309    /* XXX: Handle vertex input work-arounds */
 310
 311    /* XXX: Handle sampler_prog_key */
 312 }
 313
 314 static void
 315 populate_tcs_prog_key(const struct gen_device_info *devinfo,
 316                       unsigned input_vertices,
 317                       struct brw_tcs_prog_key *key)
 318 {
 319    memset(key, 0, sizeof(*key));
 320
 321    populate_sampler_prog_key(devinfo, &key->tex);
 322
 323    key->input_vertices = input_vertices;
 324 }
 325
 326 static void
 327 populate_tes_prog_key(const struct gen_device_info *devinfo,
 328                       struct brw_tes_prog_key *key)
 329 {
 330    memset(key, 0, sizeof(*key));
 331
 332    populate_sampler_prog_key(devinfo, &key->tex);
 333 }
 334
 335 static void
 336 populate_gs_prog_key(const struct gen_device_info *devinfo,
 337                      struct brw_gs_prog_key *key)
 338 {
 339    memset(key, 0, sizeof(*key));
 340
 341    populate_sampler_prog_key(devinfo, &key->tex);
 342 }
 343
 344 static void
 345 populate_wm_prog_key(const struct gen_device_info *devinfo,
 346                      const struct anv_subpass *subpass,
 347                      const VkPipelineMultisampleStateCreateInfo *ms_info,
 348                      struct brw_wm_prog_key *key)
 349 {
 350    memset(key, 0, sizeof(*key));
 351
 352    populate_sampler_prog_key(devinfo, &key->tex);
 353
 354    /* We set this to 0 here and set to the actual value before we call
 355     * brw_compile_fs.
 356     */
 357    key->input_slots_valid = 0;
 358
 359    /* Vulkan doesn't specify a default */
 360    key->high_quality_derivatives = false;
 361
 362    /* XXX Vulkan doesn't appear to specify */
 363    key->clamp_fragment_color = false;
 364
 365    assert(subpass->color_count <= MAX_RTS);
 366    for (uint32_t i = 0; i < subpass->color_count; i++) {
 367       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
 368          key->color_outputs_valid |= (1 << i);
 369    }
 370
 371    key->nr_color_regions = util_bitcount(key->color_outputs_valid);
 372
 373    key->replicate_alpha = key->nr_color_regions > 1 &&
 374                           ms_info && ms_info->alphaToCoverageEnable;
 375
 376    if (ms_info) {
 377       /* We should probably pull this out of the shader, but it's fairly
 378        * harmless to compute it and then let dead-code take care of it.
 379        */
 380       if (ms_info->rasterizationSamples > 1) {
 381          key->persample_interp =
 382             (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
 383          key->multisample_fbo = true;
 384       }
 385
 386       key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
 387    }
 388 }
 389
 390 static void
 391 populate_cs_prog_key(const struct gen_device_info *devinfo,
 392                      struct brw_cs_prog_key *key)
 393 {
 394    memset(key, 0, sizeof(*key));
 395
 396    populate_sampler_prog_key(devinfo, &key->tex);
 397 }
 398
 399 struct anv_pipeline_stage {
 400    gl_shader_stage stage;
 401
 402    const struct anv_shader_module *module;
 403    const char *entrypoint;
 404    const VkSpecializationInfo *spec_info;
 405
 406    union brw_any_prog_key key;
 407
 408    struct {
 409       gl_shader_stage stage;
 410       unsigned char sha1[20];
 411    } cache_key;
 412
 413    nir_shader *nir;
 414
 415    struct anv_pipeline_binding surface_to_descriptor[256];
 416    struct anv_pipeline_binding sampler_to_descriptor[256];
 417    struct anv_pipeline_bind_map bind_map;
 418
 419    union brw_any_prog_data prog_data;
 420 };
 421
 422 static void
 423 anv_pipeline_hash_shader(struct mesa_sha1 *ctx,
 424                          struct anv_pipeline_stage *stage)
 425 {
 426    _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1));
 427    _mesa_sha1_update(ctx, stage->entrypoint, strlen(stage->entrypoint));
 428    _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage));
 429    if (stage->spec_info) {
 430       _mesa_sha1_update(ctx, stage->spec_info->pMapEntries,
 431                         stage->spec_info->mapEntryCount *
 432                         sizeof(*stage->spec_info->pMapEntries));
 433       _mesa_sha1_update(ctx, stage->spec_info->pData,
 434                         stage->spec_info->dataSize);
 435    }
 436    _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage));
 437 }
 438
 439 static void
 440 anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
 441                            struct anv_pipeline_layout *layout,
 442                            struct anv_pipeline_stage *stages,
 443                            unsigned char *sha1_out)
 444 {
 445    struct mesa_sha1 ctx;
 446    _mesa_sha1_init(&ctx);
 447
 448    _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
 449                      sizeof(pipeline->subpass->view_mask));
 450
 451    if (layout)
 452       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 453
 454    const bool rba = pipeline->device->robust_buffer_access;
 455    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
 456
 457    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 458       if (stages[s].entrypoint)
 459          anv_pipeline_hash_shader(&ctx, &stages[s]);
 460    }
 461
 462    _mesa_sha1_final(&ctx, sha1_out);
 463 }
 464
 465 static void
 466 anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
 467                           struct anv_pipeline_layout *layout,
 468                           struct anv_pipeline_stage *stage,
 469                           unsigned char *sha1_out)
 470 {
 471    struct mesa_sha1 ctx;
 472    _mesa_sha1_init(&ctx);
 473
 474    if (layout)
 475       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 476
 477    const bool rba = pipeline->device->robust_buffer_access;
 478    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
 479
 480    anv_pipeline_hash_shader(&ctx, stage);
 481
 482    _mesa_sha1_final(&ctx, sha1_out);
 483 }
 484
 485 static void
 486 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 487                        void *mem_ctx,
 488                        struct anv_pipeline_stage *stage,
 489                        struct anv_pipeline_layout *layout)
 490 {
 491    const struct brw_compiler *compiler =
 492       pipeline->device->instance->physicalDevice.compiler;
 493
 494    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
 495    nir_shader *nir = stage->nir;
 496
 497    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
 498
 499    NIR_PASS_V(nir, anv_nir_lower_push_constants);
 500
 501    if (nir->info.stage != MESA_SHADER_COMPUTE)
 502       NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
 503
 504    if (nir->info.stage == MESA_SHADER_COMPUTE)
 505       prog_data->total_shared = nir->num_shared;
 506
 507    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 508
 509    if (nir->num_uniforms > 0) {
 510       assert(prog_data->nr_params == 0);
 511
 512       /* If the shader uses any push constants at all, we'll just give
 513        * them the maximum possible number
 514        */
 515       assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
 516       nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
 517       prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
 518       prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
 519
 520       /* We now set the param values to be offsets into a
 521        * anv_push_constant_data structure.  Since the compiler doesn't
 522        * actually dereference any of the gl_constant_value pointers in the
 523        * params array, it doesn't really matter what we put here.
 524        */
 525       struct anv_push_constants *null_data = NULL;
 526       /* Fill out the push constants section of the param array */
 527       for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
 528          prog_data->param[i] = ANV_PARAM_PUSH(
 529             (uintptr_t)&null_data->client_data[i * sizeof(float)]);
 530       }
 531    }
 532
 533    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
 534       pipeline->needs_data_cache = true;
 535
 536    NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
 537
 538    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
 539    if (layout) {
 540       anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
 541                                     pipeline->device->robust_buffer_access,
 542                                     layout, nir, prog_data,
 543                                     &stage->bind_map);
 544    }
 545
 546    if (nir->info.stage != MESA_SHADER_COMPUTE)
 547       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 548
 549    assert(nir->num_uniforms == prog_data->nr_params * 4);
 550
 551    stage->nir = nir;
 552 }
 553
 554 static void
 555 anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
 556 {
 557    prog_data->binding_table.size_bytes = 0;
 558    prog_data->binding_table.texture_start = bias;
 559    prog_data->binding_table.gather_texture_start = bias;
 560    prog_data->binding_table.ubo_start = bias;
 561    prog_data->binding_table.ssbo_start = bias;
 562    prog_data->binding_table.image_start = bias;
 563 }
 564
 565 static void
 566 anv_pipeline_link_vs(const struct brw_compiler *compiler,
 567                      struct anv_pipeline_stage *vs_stage,
 568                      struct anv_pipeline_stage *next_stage)
 569 {
 570    anv_fill_binding_table(&vs_stage->prog_data.vs.base.base, 0);
 571
 572    if (next_stage)
 573       brw_nir_link_shaders(compiler, &vs_stage->nir, &next_stage->nir);
 574 }
 575
 576 static const unsigned *
 577 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
 578                         void *mem_ctx,
 579                         struct anv_pipeline_stage *vs_stage)
 580 {
 581    brw_compute_vue_map(compiler->devinfo,
 582                        &vs_stage->prog_data.vs.base.vue_map,
 583                        vs_stage->nir->info.outputs_written,
 584                        vs_stage->nir->info.separate_shader);
 585
 586    return brw_compile_vs(compiler, NULL, mem_ctx, &vs_stage->key.vs,
 587                          &vs_stage->prog_data.vs, vs_stage->nir, -1, NULL);
 588 }
 589
 590 static void
 591 merge_tess_info(struct shader_info *tes_info,
 592                 const struct shader_info *tcs_info)
 593 {
 594    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
 595     *
 596     *    "PointMode. Controls generation of points rather than triangles
 597     *     or lines. This functionality defaults to disabled, and is
 598     *     enabled if either shader stage includes the execution mode.
 599     *
 600     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
 601     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
 602     * and OutputVertices, it says:
 603     *
 604     *    "One mode must be set in at least one of the tessellation
 605     *     shader stages."
 606     *
 607     * So, the fields can be set in either the TCS or TES, but they must
 608     * agree if set in both.  Our backend looks at TES, so bitwise-or in
 609     * the values from the TCS.
 610     */
 611    assert(tcs_info->tess.tcs_vertices_out == 0 ||
 612           tes_info->tess.tcs_vertices_out == 0 ||
 613           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
 614    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
 615
 616    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
 617           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
 618           tcs_info->tess.spacing == tes_info->tess.spacing);
 619    tes_info->tess.spacing |= tcs_info->tess.spacing;
 620
 621    assert(tcs_info->tess.primitive_mode == 0 ||
 622           tes_info->tess.primitive_mode == 0 ||
 623           tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
 624    tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
 625    tes_info->tess.ccw |= tcs_info->tess.ccw;
 626    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
 627 }
 628
 629 static void
 630 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
 631                       struct anv_pipeline_stage *tcs_stage,
 632                       struct anv_pipeline_stage *tes_stage)
 633 {
 634    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
 635
 636    anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
 637
 638    brw_nir_link_shaders(compiler, &tcs_stage->nir, &tes_stage->nir);
 639
 640    nir_lower_patch_vertices(tes_stage->nir,
 641                             tcs_stage->nir->info.tess.tcs_vertices_out,
 642                             NULL);
 643
 644    /* Copy TCS info into the TES info */
 645    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
 646
 647    anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
 648    anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
 649
 650    /* Whacking the key after cache lookup is a bit sketchy, but all of
 651     * this comes from the SPIR-V, which is part of the hash used for the
 652     * pipeline cache.  So it should be safe.
 653     */
 654    tcs_stage->key.tcs.tes_primitive_mode =
 655       tes_stage->nir->info.tess.primitive_mode;
 656    tcs_stage->key.tcs.quads_workaround =
 657       compiler->devinfo->gen < 9 &&
 658       tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
 659       tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
 660 }
 661
 662 static const unsigned *
 663 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
 664                          void *mem_ctx,
 665                          struct anv_pipeline_stage *tcs_stage,
 666                          struct anv_pipeline_stage *prev_stage)
 667 {
 668    tcs_stage->key.tcs.outputs_written =
 669       tcs_stage->nir->info.outputs_written;
 670    tcs_stage->key.tcs.patch_outputs_written =
 671       tcs_stage->nir->info.patch_outputs_written;
 672
 673    return brw_compile_tcs(compiler, NULL, mem_ctx, &tcs_stage->key.tcs,
 674                           &tcs_stage->prog_data.tcs, tcs_stage->nir,
 675                           -1, NULL);
 676 }
 677
 678 static void
 679 anv_pipeline_link_tes(const struct brw_compiler *compiler,
 680                       struct anv_pipeline_stage *tes_stage,
 681                       struct anv_pipeline_stage *next_stage)
 682 {
 683    anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
 684
 685    if (next_stage)
 686       brw_nir_link_shaders(compiler, &tes_stage->nir, &next_stage->nir);
 687 }
 688
 689 static const unsigned *
 690 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
 691                          void *mem_ctx,
 692                          struct anv_pipeline_stage *tes_stage,
 693                          struct anv_pipeline_stage *tcs_stage)
 694 {
 695    tes_stage->key.tes.inputs_read =
 696       tcs_stage->nir->info.outputs_written;
 697    tes_stage->key.tes.patch_inputs_read =
 698       tcs_stage->nir->info.patch_outputs_written;
 699
 700    return brw_compile_tes(compiler, NULL, mem_ctx, &tes_stage->key.tes,
 701                           &tcs_stage->prog_data.tcs.base.vue_map,
 702                           &tes_stage->prog_data.tes, tes_stage->nir,
 703                           NULL, -1, NULL);
 704 }
 705
 706 static void
 707 anv_pipeline_link_gs(const struct brw_compiler *compiler,
 708                      struct anv_pipeline_stage *gs_stage,
 709                      struct anv_pipeline_stage *next_stage)
 710 {
 711    anv_fill_binding_table(&gs_stage->prog_data.gs.base.base, 0);
 712
 713    if (next_stage)
 714       brw_nir_link_shaders(compiler, &gs_stage->nir, &next_stage->nir);
 715 }
 716
 717 static const unsigned *
 718 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
 719                         void *mem_ctx,
 720                         struct anv_pipeline_stage *gs_stage,
 721                         struct anv_pipeline_stage *prev_stage)
 722 {
 723    brw_compute_vue_map(compiler->devinfo,
 724                        &gs_stage->prog_data.gs.base.vue_map,
 725                        gs_stage->nir->info.outputs_written,
 726                        gs_stage->nir->info.separate_shader);
 727
 728    return brw_compile_gs(compiler, NULL, mem_ctx, &gs_stage->key.gs,
 729                          &gs_stage->prog_data.gs, gs_stage->nir,
 730                          NULL, -1, NULL);
 731 }
 732
 733 static void
 734 anv_pipeline_link_fs(const struct brw_compiler *compiler,
 735                      struct anv_pipeline_stage *stage)
 736 {
 737    unsigned num_rts = 0;
 738    const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1;
 739    struct anv_pipeline_binding rt_bindings[max_rt];
 740    nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
 741    int rt_to_bindings[max_rt];
 742    memset(rt_to_bindings, -1, sizeof(rt_to_bindings));
 743    bool rt_used[max_rt];
 744    memset(rt_used, 0, sizeof(rt_used));
 745
 746    /* Flag used render targets */
 747    nir_foreach_variable_safe(var, &stage->nir->outputs) {
 748       if (var->data.location < FRAG_RESULT_DATA0)
 749          continue;
 750
 751       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
 752       /* Unused or out-of-bounds */
 753       if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
 754          continue;
 755
 756       const unsigned array_len =
 757          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
 758       assert(rt + array_len <= max_rt);
 759
 760       for (unsigned i = 0; i < array_len; i++)
 761          rt_used[rt + i] = true;
 762    }
 763
 764    /* Set new, compacted, location */
 765    for (unsigned i = 0; i < max_rt; i++) {
 766       if (!rt_used[i])
 767          continue;
 768
 769       rt_to_bindings[i] = num_rts;
 770       rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
 771          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 772          .binding = 0,
 773          .index = i,
 774       };
 775       num_rts++;
 776    }
 777
 778    bool deleted_output = false;
 779    nir_foreach_variable_safe(var, &stage->nir->outputs) {
 780       if (var->data.location < FRAG_RESULT_DATA0)
 781          continue;
 782
 783       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
 784       if (rt >= MAX_RTS ||
 785           !(stage->key.wm.color_outputs_valid & (1 << rt))) {
 786          /* Unused or out-of-bounds, throw it away */
 787          deleted_output = true;
 788          var->data.mode = nir_var_local;
 789          exec_node_remove(&var->node);
 790          exec_list_push_tail(&impl->locals, &var->node);
 791          continue;
 792       }
 793
 794       /* Give it the new location */
 795       assert(rt_to_bindings[rt] != -1);
 796       var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0;
 797    }
 798
 799    if (deleted_output)
 800       nir_fixup_deref_modes(stage->nir);
 801
 802    if (num_rts == 0) {
 803       /* If we have no render targets, we need a null render target */
 804       rt_bindings[0] = (struct anv_pipeline_binding) {
 805          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 806          .binding = 0,
 807          .index = UINT32_MAX,
 808       };
 809       num_rts = 1;
 810    }
 811
 812    /* Now that we've determined the actual number of render targets, adjust
 813     * the key accordingly.
 814     */
 815    stage->key.wm.nr_color_regions = num_rts;
 816    stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
 817
 818    assert(num_rts <= max_rt);
 819    assert(stage->bind_map.surface_count == 0);
 820    typed_memcpy(stage->bind_map.surface_to_descriptor,
 821                 rt_bindings, num_rts);
 822    stage->bind_map.surface_count += num_rts;
 823
 824    anv_fill_binding_table(&stage->prog_data.wm.base, 0);
 825 }
 826
 827 static const unsigned *
 828 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
 829                         void *mem_ctx,
 830                         struct anv_pipeline_stage *fs_stage,
 831                         struct anv_pipeline_stage *prev_stage)
 832 {
 833    /* TODO: we could set this to 0 based on the information in nir_shader, but
 834     * we need this before we call spirv_to_nir.
 835     */
 836    assert(prev_stage);
 837    fs_stage->key.wm.input_slots_valid =
 838       prev_stage->prog_data.vue.vue_map.slots_valid;
 839
 840    const unsigned *code =
 841       brw_compile_fs(compiler, NULL, mem_ctx, &fs_stage->key.wm,
 842                      &fs_stage->prog_data.wm, fs_stage->nir,
 843                      NULL, -1, -1, -1, true, false, NULL, NULL);
 844
 845    if (fs_stage->key.wm.nr_color_regions == 0 &&
 846        !fs_stage->prog_data.wm.has_side_effects &&
 847        !fs_stage->prog_data.wm.uses_kill &&
 848        fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
 849        !fs_stage->prog_data.wm.computed_stencil) {
 850       /* This fragment shader has no outputs and no side effects.  Go ahead
 851        * and return the code pointer so we don't accidentally think the
 852        * compile failed but zero out prog_data which will set program_size to
 853        * zero and disable the stage.
 854        */
 855       memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
 856    }
 857
 858    return code;
 859 }
 860
 861 static VkResult
 862 anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
 863                               struct anv_pipeline_cache *cache,
 864                               const VkGraphicsPipelineCreateInfo *info)
 865 {
 866    const struct brw_compiler *compiler =
 867       pipeline->device->instance->physicalDevice.compiler;
 868    struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
 869
 870    pipeline->active_stages = 0;
 871
 872    VkResult result;
 873    for (uint32_t i = 0; i < info->stageCount; i++) {
 874       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
 875       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
 876
 877       pipeline->active_stages |= sinfo->stage;
 878
 879       stages[stage].stage = stage;
 880       stages[stage].module = anv_shader_module_from_handle(sinfo->module);
 881       stages[stage].entrypoint = sinfo->pName;
 882       stages[stage].spec_info = sinfo->pSpecializationInfo;
 883
 884       const struct gen_device_info *devinfo = &pipeline->device->info;
 885       switch (stage) {
 886       case MESA_SHADER_VERTEX:
 887          populate_vs_prog_key(devinfo, &stages[stage].key.vs);
 888          break;
 889       case MESA_SHADER_TESS_CTRL:
 890          populate_tcs_prog_key(devinfo,
 891                                info->pTessellationState->patchControlPoints,
 892                                &stages[stage].key.tcs);
 893          break;
 894       case MESA_SHADER_TESS_EVAL:
 895          populate_tes_prog_key(devinfo, &stages[stage].key.tes);
 896          break;
 897       case MESA_SHADER_GEOMETRY:
 898          populate_gs_prog_key(devinfo, &stages[stage].key.gs);
 899          break;
 900       case MESA_SHADER_FRAGMENT:
 901          populate_wm_prog_key(devinfo, pipeline->subpass,
 902                               info->pMultisampleState,
 903                               &stages[stage].key.wm);
 904          break;
 905       default:
 906          unreachable("Invalid graphics shader stage");
 907       }
 908    }
 909
 910    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
 911       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
 912
 913    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
 914
 915    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 916
 917    unsigned char sha1[20];
 918    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
 919
 920    unsigned found = 0;
 921    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 922       if (!stages[s].entrypoint)
 923          continue;
 924
 925       stages[s].cache_key.stage = s;
 926       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
 927
 928       struct anv_shader_bin *bin =
 929          anv_device_search_for_kernel(pipeline->device, cache,
 930                                       &stages[s].cache_key,
 931                                       sizeof(stages[s].cache_key));
 932       if (bin) {
 933          found++;
 934          pipeline->shaders[s] = bin;
 935       }
 936    }
 937
 938    if (found == __builtin_popcount(pipeline->active_stages)) {
 939       /* We found all our shaders in the cache.  We're done. */
 940       goto done;
 941    } else if (found > 0) {
 942       /* We found some but not all of our shaders.  This shouldn't happen
 943        * most of the time but it can if we have a partially populated
 944        * pipeline cache.
 945        */
 946       assert(found < __builtin_popcount(pipeline->active_stages));
 947
 948       vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
 949                       VK_DEBUG_REPORT_WARNING_BIT_EXT |
 950                       VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
 951                       VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
 952                       (uint64_t)(uintptr_t)cache,
 953                       0, 0, "anv",
 954                       "Found a partial pipeline in the cache.  This is "
 955                       "most likely caused by an incomplete pipeline cache "
 956                       "import or export");
 957
 958       /* We're going to have to recompile anyway, so just throw away our
 959        * references to the shaders in the cache.  We'll get them out of the
 960        * cache again as part of the compilation process.
 961        */
 962       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 963          if (pipeline->shaders[s]) {
 964             anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
 965             pipeline->shaders[s] = NULL;
 966          }
 967       }
 968    }
 969
 970    void *pipeline_ctx = ralloc_context(NULL);
 971
 972    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 973       if (!stages[s].entrypoint)
 974          continue;
 975
 976       assert(stages[s].stage == s);
 977       assert(pipeline->shaders[s] == NULL);
 978
 979       stages[s].bind_map = (struct anv_pipeline_bind_map) {
 980          .surface_to_descriptor = stages[s].surface_to_descriptor,
 981          .sampler_to_descriptor = stages[s].sampler_to_descriptor
 982       };
 983
 984       stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx,
 985                                                 stages[s].module,
 986                                                 stages[s].entrypoint,
 987                                                 stages[s].stage,
 988                                                 stages[s].spec_info);
 989       if (stages[s].nir == NULL) {
 990          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 991          goto fail;
 992       }
 993    }
 994
 995    /* Walk backwards to link */
 996    struct anv_pipeline_stage *next_stage = NULL;
 997    for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
 998       if (!stages[s].entrypoint)
 999          continue;
1000
1001       switch (s) {
1002       case MESA_SHADER_VERTEX:
1003          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1004          break;
1005       case MESA_SHADER_TESS_CTRL:
1006          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1007          break;
1008       case MESA_SHADER_TESS_EVAL:
1009          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1010          break;
1011       case MESA_SHADER_GEOMETRY:
1012          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1013          break;
1014       case MESA_SHADER_FRAGMENT:
1015          anv_pipeline_link_fs(compiler, &stages[s]);
1016          break;
1017       default:
1018          unreachable("Invalid graphics shader stage");
1019       }
1020
1021       next_stage = &stages[s];
1022    }
1023
1024    struct anv_pipeline_stage *prev_stage = NULL;
1025    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1026       if (!stages[s].entrypoint)
1027          continue;
1028
1029       void *stage_ctx = ralloc_context(NULL);
1030
1031       anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
1032
1033       const unsigned *code;
1034       switch (s) {
1035       case MESA_SHADER_VERTEX:
1036          code = anv_pipeline_compile_vs(compiler, stage_ctx, &stages[s]);
1037          break;
1038       case MESA_SHADER_TESS_CTRL:
1039          code = anv_pipeline_compile_tcs(compiler, stage_ctx,
1040                                          &stages[s], prev_stage);
1041          break;
1042       case MESA_SHADER_TESS_EVAL:
1043          code = anv_pipeline_compile_tes(compiler, stage_ctx,
1044                                          &stages[s], prev_stage);
1045          break;
1046       case MESA_SHADER_GEOMETRY:
1047          code = anv_pipeline_compile_gs(compiler, stage_ctx,
1048                                         &stages[s], prev_stage);
1049          break;
1050       case MESA_SHADER_FRAGMENT:
1051          code = anv_pipeline_compile_fs(compiler, stage_ctx,
1052                                         &stages[s], prev_stage);
1053          break;
1054       default:
1055          unreachable("Invalid graphics shader stage");
1056       }
1057       if (code == NULL) {
1058          ralloc_free(stage_ctx);
1059          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1060          goto fail;
1061       }
1062
1063       struct anv_shader_bin *bin =
1064          anv_device_upload_kernel(pipeline->device, cache,
1065                                   &stages[s].cache_key,
1066                                   sizeof(stages[s].cache_key),
1067                                   code, stages[s].prog_data.base.program_size,
1068                                   stages[s].nir->constant_data,
1069                                   stages[s].nir->constant_data_size,
1070                                   &stages[s].prog_data.base,
1071                                   brw_prog_data_size(s),
1072                                   &stages[s].bind_map);
1073       if (!bin) {
1074          ralloc_free(stage_ctx);
1075          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1076          goto fail;
1077       }
1078
1079       pipeline->shaders[s] = bin;
1080       ralloc_free(stage_ctx);
1081
1082       prev_stage = &stages[s];
1083    }
1084
1085    ralloc_free(pipeline_ctx);
1086
1087 done:
1088
1089    if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1090        pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1091       /* This can happen if we decided to implicitly disable the fragment
1092        * shader.  See anv_pipeline_compile_fs().
1093        */
1094       anv_shader_bin_unref(pipeline->device,
1095                            pipeline->shaders[MESA_SHADER_FRAGMENT]);
1096       pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1097       pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1098    }
1099
1100    return VK_SUCCESS;
1101
1102 fail:
1103    ralloc_free(pipeline_ctx);
1104
1105    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1106       if (pipeline->shaders[s])
1107          anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1108    }
1109
1110    return result;
1111 }
1112
1113 VkResult
1114 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
1115                         struct anv_pipeline_cache *cache,
1116                         const VkComputePipelineCreateInfo *info,
1117                         const struct anv_shader_module *module,
1118                         const char *entrypoint,
1119                         const VkSpecializationInfo *spec_info)
1120 {
1121    const struct brw_compiler *compiler =
1122       pipeline->device->instance->physicalDevice.compiler;
1123
1124    struct anv_pipeline_stage stage = {
1125       .stage = MESA_SHADER_COMPUTE,
1126       .module = module,
1127       .entrypoint = entrypoint,
1128       .spec_info = spec_info,
1129       .cache_key = {
1130          .stage = MESA_SHADER_COMPUTE,
1131       }
1132    };
1133
1134    struct anv_shader_bin *bin = NULL;
1135
1136    populate_cs_prog_key(&pipeline->device->info, &stage.key.cs);
1137
1138    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1139
1140    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1141    bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
1142                                       sizeof(stage.cache_key));
1143
1144    if (bin == NULL) {
1145       stage.bind_map = (struct anv_pipeline_bind_map) {
1146          .surface_to_descriptor = stage.surface_to_descriptor,
1147          .sampler_to_descriptor = stage.sampler_to_descriptor
1148       };
1149
1150       void *mem_ctx = ralloc_context(NULL);
1151
1152       stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
1153                                             stage.module,
1154                                             stage.entrypoint,
1155                                             stage.stage,
1156                                             stage.spec_info);
1157       if (stage.nir == NULL) {
1158          ralloc_free(mem_ctx);
1159          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1160       }
1161
1162       anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
1163
1164       NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
1165                  &stage.prog_data.cs);
1166
1167       anv_fill_binding_table(&stage.prog_data.cs.base, 1);
1168
1169       const unsigned *shader_code =
1170          brw_compile_cs(compiler, NULL, mem_ctx, &stage.key.cs,
1171                         &stage.prog_data.cs, stage.nir, -1, NULL);
1172       if (shader_code == NULL) {
1173          ralloc_free(mem_ctx);
1174          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1175       }
1176
1177       const unsigned code_size = stage.prog_data.base.program_size;
1178       bin = anv_device_upload_kernel(pipeline->device, cache,
1179                                      &stage.cache_key, sizeof(stage.cache_key),
1180                                      shader_code, code_size,
1181                                      stage.nir->constant_data,
1182                                      stage.nir->constant_data_size,
1183                                      &stage.prog_data.base,
1184                                      sizeof(stage.prog_data.cs),
1185                                      &stage.bind_map);
1186       if (!bin) {
1187          ralloc_free(mem_ctx);
1188          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1189       }
1190
1191       ralloc_free(mem_ctx);
1192    }
1193
1194    pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
1195    pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
1196
1197    return VK_SUCCESS;
1198 }
1199
1200 /**
1201  * Copy pipeline state not marked as dynamic.
1202  * Dynamic state is pipeline state which hasn't been provided at pipeline
1203  * creation time, but is dynamically provided afterwards using various
1204  * vkCmdSet* functions.
1205  *
1206  * The set of state considered "non_dynamic" is determined by the pieces of
1207  * state that have their corresponding VkDynamicState enums omitted from
1208  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1209  *
1210  * @param[out] pipeline    Destination non_dynamic state.
1211  * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
1212  */
1213 static void
1214 copy_non_dynamic_state(struct anv_pipeline *pipeline,
1215                        const VkGraphicsPipelineCreateInfo *pCreateInfo)
1216 {
1217    anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1218    struct anv_subpass *subpass = pipeline->subpass;
1219
1220    pipeline->dynamic_state = default_dynamic_state;
1221
1222    if (pCreateInfo->pDynamicState) {
1223       /* Remove all of the states that are marked as dynamic */
1224       uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1225       for (uint32_t s = 0; s < count; s++)
1226          states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
1227    }
1228
1229    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1230
1231    /* Section 9.2 of the Vulkan 1.0.15 spec says:
1232     *
1233     *    pViewportState is [...] NULL if the pipeline
1234     *    has rasterization disabled.
1235     */
1236    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1237       assert(pCreateInfo->pViewportState);
1238
1239       dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1240       if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
1241          typed_memcpy(dynamic->viewport.viewports,
1242                      pCreateInfo->pViewportState->pViewports,
1243                      pCreateInfo->pViewportState->viewportCount);
1244       }
1245
1246       dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1247       if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
1248          typed_memcpy(dynamic->scissor.scissors,
1249                      pCreateInfo->pViewportState->pScissors,
1250                      pCreateInfo->pViewportState->scissorCount);
1251       }
1252    }
1253
1254    if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
1255       assert(pCreateInfo->pRasterizationState);
1256       dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1257    }
1258
1259    if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
1260       assert(pCreateInfo->pRasterizationState);
1261       dynamic->depth_bias.bias =
1262          pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1263       dynamic->depth_bias.clamp =
1264          pCreateInfo->pRasterizationState->depthBiasClamp;
1265       dynamic->depth_bias.slope =
1266          pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1267    }
1268
1269    /* Section 9.2 of the Vulkan 1.0.15 spec says:
1270     *
1271     *    pColorBlendState is [...] NULL if the pipeline has rasterization
1272     *    disabled or if the subpass of the render pass the pipeline is
1273     *    created against does not use any color attachments.
1274     */
1275    bool uses_color_att = false;
1276    for (unsigned i = 0; i < subpass->color_count; ++i) {
1277       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1278          uses_color_att = true;
1279          break;
1280       }
1281    }
1282
1283    if (uses_color_att &&
1284        !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1285       assert(pCreateInfo->pColorBlendState);
1286
1287       if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
1288          typed_memcpy(dynamic->blend_constants,
1289                      pCreateInfo->pColorBlendState->blendConstants, 4);
1290    }
1291
1292    /* If there is no depthstencil attachment, then don't read
1293     * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1294     * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1295     * no need to override the depthstencil defaults in
1296     * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1297     *
1298     * Section 9.2 of the Vulkan 1.0.15 spec says:
1299     *
1300     *    pDepthStencilState is [...] NULL if the pipeline has rasterization
1301     *    disabled or if the subpass of the render pass the pipeline is created
1302     *    against does not use a depth/stencil attachment.
1303     */
1304    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1305        subpass->depth_stencil_attachment) {
1306       assert(pCreateInfo->pDepthStencilState);
1307
1308       if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
1309          dynamic->depth_bounds.min =
1310             pCreateInfo->pDepthStencilState->minDepthBounds;
1311          dynamic->depth_bounds.max =
1312             pCreateInfo->pDepthStencilState->maxDepthBounds;
1313       }
1314
1315       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
1316          dynamic->stencil_compare_mask.front =
1317             pCreateInfo->pDepthStencilState->front.compareMask;
1318          dynamic->stencil_compare_mask.back =
1319             pCreateInfo->pDepthStencilState->back.compareMask;
1320       }
1321
1322       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
1323          dynamic->stencil_write_mask.front =
1324             pCreateInfo->pDepthStencilState->front.writeMask;
1325          dynamic->stencil_write_mask.back =
1326             pCreateInfo->pDepthStencilState->back.writeMask;
1327       }
1328
1329       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
1330          dynamic->stencil_reference.front =
1331             pCreateInfo->pDepthStencilState->front.reference;
1332          dynamic->stencil_reference.back =
1333             pCreateInfo->pDepthStencilState->back.reference;
1334       }
1335    }
1336
1337    pipeline->dynamic_state_mask = states;
1338 }
1339
1340 static void
1341 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1342 {
1343 #ifdef DEBUG
1344    struct anv_render_pass *renderpass = NULL;
1345    struct anv_subpass *subpass = NULL;
1346
1347    /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1348     * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1349     */
1350    assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1351
1352    renderpass = anv_render_pass_from_handle(info->renderPass);
1353    assert(renderpass);
1354
1355    assert(info->subpass < renderpass->subpass_count);
1356    subpass = &renderpass->subpasses[info->subpass];
1357
1358    assert(info->stageCount >= 1);
1359    assert(info->pVertexInputState);
1360    assert(info->pInputAssemblyState);
1361    assert(info->pRasterizationState);
1362    if (!info->pRasterizationState->rasterizerDiscardEnable) {
1363       assert(info->pViewportState);
1364       assert(info->pMultisampleState);
1365
1366       if (subpass && subpass->depth_stencil_attachment)
1367          assert(info->pDepthStencilState);
1368
1369       if (subpass && subpass->color_count > 0) {
1370          bool all_color_unused = true;
1371          for (int i = 0; i < subpass->color_count; i++) {
1372             if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1373                all_color_unused = false;
1374          }
1375          /* pColorBlendState is ignored if the pipeline has rasterization
1376           * disabled or if the subpass of the render pass the pipeline is
1377           * created against does not use any color attachments.
1378           */
1379          assert(info->pColorBlendState || all_color_unused);
1380       }
1381    }
1382
1383    for (uint32_t i = 0; i < info->stageCount; ++i) {
1384       switch (info->pStages[i].stage) {
1385       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1386       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1387          assert(info->pTessellationState);
1388          break;
1389       default:
1390          break;
1391       }
1392    }
1393 #endif
1394 }
1395
1396 /**
1397  * Calculate the desired L3 partitioning based on the current state of the
1398  * pipeline.  For now this simply returns the conservative defaults calculated
1399  * by get_default_l3_weights(), but we could probably do better by gathering
1400  * more statistics from the pipeline state (e.g. guess of expected URB usage
1401  * and bound surfaces), or by using feed-back from performance counters.
1402  */
1403 void
1404 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1405 {
1406    const struct gen_device_info *devinfo = &pipeline->device->info;
1407
1408    const struct gen_l3_weights w =
1409       gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
1410
1411    pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
1412    pipeline->urb.total_size =
1413       gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
1414 }
1415
1416 VkResult
1417 anv_pipeline_init(struct anv_pipeline *pipeline,
1418                   struct anv_device *device,
1419                   struct anv_pipeline_cache *cache,
1420                   const VkGraphicsPipelineCreateInfo *pCreateInfo,
1421                   const VkAllocationCallbacks *alloc)
1422 {
1423    VkResult result;
1424
1425    anv_pipeline_validate_create_info(pCreateInfo);
1426
1427    if (alloc == NULL)
1428       alloc = &device->alloc;
1429
1430    pipeline->device = device;
1431
1432    ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1433    assert(pCreateInfo->subpass < render_pass->subpass_count);
1434    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1435
1436    result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
1437    if (result != VK_SUCCESS)
1438       return result;
1439
1440    pipeline->batch.alloc = alloc;
1441    pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
1442    pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
1443    pipeline->batch.relocs = &pipeline->batch_relocs;
1444    pipeline->batch.status = VK_SUCCESS;
1445
1446    copy_non_dynamic_state(pipeline, pCreateInfo);
1447    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1448                                   pCreateInfo->pRasterizationState->depthClampEnable;
1449
1450    pipeline->sample_shading_enable = pCreateInfo->pMultisampleState &&
1451                                      pCreateInfo->pMultisampleState->sampleShadingEnable;
1452
1453    pipeline->needs_data_cache = false;
1454
1455    /* When we free the pipeline, we detect stages based on the NULL status
1456     * of various prog_data pointers.  Make them NULL by default.
1457     */
1458    memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1459
1460    result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
1461    if (result != VK_SUCCESS) {
1462       anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1463       return result;
1464    }
1465
1466    assert(pipeline->shaders[MESA_SHADER_VERTEX]);
1467
1468    anv_pipeline_setup_l3_config(pipeline, false);
1469
1470    const VkPipelineVertexInputStateCreateInfo *vi_info =
1471       pCreateInfo->pVertexInputState;
1472
1473    const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1474
1475    pipeline->vb_used = 0;
1476    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1477       const VkVertexInputAttributeDescription *desc =
1478          &vi_info->pVertexAttributeDescriptions[i];
1479
1480       if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
1481          pipeline->vb_used |= 1 << desc->binding;
1482    }
1483
1484    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1485       const VkVertexInputBindingDescription *desc =
1486          &vi_info->pVertexBindingDescriptions[i];
1487
1488       pipeline->vb[desc->binding].stride = desc->stride;
1489
1490       /* Step rate is programmed per vertex element (attribute), not
1491        * binding. Set up a map of which bindings step per instance, for
1492        * reference by vertex element setup. */
1493       switch (desc->inputRate) {
1494       default:
1495       case VK_VERTEX_INPUT_RATE_VERTEX:
1496          pipeline->vb[desc->binding].instanced = false;
1497          break;
1498       case VK_VERTEX_INPUT_RATE_INSTANCE:
1499          pipeline->vb[desc->binding].instanced = true;
1500          break;
1501       }
1502
1503       pipeline->vb[desc->binding].instance_divisor = 1;
1504    }
1505
1506    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
1507       vk_find_struct_const(vi_info->pNext,
1508                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1509    if (vi_div_state) {
1510       for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
1511          const VkVertexInputBindingDivisorDescriptionEXT *desc =
1512             &vi_div_state->pVertexBindingDivisors[i];
1513
1514          pipeline->vb[desc->binding].instance_divisor = desc->divisor;
1515       }
1516    }
1517
1518    /* Our implementation of VK_KHR_multiview uses instancing to draw the
1519     * different views.  If the client asks for instancing, we need to multiply
1520     * the instance divisor by the number of views ensure that we repeat the
1521     * client's per-instance data once for each view.
1522     */
1523    if (pipeline->subpass->view_mask) {
1524       const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
1525       for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
1526          if (pipeline->vb[vb].instanced)
1527             pipeline->vb[vb].instance_divisor *= view_count;
1528       }
1529    }
1530
1531    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1532       pCreateInfo->pInputAssemblyState;
1533    const VkPipelineTessellationStateCreateInfo *tess_info =
1534       pCreateInfo->pTessellationState;
1535    pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1536
1537    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
1538       pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
1539    else
1540       pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1541
1542    return VK_SUCCESS;
1543 }