src/intel/vulkan/anv_pipeline.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28 #include <fcntl.h>
  29
  30 #include "util/mesa-sha1.h"
  31 #include "common/gen_l3_config.h"
  32 #include "anv_private.h"
  33 #include "brw_nir.h"
  34 #include "anv_nir.h"
  35 #include "spirv/nir_spirv.h"
  36
  37 /* Needed for SWIZZLE macros */
  38 #include "program/prog_instruction.h"
  39
  40 // Shader functions
  41
  42 VkResult anv_CreateShaderModule(
  43     VkDevice                                    _device,
  44     const VkShaderModuleCreateInfo*             pCreateInfo,
  45     const VkAllocationCallbacks*                pAllocator,
  46     VkShaderModule*                             pShaderModule)
  47 {
  48    ANV_FROM_HANDLE(anv_device, device, _device);
  49    struct anv_shader_module *module;
  50
  51    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
  52    assert(pCreateInfo->flags == 0);
  53
  54    module = vk_alloc2(&device->alloc, pAllocator,
  55                        sizeof(*module) + pCreateInfo->codeSize, 8,
  56                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
  57    if (module == NULL)
  58       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  59
  60    module->size = pCreateInfo->codeSize;
  61    memcpy(module->data, pCreateInfo->pCode, module->size);
  62
  63    _mesa_sha1_compute(module->data, module->size, module->sha1);
  64
  65    *pShaderModule = anv_shader_module_to_handle(module);
  66
  67    return VK_SUCCESS;
  68 }
  69
  70 void anv_DestroyShaderModule(
  71     VkDevice                                    _device,
  72     VkShaderModule                              _module,
  73     const VkAllocationCallbacks*                pAllocator)
  74 {
  75    ANV_FROM_HANDLE(anv_device, device, _device);
  76    ANV_FROM_HANDLE(anv_shader_module, module, _module);
  77
  78    vk_free2(&device->alloc, pAllocator, module);
  79 }
  80
  81 #define SPIR_V_MAGIC_NUMBER 0x07230203
  82
  83 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
  84  * we can't do that yet because we don't have the ability to copy nir.
  85  */
  86 static nir_shader *
  87 anv_shader_compile_to_nir(struct anv_device *device,
  88                           struct anv_shader_module *module,
  89                           const char *entrypoint_name,
  90                           gl_shader_stage stage,
  91                           const VkSpecializationInfo *spec_info)
  92 {
  93    if (strcmp(entrypoint_name, "main") != 0) {
  94       anv_finishme("Multiple shaders per module not really supported");
  95    }
  96
  97    const struct brw_compiler *compiler =
  98       device->instance->physicalDevice.compiler;
  99    const nir_shader_compiler_options *nir_options =
 100       compiler->glsl_compiler_options[stage].NirOptions;
 101
 102    uint32_t *spirv = (uint32_t *) module->data;
 103    assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
 104    assert(module->size % 4 == 0);
 105
 106    uint32_t num_spec_entries = 0;
 107    struct nir_spirv_specialization *spec_entries = NULL;
 108    if (spec_info && spec_info->mapEntryCount > 0) {
 109       num_spec_entries = spec_info->mapEntryCount;
 110       spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
 111       for (uint32_t i = 0; i < num_spec_entries; i++) {
 112          VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
 113          const void *data = spec_info->pData + entry.offset;
 114          assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
 115
 116          spec_entries[i].id = spec_info->pMapEntries[i].constantID;
 117          spec_entries[i].data = *(const uint32_t *)data;
 118       }
 119    }
 120
 121    nir_function *entry_point =
 122       spirv_to_nir(spirv, module->size / 4,
 123                    spec_entries, num_spec_entries,
 124                    stage, entrypoint_name, nir_options);
 125    nir_shader *nir = entry_point->shader;
 126    assert(nir->stage == stage);
 127    nir_validate_shader(nir);
 128
 129    free(spec_entries);
 130
 131    if (stage == MESA_SHADER_FRAGMENT) {
 132       nir_lower_wpos_center(nir);
 133       nir_validate_shader(nir);
 134    }
 135
 136    nir_lower_returns(nir);
 137    nir_validate_shader(nir);
 138
 139    nir_inline_functions(nir);
 140    nir_validate_shader(nir);
 141
 142    /* Pick off the single entrypoint that we want */
 143    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
 144       if (func != entry_point)
 145          exec_node_remove(&func->node);
 146    }
 147    assert(exec_list_length(&nir->functions) == 1);
 148    entry_point->name = ralloc_strdup(entry_point, "main");
 149
 150    nir_remove_dead_variables(nir, nir_var_shader_in);
 151    nir_remove_dead_variables(nir, nir_var_shader_out);
 152    nir_remove_dead_variables(nir, nir_var_system_value);
 153    nir_validate_shader(nir);
 154
 155    nir_propagate_invariant(nir);
 156    nir_validate_shader(nir);
 157
 158    nir_lower_io_to_temporaries(entry_point->shader, entry_point->impl,
 159                                true, false);
 160
 161    nir_lower_system_values(nir);
 162    nir_validate_shader(nir);
 163
 164    /* Vulkan uses the separate-shader linking model */
 165    nir->info->separate_shader = true;
 166
 167    nir = brw_preprocess_nir(compiler, nir);
 168
 169    nir_shader_gather_info(nir, entry_point->impl);
 170
 171    nir_variable_mode indirect_mask = 0;
 172    if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
 173       indirect_mask |= nir_var_shader_in;
 174    if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
 175       indirect_mask |= nir_var_shader_out;
 176    if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
 177       indirect_mask |= nir_var_local;
 178
 179    nir_lower_indirect_derefs(nir, indirect_mask);
 180
 181    return nir;
 182 }
 183
 184 void anv_DestroyPipeline(
 185     VkDevice                                    _device,
 186     VkPipeline                                  _pipeline,
 187     const VkAllocationCallbacks*                pAllocator)
 188 {
 189    ANV_FROM_HANDLE(anv_device, device, _device);
 190    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 191
 192    anv_reloc_list_finish(&pipeline->batch_relocs,
 193                          pAllocator ? pAllocator : &device->alloc);
 194    if (pipeline->blend_state.map)
 195       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
 196
 197    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
 198       if (pipeline->shaders[s])
 199          anv_shader_bin_unref(device, pipeline->shaders[s]);
 200    }
 201
 202    vk_free2(&device->alloc, pAllocator, pipeline);
 203 }
 204
 205 static const uint32_t vk_to_gen_primitive_type[] = {
 206    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
 207    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
 208    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
 209    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
 210    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
 211    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
 212    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
 213    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
 214    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
 215    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
 216 /*   [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST]                = _3DPRIM_PATCHLIST_1 */
 217 };
 218
 219 static void
 220 populate_sampler_prog_key(const struct gen_device_info *devinfo,
 221                           struct brw_sampler_prog_key_data *key)
 222 {
 223    /* XXX: Handle texture swizzle on HSW- */
 224    for (int i = 0; i < MAX_SAMPLERS; i++) {
 225       /* Assume color sampler, no swizzling. (Works for BDW+) */
 226       key->swizzles[i] = SWIZZLE_XYZW;
 227    }
 228 }
 229
 230 static void
 231 populate_vs_prog_key(const struct gen_device_info *devinfo,
 232                      struct brw_vs_prog_key *key)
 233 {
 234    memset(key, 0, sizeof(*key));
 235
 236    populate_sampler_prog_key(devinfo, &key->tex);
 237
 238    /* XXX: Handle vertex input work-arounds */
 239
 240    /* XXX: Handle sampler_prog_key */
 241 }
 242
 243 static void
 244 populate_gs_prog_key(const struct gen_device_info *devinfo,
 245                      struct brw_gs_prog_key *key)
 246 {
 247    memset(key, 0, sizeof(*key));
 248
 249    populate_sampler_prog_key(devinfo, &key->tex);
 250 }
 251
 252 static void
 253 populate_wm_prog_key(const struct gen_device_info *devinfo,
 254                      const VkGraphicsPipelineCreateInfo *info,
 255                      struct brw_wm_prog_key *key)
 256 {
 257    ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
 258
 259    memset(key, 0, sizeof(*key));
 260
 261    populate_sampler_prog_key(devinfo, &key->tex);
 262
 263    /* TODO: Fill out key->input_slots_valid */
 264
 265    /* Vulkan doesn't specify a default */
 266    key->high_quality_derivatives = false;
 267
 268    /* XXX Vulkan doesn't appear to specify */
 269    key->clamp_fragment_color = false;
 270
 271    key->nr_color_regions =
 272       render_pass->subpasses[info->subpass].color_count;
 273
 274    key->replicate_alpha = key->nr_color_regions > 1 &&
 275                           info->pMultisampleState &&
 276                           info->pMultisampleState->alphaToCoverageEnable;
 277
 278    if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) {
 279       /* We should probably pull this out of the shader, but it's fairly
 280        * harmless to compute it and then let dead-code take care of it.
 281        */
 282       key->persample_interp =
 283          (info->pMultisampleState->minSampleShading *
 284           info->pMultisampleState->rasterizationSamples) > 1;
 285       key->multisample_fbo = true;
 286    }
 287 }
 288
 289 static void
 290 populate_cs_prog_key(const struct gen_device_info *devinfo,
 291                      struct brw_cs_prog_key *key)
 292 {
 293    memset(key, 0, sizeof(*key));
 294
 295    populate_sampler_prog_key(devinfo, &key->tex);
 296 }
 297
 298 static nir_shader *
 299 anv_pipeline_compile(struct anv_pipeline *pipeline,
 300                      struct anv_shader_module *module,
 301                      const char *entrypoint,
 302                      gl_shader_stage stage,
 303                      const VkSpecializationInfo *spec_info,
 304                      struct brw_stage_prog_data *prog_data,
 305                      struct anv_pipeline_bind_map *map)
 306 {
 307    nir_shader *nir = anv_shader_compile_to_nir(pipeline->device,
 308                                                module, entrypoint, stage,
 309                                                spec_info);
 310    if (nir == NULL)
 311       return NULL;
 312
 313    anv_nir_lower_push_constants(nir);
 314
 315    /* Figure out the number of parameters */
 316    prog_data->nr_params = 0;
 317
 318    if (nir->num_uniforms > 0) {
 319       /* If the shader uses any push constants at all, we'll just give
 320        * them the maximum possible number
 321        */
 322       assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
 323       prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
 324    }
 325
 326    if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
 327       prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;
 328
 329    if (nir->info->num_images > 0) {
 330       prog_data->nr_params += nir->info->num_images * BRW_IMAGE_PARAM_SIZE;
 331       pipeline->needs_data_cache = true;
 332    }
 333
 334    if (stage == MESA_SHADER_COMPUTE)
 335       ((struct brw_cs_prog_data *)prog_data)->thread_local_id_index =
 336          prog_data->nr_params++; /* The CS Thread ID uniform */
 337
 338    if (nir->info->num_ssbos > 0)
 339       pipeline->needs_data_cache = true;
 340
 341    if (prog_data->nr_params > 0) {
 342       /* XXX: I think we're leaking this */
 343       prog_data->param = (const union gl_constant_value **)
 344          malloc(prog_data->nr_params * sizeof(union gl_constant_value *));
 345
 346       /* We now set the param values to be offsets into a
 347        * anv_push_constant_data structure.  Since the compiler doesn't
 348        * actually dereference any of the gl_constant_value pointers in the
 349        * params array, it doesn't really matter what we put here.
 350        */
 351       struct anv_push_constants *null_data = NULL;
 352       if (nir->num_uniforms > 0) {
 353          /* Fill out the push constants section of the param array */
 354          for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
 355             prog_data->param[i] = (const union gl_constant_value *)
 356                &null_data->client_data[i * sizeof(float)];
 357       }
 358    }
 359
 360    /* Set up dynamic offsets */
 361    anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
 362
 363    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
 364    if (pipeline->layout)
 365       anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);
 366
 367    /* nir_lower_io will only handle the push constants; we need to set this
 368     * to the full number of possible uniforms.
 369     */
 370    nir->num_uniforms = prog_data->nr_params * 4;
 371
 372    return nir;
 373 }
 374
 375 static void
 376 anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
 377 {
 378    prog_data->binding_table.size_bytes = 0;
 379    prog_data->binding_table.texture_start = bias;
 380    prog_data->binding_table.gather_texture_start = bias;
 381    prog_data->binding_table.ubo_start = bias;
 382    prog_data->binding_table.ssbo_start = bias;
 383    prog_data->binding_table.image_start = bias;
 384 }
 385
 386 static struct anv_shader_bin *
 387 anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
 388                            struct anv_pipeline_cache *cache,
 389                            const void *key_data, uint32_t key_size,
 390                            const void *kernel_data, uint32_t kernel_size,
 391                            const struct brw_stage_prog_data *prog_data,
 392                            uint32_t prog_data_size,
 393                            const struct anv_pipeline_bind_map *bind_map)
 394 {
 395    if (cache) {
 396       return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
 397                                               kernel_data, kernel_size,
 398                                               prog_data, prog_data_size,
 399                                               bind_map);
 400    } else {
 401       return anv_shader_bin_create(pipeline->device, key_data, key_size,
 402                                    kernel_data, kernel_size,
 403                                    prog_data, prog_data_size,
 404                                    prog_data->param, bind_map);
 405    }
 406 }
 407
 408
 409 static void
 410 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
 411                                 gl_shader_stage stage,
 412                                 struct anv_shader_bin *shader)
 413 {
 414    pipeline->shaders[stage] = shader;
 415    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
 416 }
 417
 418 static VkResult
 419 anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 420                         struct anv_pipeline_cache *cache,
 421                         const VkGraphicsPipelineCreateInfo *info,
 422                         struct anv_shader_module *module,
 423                         const char *entrypoint,
 424                         const VkSpecializationInfo *spec_info)
 425 {
 426    const struct brw_compiler *compiler =
 427       pipeline->device->instance->physicalDevice.compiler;
 428    struct anv_pipeline_bind_map map;
 429    struct brw_vs_prog_key key;
 430    struct anv_shader_bin *bin = NULL;
 431    unsigned char sha1[20];
 432
 433    populate_vs_prog_key(&pipeline->device->info, &key);
 434
 435    if (cache) {
 436       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 437                       pipeline->layout, spec_info);
 438       bin = anv_pipeline_cache_search(cache, sha1, 20);
 439    }
 440
 441    if (bin == NULL) {
 442       struct brw_vs_prog_data prog_data = { 0, };
 443       struct anv_pipeline_binding surface_to_descriptor[256];
 444       struct anv_pipeline_binding sampler_to_descriptor[256];
 445
 446       map = (struct anv_pipeline_bind_map) {
 447          .surface_to_descriptor = surface_to_descriptor,
 448          .sampler_to_descriptor = sampler_to_descriptor
 449       };
 450
 451       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
 452                                              MESA_SHADER_VERTEX, spec_info,
 453                                              &prog_data.base.base, &map);
 454       if (nir == NULL)
 455          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 456
 457       anv_fill_binding_table(&prog_data.base.base, 0);
 458
 459       void *mem_ctx = ralloc_context(NULL);
 460
 461       ralloc_steal(mem_ctx, nir);
 462
 463       prog_data.inputs_read = nir->info->inputs_read;
 464
 465       brw_compute_vue_map(&pipeline->device->info,
 466                           &prog_data.base.vue_map,
 467                           nir->info->outputs_written,
 468                           nir->info->separate_shader);
 469
 470       unsigned code_size;
 471       const unsigned *shader_code =
 472          brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
 473                         NULL, false, -1, &code_size, NULL);
 474       if (shader_code == NULL) {
 475          ralloc_free(mem_ctx);
 476          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 477       }
 478
 479       bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
 480                                        shader_code, code_size,
 481                                        &prog_data.base.base, sizeof(prog_data),
 482                                        &map);
 483       if (!bin) {
 484          ralloc_free(mem_ctx);
 485          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 486       }
 487
 488       ralloc_free(mem_ctx);
 489    }
 490
 491    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
 492
 493    return VK_SUCCESS;
 494 }
 495
 496 static VkResult
 497 anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 498                         struct anv_pipeline_cache *cache,
 499                         const VkGraphicsPipelineCreateInfo *info,
 500                         struct anv_shader_module *module,
 501                         const char *entrypoint,
 502                         const VkSpecializationInfo *spec_info)
 503 {
 504    const struct brw_compiler *compiler =
 505       pipeline->device->instance->physicalDevice.compiler;
 506    struct anv_pipeline_bind_map map;
 507    struct brw_gs_prog_key key;
 508    struct anv_shader_bin *bin = NULL;
 509    unsigned char sha1[20];
 510
 511    populate_gs_prog_key(&pipeline->device->info, &key);
 512
 513    if (cache) {
 514       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 515                       pipeline->layout, spec_info);
 516       bin = anv_pipeline_cache_search(cache, sha1, 20);
 517    }
 518
 519    if (bin == NULL) {
 520       struct brw_gs_prog_data prog_data = { 0, };
 521       struct anv_pipeline_binding surface_to_descriptor[256];
 522       struct anv_pipeline_binding sampler_to_descriptor[256];
 523
 524       map = (struct anv_pipeline_bind_map) {
 525          .surface_to_descriptor = surface_to_descriptor,
 526          .sampler_to_descriptor = sampler_to_descriptor
 527       };
 528
 529       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
 530                                              MESA_SHADER_GEOMETRY, spec_info,
 531                                              &prog_data.base.base, &map);
 532       if (nir == NULL)
 533          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 534
 535       anv_fill_binding_table(&prog_data.base.base, 0);
 536
 537       void *mem_ctx = ralloc_context(NULL);
 538
 539       ralloc_steal(mem_ctx, nir);
 540
 541       brw_compute_vue_map(&pipeline->device->info,
 542                           &prog_data.base.vue_map,
 543                           nir->info->outputs_written,
 544                           nir->info->separate_shader);
 545
 546       unsigned code_size;
 547       const unsigned *shader_code =
 548          brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
 549                         NULL, -1, &code_size, NULL);
 550       if (shader_code == NULL) {
 551          ralloc_free(mem_ctx);
 552          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 553       }
 554
 555       /* TODO: SIMD8 GS */
 556       bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
 557                                        shader_code, code_size,
 558                                        &prog_data.base.base, sizeof(prog_data),
 559                                        &map);
 560       if (!bin) {
 561          ralloc_free(mem_ctx);
 562          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 563       }
 564
 565       ralloc_free(mem_ctx);
 566    }
 567
 568    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
 569
 570    return VK_SUCCESS;
 571 }
 572
 573 static VkResult
 574 anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 575                         struct anv_pipeline_cache *cache,
 576                         const VkGraphicsPipelineCreateInfo *info,
 577                         struct anv_shader_module *module,
 578                         const char *entrypoint,
 579                         const VkSpecializationInfo *spec_info)
 580 {
 581    const struct brw_compiler *compiler =
 582       pipeline->device->instance->physicalDevice.compiler;
 583    struct anv_pipeline_bind_map map;
 584    struct brw_wm_prog_key key;
 585    struct anv_shader_bin *bin = NULL;
 586    unsigned char sha1[20];
 587
 588    populate_wm_prog_key(&pipeline->device->info, info, &key);
 589
 590    if (cache) {
 591       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 592                       pipeline->layout, spec_info);
 593       bin = anv_pipeline_cache_search(cache, sha1, 20);
 594    }
 595
 596    if (bin == NULL) {
 597       struct brw_wm_prog_data prog_data = { 0, };
 598       struct anv_pipeline_binding surface_to_descriptor[256];
 599       struct anv_pipeline_binding sampler_to_descriptor[256];
 600
 601       map = (struct anv_pipeline_bind_map) {
 602          .surface_to_descriptor = surface_to_descriptor + 8,
 603          .sampler_to_descriptor = sampler_to_descriptor
 604       };
 605
 606       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
 607                                              MESA_SHADER_FRAGMENT, spec_info,
 608                                              &prog_data.base, &map);
 609       if (nir == NULL)
 610          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 611
 612       unsigned num_rts = 0;
 613       struct anv_pipeline_binding rt_bindings[8];
 614       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
 615       nir_foreach_variable_safe(var, &nir->outputs) {
 616          if (var->data.location < FRAG_RESULT_DATA0)
 617             continue;
 618
 619          unsigned rt = var->data.location - FRAG_RESULT_DATA0;
 620          if (rt >= key.nr_color_regions) {
 621             /* Out-of-bounds, throw it away */
 622             var->data.mode = nir_var_local;
 623             exec_node_remove(&var->node);
 624             exec_list_push_tail(&impl->locals, &var->node);
 625             continue;
 626          }
 627
 628          /* Give it a new, compacted, location */
 629          var->data.location = FRAG_RESULT_DATA0 + num_rts;
 630
 631          unsigned array_len =
 632             glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
 633          assert(num_rts + array_len <= 8);
 634
 635          for (unsigned i = 0; i < array_len; i++) {
 636             rt_bindings[num_rts + i] = (struct anv_pipeline_binding) {
 637                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 638                .binding = 0,
 639                .index = rt + i,
 640             };
 641          }
 642
 643          num_rts += array_len;
 644       }
 645
 646       if (num_rts == 0) {
 647          /* If we have no render targets, we need a null render target */
 648          rt_bindings[0] = (struct anv_pipeline_binding) {
 649             .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
 650             .binding = 0,
 651             .index = UINT8_MAX,
 652          };
 653          num_rts = 1;
 654       }
 655
 656       assert(num_rts <= 8);
 657       map.surface_to_descriptor -= num_rts;
 658       map.surface_count += num_rts;
 659       assert(map.surface_count <= 256);
 660       memcpy(map.surface_to_descriptor, rt_bindings,
 661              num_rts * sizeof(*rt_bindings));
 662
 663       anv_fill_binding_table(&prog_data.base, num_rts);
 664
 665       void *mem_ctx = ralloc_context(NULL);
 666
 667       ralloc_steal(mem_ctx, nir);
 668
 669       unsigned code_size;
 670       const unsigned *shader_code =
 671          brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
 672                         NULL, -1, -1, true, false, NULL, &code_size, NULL);
 673       if (shader_code == NULL) {
 674          ralloc_free(mem_ctx);
 675          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 676       }
 677
 678       bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
 679                                        shader_code, code_size,
 680                                        &prog_data.base, sizeof(prog_data),
 681                                        &map);
 682       if (!bin) {
 683          ralloc_free(mem_ctx);
 684          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 685       }
 686
 687       ralloc_free(mem_ctx);
 688    }
 689
 690    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
 691
 692    return VK_SUCCESS;
 693 }
 694
 695 VkResult
 696 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 697                         struct anv_pipeline_cache *cache,
 698                         const VkComputePipelineCreateInfo *info,
 699                         struct anv_shader_module *module,
 700                         const char *entrypoint,
 701                         const VkSpecializationInfo *spec_info)
 702 {
 703    const struct brw_compiler *compiler =
 704       pipeline->device->instance->physicalDevice.compiler;
 705    struct anv_pipeline_bind_map map;
 706    struct brw_cs_prog_key key;
 707    struct anv_shader_bin *bin = NULL;
 708    unsigned char sha1[20];
 709
 710    populate_cs_prog_key(&pipeline->device->info, &key);
 711
 712    if (cache) {
 713       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 714                       pipeline->layout, spec_info);
 715       bin = anv_pipeline_cache_search(cache, sha1, 20);
 716    }
 717
 718    if (bin == NULL) {
 719       struct brw_cs_prog_data prog_data = { 0, };
 720       struct anv_pipeline_binding surface_to_descriptor[256];
 721       struct anv_pipeline_binding sampler_to_descriptor[256];
 722
 723       map = (struct anv_pipeline_bind_map) {
 724          .surface_to_descriptor = surface_to_descriptor,
 725          .sampler_to_descriptor = sampler_to_descriptor
 726       };
 727
 728       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
 729                                              MESA_SHADER_COMPUTE, spec_info,
 730                                              &prog_data.base, &map);
 731       if (nir == NULL)
 732          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 733
 734       anv_fill_binding_table(&prog_data.base, 1);
 735
 736       void *mem_ctx = ralloc_context(NULL);
 737
 738       ralloc_steal(mem_ctx, nir);
 739
 740       unsigned code_size;
 741       const unsigned *shader_code =
 742          brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
 743                         -1, &code_size, NULL);
 744       if (shader_code == NULL) {
 745          ralloc_free(mem_ctx);
 746          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 747       }
 748
 749       bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
 750                                        shader_code, code_size,
 751                                        &prog_data.base, sizeof(prog_data),
 752                                        &map);
 753       if (!bin) {
 754          ralloc_free(mem_ctx);
 755          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 756       }
 757
 758       ralloc_free(mem_ctx);
 759    }
 760
 761    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
 762
 763    return VK_SUCCESS;
 764 }
 765
 766 /**
 767  * Copy pipeline state not marked as dynamic.
 768  * Dynamic state is pipeline state which hasn't been provided at pipeline
 769  * creation time, but is dynamically provided afterwards using various
 770  * vkCmdSet* functions.
 771  *
 772  * The set of state considered "non_dynamic" is determined by the pieces of
 773  * state that have their corresponding VkDynamicState enums omitted from
 774  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
 775  *
 776  * @param[out] pipeline    Destination non_dynamic state.
 777  * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
 778  */
 779 static void
 780 copy_non_dynamic_state(struct anv_pipeline *pipeline,
 781                        const VkGraphicsPipelineCreateInfo *pCreateInfo)
 782 {
 783    anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
 784    ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
 785    struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
 786
 787    pipeline->dynamic_state = default_dynamic_state;
 788
 789    if (pCreateInfo->pDynamicState) {
 790       /* Remove all of the states that are marked as dynamic */
 791       uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
 792       for (uint32_t s = 0; s < count; s++)
 793          states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
 794    }
 795
 796    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
 797
 798    /* Section 9.2 of the Vulkan 1.0.15 spec says:
 799     *
 800     *    pViewportState is [...] NULL if the pipeline
 801     *    has rasterization disabled.
 802     */
 803    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
 804       assert(pCreateInfo->pViewportState);
 805
 806       dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
 807       if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
 808          typed_memcpy(dynamic->viewport.viewports,
 809                      pCreateInfo->pViewportState->pViewports,
 810                      pCreateInfo->pViewportState->viewportCount);
 811       }
 812
 813       dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
 814       if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
 815          typed_memcpy(dynamic->scissor.scissors,
 816                      pCreateInfo->pViewportState->pScissors,
 817                      pCreateInfo->pViewportState->scissorCount);
 818       }
 819    }
 820
 821    if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
 822       assert(pCreateInfo->pRasterizationState);
 823       dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
 824    }
 825
 826    if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
 827       assert(pCreateInfo->pRasterizationState);
 828       dynamic->depth_bias.bias =
 829          pCreateInfo->pRasterizationState->depthBiasConstantFactor;
 830       dynamic->depth_bias.clamp =
 831          pCreateInfo->pRasterizationState->depthBiasClamp;
 832       dynamic->depth_bias.slope =
 833          pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
 834    }
 835
 836    /* Section 9.2 of the Vulkan 1.0.15 spec says:
 837     *
 838     *    pColorBlendState is [...] NULL if the pipeline has rasterization
 839     *    disabled or if the subpass of the render pass the pipeline is
 840     *    created against does not use any color attachments.
 841     */
 842    bool uses_color_att = false;
 843    for (unsigned i = 0; i < subpass->color_count; ++i) {
 844       if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED) {
 845          uses_color_att = true;
 846          break;
 847       }
 848    }
 849
 850    if (uses_color_att &&
 851        !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
 852       assert(pCreateInfo->pColorBlendState);
 853
 854       if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
 855          typed_memcpy(dynamic->blend_constants,
 856                      pCreateInfo->pColorBlendState->blendConstants, 4);
 857    }
 858
 859    /* If there is no depthstencil attachment, then don't read
 860     * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
 861     * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
 862     * no need to override the depthstencil defaults in
 863     * anv_pipeline::dynamic_state when there is no depthstencil attachment.
 864     *
 865     * Section 9.2 of the Vulkan 1.0.15 spec says:
 866     *
 867     *    pDepthStencilState is [...] NULL if the pipeline has rasterization
 868     *    disabled or if the subpass of the render pass the pipeline is created
 869     *    against does not use a depth/stencil attachment.
 870     */
 871    if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
 872        subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
 873       assert(pCreateInfo->pDepthStencilState);
 874
 875       if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
 876          dynamic->depth_bounds.min =
 877             pCreateInfo->pDepthStencilState->minDepthBounds;
 878          dynamic->depth_bounds.max =
 879             pCreateInfo->pDepthStencilState->maxDepthBounds;
 880       }
 881
 882       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
 883          dynamic->stencil_compare_mask.front =
 884             pCreateInfo->pDepthStencilState->front.compareMask;
 885          dynamic->stencil_compare_mask.back =
 886             pCreateInfo->pDepthStencilState->back.compareMask;
 887       }
 888
 889       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
 890          dynamic->stencil_write_mask.front =
 891             pCreateInfo->pDepthStencilState->front.writeMask;
 892          dynamic->stencil_write_mask.back =
 893             pCreateInfo->pDepthStencilState->back.writeMask;
 894       }
 895
 896       if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
 897          dynamic->stencil_reference.front =
 898             pCreateInfo->pDepthStencilState->front.reference;
 899          dynamic->stencil_reference.back =
 900             pCreateInfo->pDepthStencilState->back.reference;
 901       }
 902    }
 903
 904    pipeline->dynamic_state_mask = states;
 905 }
 906
 907 static void
 908 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
 909 {
 910    struct anv_render_pass *renderpass = NULL;
 911    struct anv_subpass *subpass = NULL;
 912
 913    /* Assert that all required members of VkGraphicsPipelineCreateInfo are
 914     * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
 915     */
 916    assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
 917
 918    renderpass = anv_render_pass_from_handle(info->renderPass);
 919    assert(renderpass);
 920
 921    assert(info->subpass < renderpass->subpass_count);
 922    subpass = &renderpass->subpasses[info->subpass];
 923
 924    assert(info->stageCount >= 1);
 925    assert(info->pVertexInputState);
 926    assert(info->pInputAssemblyState);
 927    assert(info->pRasterizationState);
 928    if (!info->pRasterizationState->rasterizerDiscardEnable) {
 929       assert(info->pViewportState);
 930       assert(info->pMultisampleState);
 931
 932       if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
 933          assert(info->pDepthStencilState);
 934
 935       if (subpass && subpass->color_count > 0)
 936          assert(info->pColorBlendState);
 937    }
 938
 939    for (uint32_t i = 0; i < info->stageCount; ++i) {
 940       switch (info->pStages[i].stage) {
 941       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
 942       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
 943          assert(info->pTessellationState);
 944          break;
 945       default:
 946          break;
 947       }
 948    }
 949 }
 950
 951 /**
 952  * Calculate the desired L3 partitioning based on the current state of the
 953  * pipeline.  For now this simply returns the conservative defaults calculated
 954  * by get_default_l3_weights(), but we could probably do better by gathering
 955  * more statistics from the pipeline state (e.g. guess of expected URB usage
 956  * and bound surfaces), or by using feed-back from performance counters.
 957  */
 958 void
 959 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
 960 {
 961    const struct gen_device_info *devinfo = &pipeline->device->info;
 962
 963    const struct gen_l3_weights w =
 964       gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
 965
 966    pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
 967    pipeline->urb.total_size =
 968       gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
 969 }
 970
 971 VkResult
 972 anv_pipeline_init(struct anv_pipeline *pipeline,
 973                   struct anv_device *device,
 974                   struct anv_pipeline_cache *cache,
 975                   const VkGraphicsPipelineCreateInfo *pCreateInfo,
 976                   const VkAllocationCallbacks *alloc)
 977 {
 978    VkResult result;
 979
 980    anv_validate {
 981       anv_pipeline_validate_create_info(pCreateInfo);
 982    }
 983
 984    if (alloc == NULL)
 985       alloc = &device->alloc;
 986
 987    pipeline->device = device;
 988    pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
 989
 990    result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
 991    if (result != VK_SUCCESS)
 992       return result;
 993
 994    pipeline->batch.alloc = alloc;
 995    pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
 996    pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
 997    pipeline->batch.relocs = &pipeline->batch_relocs;
 998
 999    copy_non_dynamic_state(pipeline, pCreateInfo);
1000    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1001                                   pCreateInfo->pRasterizationState->depthClampEnable;
1002
1003    pipeline->needs_data_cache = false;
1004
1005    /* When we free the pipeline, we detect stages based on the NULL status
1006     * of various prog_data pointers.  Make them NULL by default.
1007     */
1008    memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1009
1010    pipeline->active_stages = 0;
1011
1012    const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
1013    struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
1014    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
1015       gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
1016       pStages[stage] = &pCreateInfo->pStages[i];
1017       modules[stage] = anv_shader_module_from_handle(pStages[stage]->module);
1018    }
1019
1020    if (modules[MESA_SHADER_VERTEX]) {
1021       result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
1022                                        modules[MESA_SHADER_VERTEX],
1023                                        pStages[MESA_SHADER_VERTEX]->pName,
1024                                        pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
1025       if (result != VK_SUCCESS)
1026          goto compile_fail;
1027    }
1028
1029    if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL])
1030       anv_finishme("no tessellation support");
1031
1032    if (modules[MESA_SHADER_GEOMETRY]) {
1033       result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
1034                                        modules[MESA_SHADER_GEOMETRY],
1035                                        pStages[MESA_SHADER_GEOMETRY]->pName,
1036                                        pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
1037       if (result != VK_SUCCESS)
1038          goto compile_fail;
1039    }
1040
1041    if (modules[MESA_SHADER_FRAGMENT]) {
1042       result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo,
1043                                        modules[MESA_SHADER_FRAGMENT],
1044                                        pStages[MESA_SHADER_FRAGMENT]->pName,
1045                                        pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
1046       if (result != VK_SUCCESS)
1047          goto compile_fail;
1048    }
1049
1050    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1051
1052    anv_pipeline_setup_l3_config(pipeline, false);
1053
1054    const VkPipelineVertexInputStateCreateInfo *vi_info =
1055       pCreateInfo->pVertexInputState;
1056
1057    const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1058
1059    pipeline->vb_used = 0;
1060    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1061       const VkVertexInputAttributeDescription *desc =
1062          &vi_info->pVertexAttributeDescriptions[i];
1063
1064       if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location)))
1065          pipeline->vb_used |= 1 << desc->binding;
1066    }
1067
1068    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1069       const VkVertexInputBindingDescription *desc =
1070          &vi_info->pVertexBindingDescriptions[i];
1071
1072       pipeline->binding_stride[desc->binding] = desc->stride;
1073
1074       /* Step rate is programmed per vertex element (attribute), not
1075        * binding. Set up a map of which bindings step per instance, for
1076        * reference by vertex element setup. */
1077       switch (desc->inputRate) {
1078       default:
1079       case VK_VERTEX_INPUT_RATE_VERTEX:
1080          pipeline->instancing_enable[desc->binding] = false;
1081          break;
1082       case VK_VERTEX_INPUT_RATE_INSTANCE:
1083          pipeline->instancing_enable[desc->binding] = true;
1084          break;
1085       }
1086    }
1087
1088    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1089       pCreateInfo->pInputAssemblyState;
1090    pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1091    pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1092
1093    return VK_SUCCESS;
1094
1095 compile_fail:
1096    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1097       if (pipeline->shaders[s])
1098          anv_shader_bin_unref(device, pipeline->shaders[s]);
1099    }
1100
1101    anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1102
1103    return result;
1104 }