anv: Implement VK_EXT_line_rasterization
[mesa.git] / src / intel / vulkan / anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/gen_l3_config.h"
33 #include "anv_private.h"
34 #include "compiler/brw_nir.h"
35 #include "anv_nir.h"
36 #include "nir/nir_xfb_info.h"
37 #include "spirv/nir_spirv.h"
38 #include "vk_util.h"
39
40 /* Needed for SWIZZLE macros */
41 #include "program/prog_instruction.h"
42
43 // Shader functions
44
45 VkResult anv_CreateShaderModule(
46 VkDevice _device,
47 const VkShaderModuleCreateInfo* pCreateInfo,
48 const VkAllocationCallbacks* pAllocator,
49 VkShaderModule* pShaderModule)
50 {
51 ANV_FROM_HANDLE(anv_device, device, _device);
52 struct anv_shader_module *module;
53
54 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
55 assert(pCreateInfo->flags == 0);
56
57 module = vk_alloc2(&device->alloc, pAllocator,
58 sizeof(*module) + pCreateInfo->codeSize, 8,
59 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
60 if (module == NULL)
61 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
62
63 module->size = pCreateInfo->codeSize;
64 memcpy(module->data, pCreateInfo->pCode, module->size);
65
66 _mesa_sha1_compute(module->data, module->size, module->sha1);
67
68 *pShaderModule = anv_shader_module_to_handle(module);
69
70 return VK_SUCCESS;
71 }
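/* For reference, an application creates a module from a SPIR-V blob roughly
 * like this (illustrative only; variable names are made up):
 *
 *    VkShaderModuleCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
 *       .codeSize = spirv_size,   // in bytes, must be a multiple of 4
 *       .pCode = spirv_words,
 *    };
 *    VkShaderModule module;
 *    vkCreateShaderModule(device, &info, NULL, &module);
 *
 * The SHA-1 computed above ends up in the shader hashes used for both the
 * NIR cache and the pipeline cache later in this file.
 */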
72
73 void anv_DestroyShaderModule(
74 VkDevice _device,
75 VkShaderModule _module,
76 const VkAllocationCallbacks* pAllocator)
77 {
78 ANV_FROM_HANDLE(anv_device, device, _device);
79 ANV_FROM_HANDLE(anv_shader_module, module, _module);
80
81 if (!module)
82 return;
83
84 vk_free2(&device->alloc, pAllocator, module);
85 }
86
87 #define SPIR_V_MAGIC_NUMBER 0x07230203
88
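/* These bits correspond to the per-stage INTEL_DEBUG environment-variable
 * flags (e.g. INTEL_DEBUG=vs) and gate the NIR dump in
 * anv_shader_compile_to_nir() below.
 */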
89 static const uint64_t stage_to_debug[] = {
90 [MESA_SHADER_VERTEX] = DEBUG_VS,
91 [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
92 [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
93 [MESA_SHADER_GEOMETRY] = DEBUG_GS,
94 [MESA_SHADER_FRAGMENT] = DEBUG_WM,
95 [MESA_SHADER_COMPUTE] = DEBUG_CS,
96 };
97
98 struct anv_spirv_debug_data {
99 struct anv_device *device;
100 const struct anv_shader_module *module;
101 };
102
103 static void anv_spirv_nir_debug(void *private_data,
104 enum nir_spirv_debug_level level,
105 size_t spirv_offset,
106 const char *message)
107 {
108 struct anv_spirv_debug_data *debug_data = private_data;
109 static const VkDebugReportFlagsEXT vk_flags[] = {
110 [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
111 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
112 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
113 };
114 char buffer[256];
115
116 snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
117
118 vk_debug_report(&debug_data->device->instance->debug_report_callbacks,
119 vk_flags[level],
120 VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
121 (uint64_t) (uintptr_t) debug_data->module,
122 0, 0, "anv", buffer);
123 }
124
125 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
126 * we can't do that yet because we don't have the ability to copy nir.
127 */
128 static nir_shader *
129 anv_shader_compile_to_nir(struct anv_device *device,
130 void *mem_ctx,
131 const struct anv_shader_module *module,
132 const char *entrypoint_name,
133 gl_shader_stage stage,
134 const VkSpecializationInfo *spec_info)
135 {
136 const struct anv_physical_device *pdevice =
137 &device->instance->physicalDevice;
138 const struct brw_compiler *compiler = pdevice->compiler;
139 const nir_shader_compiler_options *nir_options =
140 compiler->glsl_compiler_options[stage].NirOptions;
141
142 uint32_t *spirv = (uint32_t *) module->data;
143 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
144 assert(module->size % 4 == 0);
145
146 uint32_t num_spec_entries = 0;
147 struct nir_spirv_specialization *spec_entries = NULL;
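/* Illustrative only (names invented for the example): an application
 * providing two 32-bit specialization constants would pass something like
 *
 *    uint32_t values[2] = { 16, 4 };
 *    VkSpecializationMapEntry entries[2] = {
 *       { .constantID = 0, .offset = 0, .size = 4 },
 *       { .constantID = 1, .offset = 4, .size = 4 },
 *    };
 *    VkSpecializationInfo spec = {
 *       .mapEntryCount = 2, .pMapEntries = entries,
 *       .dataSize = sizeof(values), .pData = values,
 *    };
 *
 * The loop below converts each map entry into a nir_spirv_specialization
 * keyed by constantID so spirv_to_nir can substitute the values.
 */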
148 if (spec_info && spec_info->mapEntryCount > 0) {
149 num_spec_entries = spec_info->mapEntryCount;
150 spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
151 for (uint32_t i = 0; i < num_spec_entries; i++) {
152 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
153 const void *data = spec_info->pData + entry.offset;
154 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
155
156 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
157 if (entry.size == 8)
158 spec_entries[i].data64 = *(const uint64_t *)data;
159 else
160 spec_entries[i].data32 = *(const uint32_t *)data;
161 }
162 }
163
164 struct anv_spirv_debug_data spirv_debug_data = {
165 .device = device,
166 .module = module,
167 };
168 struct spirv_to_nir_options spirv_options = {
169 .lower_workgroup_access_to_offsets = true,
170 .frag_coord_is_sysval = true,
171 .caps = {
172 .demote_to_helper_invocation = true,
173 .derivative_group = true,
174 .descriptor_array_dynamic_indexing = true,
175 .descriptor_array_non_uniform_indexing = true,
176 .descriptor_indexing = true,
177 .device_group = true,
178 .draw_parameters = true,
179 .float16 = pdevice->info.gen >= 8,
180 .float64 = pdevice->info.gen >= 8,
181 .fragment_shader_sample_interlock = pdevice->info.gen >= 9,
182 .fragment_shader_pixel_interlock = pdevice->info.gen >= 9,
183 .geometry_streams = true,
184 .image_write_without_format = true,
185 .int8 = pdevice->info.gen >= 8,
186 .int16 = pdevice->info.gen >= 8,
187 .int64 = pdevice->info.gen >= 8,
188 .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
189 .min_lod = true,
190 .multiview = true,
191 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
192 .post_depth_coverage = pdevice->info.gen >= 9,
193 .runtime_descriptor_array = true,
194 .shader_viewport_index_layer = true,
195 .stencil_export = pdevice->info.gen >= 9,
196 .storage_8bit = pdevice->info.gen >= 8,
197 .storage_16bit = pdevice->info.gen >= 8,
198 .subgroup_arithmetic = true,
199 .subgroup_basic = true,
200 .subgroup_ballot = true,
201 .subgroup_quad = true,
202 .subgroup_shuffle = true,
203 .subgroup_vote = true,
204 .tessellation = true,
205 .transform_feedback = pdevice->info.gen >= 8,
206 .variable_pointers = true,
207 },
208 .ubo_addr_format = nir_address_format_32bit_index_offset,
209 .ssbo_addr_format =
210 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
211 .phys_ssbo_addr_format = nir_address_format_64bit_global,
212 .push_const_addr_format = nir_address_format_logical,
213
214 /* TODO: Consider changing this to an address format where the NULL
215 * pointer equals 0. That might be a better format for playing nice
216 * with certain code / code generators.
217 */
218 .shared_addr_format = nir_address_format_32bit_offset,
219 .debug = {
220 .func = anv_spirv_nir_debug,
221 .private_data = &spirv_debug_data,
222 },
223 };
224
225
226 nir_shader *nir =
227 spirv_to_nir(spirv, module->size / 4,
228 spec_entries, num_spec_entries,
229 stage, entrypoint_name, &spirv_options, nir_options);
230 assert(nir->info.stage == stage);
231 nir_validate_shader(nir, "after spirv_to_nir");
232 ralloc_steal(mem_ctx, nir);
233
234 free(spec_entries);
235
236 if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
237 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
238 gl_shader_stage_name(stage));
239 nir_print_shader(nir, stderr);
240 }
241
242 /* We have to lower away local constant initializers right before we
243 * inline functions. That way they get properly initialized at the top
244 * of the function and not at the top of its caller.
245 */
246 NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp);
247 NIR_PASS_V(nir, nir_lower_returns);
248 NIR_PASS_V(nir, nir_inline_functions);
249 NIR_PASS_V(nir, nir_opt_deref);
250
251 /* Pick off the single entrypoint that we want */
252 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
253 if (!func->is_entrypoint)
254 exec_node_remove(&func->node);
255 }
256 assert(exec_list_length(&nir->functions) == 1);
257
258 /* Now that we've deleted all but the main function, we can go ahead and
259 * lower the rest of the constant initializers. We do this here so that
260 * nir_remove_dead_variables and split_per_member_structs below see the
261 * corresponding stores.
262 */
263 NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
264
265 /* Split member structs. We do this before lower_io_to_temporaries so that
266 * it doesn't lower system values to temporaries by accident.
267 */
268 NIR_PASS_V(nir, nir_split_var_copies);
269 NIR_PASS_V(nir, nir_split_per_member_structs);
270
271 NIR_PASS_V(nir, nir_remove_dead_variables,
272 nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
273
274 NIR_PASS_V(nir, nir_propagate_invariant);
275 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
276 nir_shader_get_entrypoint(nir), true, false);
277
278 NIR_PASS_V(nir, nir_lower_frexp);
279
280 /* Vulkan uses the separate-shader linking model */
281 nir->info.separate_shader = true;
282
283 brw_preprocess_nir(compiler, nir, NULL);
284
285 return nir;
286 }
287
288 void anv_DestroyPipeline(
289 VkDevice _device,
290 VkPipeline _pipeline,
291 const VkAllocationCallbacks* pAllocator)
292 {
293 ANV_FROM_HANDLE(anv_device, device, _device);
294 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
295
296 if (!pipeline)
297 return;
298
299 anv_reloc_list_finish(&pipeline->batch_relocs,
300 pAllocator ? pAllocator : &device->alloc);
301 if (pipeline->blend_state.map)
302 anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
303
304 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
305 if (pipeline->shaders[s])
306 anv_shader_bin_unref(device, pipeline->shaders[s]);
307 }
308
309 vk_free2(&device->alloc, pAllocator, pipeline);
310 }
311
312 static const uint32_t vk_to_gen_primitive_type[] = {
313 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
314 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
315 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
316 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
317 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
318 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
319 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
320 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
321 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
322 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
323 };
324
325 static void
326 populate_sampler_prog_key(const struct gen_device_info *devinfo,
327 struct brw_sampler_prog_key_data *key)
328 {
329 /* Almost all multisampled textures are compressed. The only time when we
330 * don't compress a multisampled texture is for 16x MSAA with a surface
331 * width greater than 8k which is a bit of an edge case. Since the sampler
332 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
333 * to tell the compiler to always assume compression.
334 */
335 key->compressed_multisample_layout_mask = ~0;
336
337 /* SkyLake added support for 16x MSAA. With this came a new message for
338 * reading from a 16x MSAA surface with compression. The new message was
339 * needed because now the MCS data is 64 bits instead of 32 or lower as is
340 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
341 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
342 * so we can just use it unconditionally. This may not be quite as
343 * efficient but it saves us from recompiling.
344 */
345 if (devinfo->gen >= 9)
346 key->msaa_16 = ~0;
347
348 /* XXX: Handle texture swizzle on HSW- */
349 for (int i = 0; i < MAX_SAMPLERS; i++) {
350 /* Assume color sampler, no swizzling. (Works for BDW+) */
351 key->swizzles[i] = SWIZZLE_XYZW;
352 }
353 }
354
355 static void
356 populate_base_prog_key(const struct gen_device_info *devinfo,
357 VkPipelineShaderStageCreateFlags flags,
358 struct brw_base_prog_key *key)
359 {
360 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
361 key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
362 else
363 key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
364
365 populate_sampler_prog_key(devinfo, &key->tex);
366 }
367
368 static void
369 populate_vs_prog_key(const struct gen_device_info *devinfo,
370 VkPipelineShaderStageCreateFlags flags,
371 struct brw_vs_prog_key *key)
372 {
373 memset(key, 0, sizeof(*key));
374
375 populate_base_prog_key(devinfo, flags, &key->base);
376
377 /* XXX: Handle vertex input work-arounds */
378
379 /* XXX: Handle sampler_prog_key */
380 }
381
382 static void
383 populate_tcs_prog_key(const struct gen_device_info *devinfo,
384 VkPipelineShaderStageCreateFlags flags,
385 unsigned input_vertices,
386 struct brw_tcs_prog_key *key)
387 {
388 memset(key, 0, sizeof(*key));
389
390 populate_base_prog_key(devinfo, flags, &key->base);
391
392 key->input_vertices = input_vertices;
393 }
394
395 static void
396 populate_tes_prog_key(const struct gen_device_info *devinfo,
397 VkPipelineShaderStageCreateFlags flags,
398 struct brw_tes_prog_key *key)
399 {
400 memset(key, 0, sizeof(*key));
401
402 populate_base_prog_key(devinfo, flags, &key->base);
403 }
404
405 static void
406 populate_gs_prog_key(const struct gen_device_info *devinfo,
407 VkPipelineShaderStageCreateFlags flags,
408 struct brw_gs_prog_key *key)
409 {
410 memset(key, 0, sizeof(*key));
411
412 populate_base_prog_key(devinfo, flags, &key->base);
413 }
414
415 static void
416 populate_wm_prog_key(const struct gen_device_info *devinfo,
417 VkPipelineShaderStageCreateFlags flags,
418 const struct anv_subpass *subpass,
419 const VkPipelineMultisampleStateCreateInfo *ms_info,
420 struct brw_wm_prog_key *key)
421 {
422 memset(key, 0, sizeof(*key));
423
424 populate_base_prog_key(devinfo, flags, &key->base);
425
426 /* We set this to 0 here and set it to the actual value before we call
427 * brw_compile_fs.
428 */
429 key->input_slots_valid = 0;
430
431 /* Vulkan doesn't specify a default */
432 key->high_quality_derivatives = false;
433
434 /* XXX Vulkan doesn't appear to specify a default for this */
435 key->clamp_fragment_color = false;
436
437 assert(subpass->color_count <= MAX_RTS);
438 for (uint32_t i = 0; i < subpass->color_count; i++) {
439 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
440 key->color_outputs_valid |= (1 << i);
441 }
442
443 key->nr_color_regions = util_bitcount(key->color_outputs_valid);
444
445 /* To reduce possible shader recompilations we would need to know whether
446 * the shader writes a SampleMask output, so we could decide whether to
447 * emit code to work around the hardware disabling alpha to coverage
448 * when SampleMask is written.
449 */
450 key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
451
452 /* Vulkan doesn't support fixed-function alpha test */
453 key->alpha_test_replicate_alpha = false;
454
455 if (ms_info) {
456 /* We should probably pull this out of the shader, but it's fairly
457 * harmless to compute it and then let dead-code take care of it.
458 */
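/* Worked example: rasterizationSamples = 4, sampleShadingEnable = VK_TRUE
 * and minSampleShading = 0.5 gives 0.5 * 4 = 2 > 1, so persample_interp is
 * enabled; with minSampleShading = 0.25 the product is exactly 1 and
 * per-sample interpolation stays off.
 */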
459 if (ms_info->rasterizationSamples > 1) {
460 key->persample_interp = ms_info->sampleShadingEnable &&
461 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
462 key->multisample_fbo = true;
463 }
464
465 key->frag_coord_adds_sample_pos = key->persample_interp;
466 }
467 }
468
469 static void
470 populate_cs_prog_key(const struct gen_device_info *devinfo,
471 VkPipelineShaderStageCreateFlags flags,
472 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
473 struct brw_cs_prog_key *key)
474 {
475 memset(key, 0, sizeof(*key));
476
477 populate_base_prog_key(devinfo, flags, &key->base);
478
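/* Summary of the selection below (restating the code, not the spec):
 * - a required subgroup size is given: use exactly that size (8, 16 or 32);
 * - REQUIRE_FULL_SUBGROUPS with ALLOW_VARYING: UNIFORM, the back-end picks;
 * - REQUIRE_FULL_SUBGROUPS alone: REQUIRE_32;
 * - neither: keep whatever populate_base_prog_key() chose (VARYING or
 *   API_CONSTANT).
 */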
479 if (rss_info) {
480 assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
481
482 /* These enum values are expressly chosen to be equal to the subgroup
483 * size that they require.
484 */
485 assert(rss_info->requiredSubgroupSize == 8 ||
486 rss_info->requiredSubgroupSize == 16 ||
487 rss_info->requiredSubgroupSize == 32);
488 key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
489 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
490 /* If the client expressly requests full subgroups and they don't
491 * specify a subgroup size, we need to pick one. If they've requested
492 * varying subgroup sizes, we set it to UNIFORM and let the back-end
493 * compiler pick. Otherwise, we specify the API value of 32.
494 * Performance will likely be terrible in this case but there's nothing
495 * we can do about that. The client should have chosen a size.
496 */
497 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
498 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
499 else
500 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
501 }
502 }
503
504 struct anv_pipeline_stage {
505 gl_shader_stage stage;
506
507 const struct anv_shader_module *module;
508 const char *entrypoint;
509 const VkSpecializationInfo *spec_info;
510
511 unsigned char shader_sha1[20];
512
513 union brw_any_prog_key key;
514
515 struct {
516 gl_shader_stage stage;
517 unsigned char sha1[20];
518 } cache_key;
519
520 nir_shader *nir;
521
522 struct anv_pipeline_binding surface_to_descriptor[256];
523 struct anv_pipeline_binding sampler_to_descriptor[256];
524 struct anv_pipeline_bind_map bind_map;
525
526 union brw_any_prog_data prog_data;
527
528 VkPipelineCreationFeedbackEXT feedback;
529 };
530
531 static void
532 anv_pipeline_hash_shader(const struct anv_shader_module *module,
533 const char *entrypoint,
534 gl_shader_stage stage,
535 const VkSpecializationInfo *spec_info,
536 unsigned char *sha1_out)
537 {
538 struct mesa_sha1 ctx;
539 _mesa_sha1_init(&ctx);
540
541 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
542 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
543 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
544 if (spec_info) {
545 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
546 spec_info->mapEntryCount *
547 sizeof(*spec_info->pMapEntries));
548 _mesa_sha1_update(&ctx, spec_info->pData,
549 spec_info->dataSize);
550 }
551
552 _mesa_sha1_final(&ctx, sha1_out);
553 }
554
555 static void
556 anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
557 struct anv_pipeline_layout *layout,
558 struct anv_pipeline_stage *stages,
559 unsigned char *sha1_out)
560 {
561 struct mesa_sha1 ctx;
562 _mesa_sha1_init(&ctx);
563
564 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
565 sizeof(pipeline->subpass->view_mask));
566
567 if (layout)
568 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
569
570 const bool rba = pipeline->device->robust_buffer_access;
571 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
572
573 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
574 if (stages[s].entrypoint) {
575 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
576 sizeof(stages[s].shader_sha1));
577 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
578 }
579 }
580
581 _mesa_sha1_final(&ctx, sha1_out);
582 }
583
584 static void
585 anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
586 struct anv_pipeline_layout *layout,
587 struct anv_pipeline_stage *stage,
588 unsigned char *sha1_out)
589 {
590 struct mesa_sha1 ctx;
591 _mesa_sha1_init(&ctx);
592
593 if (layout)
594 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
595
596 const bool rba = pipeline->device->robust_buffer_access;
597 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
598
599 _mesa_sha1_update(&ctx, stage->shader_sha1,
600 sizeof(stage->shader_sha1));
601 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
602
603 _mesa_sha1_final(&ctx, sha1_out);
604 }
605
606 static nir_shader *
607 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
608 struct anv_pipeline_cache *cache,
609 void *mem_ctx,
610 struct anv_pipeline_stage *stage)
611 {
612 const struct brw_compiler *compiler =
613 pipeline->device->instance->physicalDevice.compiler;
614 const nir_shader_compiler_options *nir_options =
615 compiler->glsl_compiler_options[stage->stage].NirOptions;
616 nir_shader *nir;
617
618 nir = anv_device_search_for_nir(pipeline->device, cache,
619 nir_options,
620 stage->shader_sha1,
621 mem_ctx);
622 if (nir) {
623 assert(nir->info.stage == stage->stage);
624 return nir;
625 }
626
627 nir = anv_shader_compile_to_nir(pipeline->device,
628 mem_ctx,
629 stage->module,
630 stage->entrypoint,
631 stage->stage,
632 stage->spec_info);
633 if (nir) {
634 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
635 return nir;
636 }
637
638 return NULL;
639 }
640
641 static void
642 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
643 void *mem_ctx,
644 struct anv_pipeline_stage *stage,
645 struct anv_pipeline_layout *layout)
646 {
647 const struct anv_physical_device *pdevice =
648 &pipeline->device->instance->physicalDevice;
649 const struct brw_compiler *compiler = pdevice->compiler;
650
651 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
652 nir_shader *nir = stage->nir;
653
654 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
655 NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
656 NIR_PASS_V(nir, nir_lower_input_attachments, true);
657 }
658
659 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
660
661 NIR_PASS_V(nir, anv_nir_lower_push_constants);
662
663 if (nir->info.stage != MESA_SHADER_COMPUTE)
664 NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
665
666 if (nir->info.stage == MESA_SHADER_COMPUTE)
667 prog_data->total_shared = nir->num_shared;
668
669 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
670
671 if (nir->num_uniforms > 0) {
672 assert(prog_data->nr_params == 0);
673
674 /* If the shader uses any push constants at all, we'll just give
675 * them the maximum possible number
676 */
677 assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
678 nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
679 prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
680 prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
681
682 /* We now set the param values to be offsets into an
683 * anv_push_constants structure. Since the compiler doesn't actually
684 * dereference any of the values in the params array, it doesn't
685 * really matter what we put here.
686 */
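/* Taking the address of a member through the NULL pointer below is just a
 * way of computing the member's byte offset: param[i] ends up holding the
 * offset of client_data[i * 4] within anv_push_constants, wrapped in
 * ANV_PARAM_PUSH.
 */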
687 struct anv_push_constants *null_data = NULL;
688 /* Fill out the push constants section of the param array */
689 for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
690 prog_data->param[i] = ANV_PARAM_PUSH(
691 (uintptr_t)&null_data->client_data[i * sizeof(float)]);
692 }
693 }
694
695 if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
696 pipeline->needs_data_cache = true;
697
698 NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
699
700 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
701 nir_address_format_64bit_global);
702
703 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
704 if (layout) {
705 anv_nir_apply_pipeline_layout(pdevice,
706 pipeline->device->robust_buffer_access,
707 layout, nir, prog_data,
708 &stage->bind_map);
709
710 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
711 nir_address_format_32bit_index_offset);
712 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
713 anv_nir_ssbo_addr_format(pdevice,
714 pipeline->device->robust_buffer_access));
715
716 NIR_PASS_V(nir, nir_opt_constant_folding);
717
718 /* We don't support non-uniform UBO access, and non-uniform SSBO access
719 * is handled naturally by falling back to A64 messages.
720 */
721 NIR_PASS_V(nir, nir_lower_non_uniform_access,
722 nir_lower_non_uniform_texture_access |
723 nir_lower_non_uniform_image_access);
724 }
725
726 if (nir->info.stage != MESA_SHADER_COMPUTE)
727 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
728
729 assert(nir->num_uniforms == prog_data->nr_params * 4);
730
731 stage->nir = nir;
732 }
733
734 static void
735 anv_pipeline_link_vs(const struct brw_compiler *compiler,
736 struct anv_pipeline_stage *vs_stage,
737 struct anv_pipeline_stage *next_stage)
738 {
739 if (next_stage)
740 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
741 }
742
743 static const unsigned *
744 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
745 void *mem_ctx,
746 struct anv_device *device,
747 struct anv_pipeline_stage *vs_stage)
748 {
749 brw_compute_vue_map(compiler->devinfo,
750 &vs_stage->prog_data.vs.base.vue_map,
751 vs_stage->nir->info.outputs_written,
752 vs_stage->nir->info.separate_shader);
753
754 return brw_compile_vs(compiler, device, mem_ctx, &vs_stage->key.vs,
755 &vs_stage->prog_data.vs, vs_stage->nir, -1, NULL);
756 }
757
758 static void
759 merge_tess_info(struct shader_info *tes_info,
760 const struct shader_info *tcs_info)
761 {
762 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
763 *
764 * "PointMode. Controls generation of points rather than triangles
765 * or lines. This functionality defaults to disabled, and is
766 * enabled if either shader stage includes the execution mode."
767 *
768 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
769 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
770 * and OutputVertices, it says:
771 *
772 * "One mode must be set in at least one of the tessellation
773 * shader stages."
774 *
775 * So, the fields can be set in either the TCS or TES, but they must
776 * agree if set in both. Our backend looks at TES, so bitwise-or in
777 * the values from the TCS.
778 */
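/* For example, a TCS that only declares OutputVertices = 3 paired with a
 * TES that declares Triangles, SpacingEqual and VertexOrderCcw merges into
 * a TES info with tcs_vertices_out = 3 plus the TES's own modes; the
 * asserts below catch the case where both stages set a field to
 * conflicting values.
 */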
779 assert(tcs_info->tess.tcs_vertices_out == 0 ||
780 tes_info->tess.tcs_vertices_out == 0 ||
781 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
782 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
783
784 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
785 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
786 tcs_info->tess.spacing == tes_info->tess.spacing);
787 tes_info->tess.spacing |= tcs_info->tess.spacing;
788
789 assert(tcs_info->tess.primitive_mode == 0 ||
790 tes_info->tess.primitive_mode == 0 ||
791 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
792 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
793 tes_info->tess.ccw |= tcs_info->tess.ccw;
794 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
795 }
796
797 static void
798 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
799 struct anv_pipeline_stage *tcs_stage,
800 struct anv_pipeline_stage *tes_stage)
801 {
802 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
803
804 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
805
806 nir_lower_patch_vertices(tes_stage->nir,
807 tcs_stage->nir->info.tess.tcs_vertices_out,
808 NULL);
809
810 /* Copy TCS info into the TES info */
811 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
812
813 /* Whacking the key after cache lookup is a bit sketchy, but all of
814 * this comes from the SPIR-V, which is part of the hash used for the
815 * pipeline cache. So it should be safe.
816 */
817 tcs_stage->key.tcs.tes_primitive_mode =
818 tes_stage->nir->info.tess.primitive_mode;
819 tcs_stage->key.tcs.quads_workaround =
820 compiler->devinfo->gen < 9 &&
821 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
822 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
823 }
824
825 static const unsigned *
826 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
827 void *mem_ctx,
828 struct anv_device *device,
829 struct anv_pipeline_stage *tcs_stage,
830 struct anv_pipeline_stage *prev_stage)
831 {
832 tcs_stage->key.tcs.outputs_written =
833 tcs_stage->nir->info.outputs_written;
834 tcs_stage->key.tcs.patch_outputs_written =
835 tcs_stage->nir->info.patch_outputs_written;
836
837 return brw_compile_tcs(compiler, device, mem_ctx, &tcs_stage->key.tcs,
838 &tcs_stage->prog_data.tcs, tcs_stage->nir,
839 -1, NULL);
840 }
841
842 static void
843 anv_pipeline_link_tes(const struct brw_compiler *compiler,
844 struct anv_pipeline_stage *tes_stage,
845 struct anv_pipeline_stage *next_stage)
846 {
847 if (next_stage)
848 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
849 }
850
851 static const unsigned *
852 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
853 void *mem_ctx,
854 struct anv_device *device,
855 struct anv_pipeline_stage *tes_stage,
856 struct anv_pipeline_stage *tcs_stage)
857 {
858 tes_stage->key.tes.inputs_read =
859 tcs_stage->nir->info.outputs_written;
860 tes_stage->key.tes.patch_inputs_read =
861 tcs_stage->nir->info.patch_outputs_written;
862
863 return brw_compile_tes(compiler, device, mem_ctx, &tes_stage->key.tes,
864 &tcs_stage->prog_data.tcs.base.vue_map,
865 &tes_stage->prog_data.tes, tes_stage->nir,
866 NULL, -1, NULL);
867 }
868
869 static void
870 anv_pipeline_link_gs(const struct brw_compiler *compiler,
871 struct anv_pipeline_stage *gs_stage,
872 struct anv_pipeline_stage *next_stage)
873 {
874 if (next_stage)
875 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
876 }
877
878 static const unsigned *
879 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
880 void *mem_ctx,
881 struct anv_device *device,
882 struct anv_pipeline_stage *gs_stage,
883 struct anv_pipeline_stage *prev_stage)
884 {
885 brw_compute_vue_map(compiler->devinfo,
886 &gs_stage->prog_data.gs.base.vue_map,
887 gs_stage->nir->info.outputs_written,
888 gs_stage->nir->info.separate_shader);
889
890 return brw_compile_gs(compiler, device, mem_ctx, &gs_stage->key.gs,
891 &gs_stage->prog_data.gs, gs_stage->nir,
892 NULL, -1, NULL);
893 }
894
895 static void
896 anv_pipeline_link_fs(const struct brw_compiler *compiler,
897 struct anv_pipeline_stage *stage)
898 {
899 unsigned num_rts = 0;
900 const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1;
901 struct anv_pipeline_binding rt_bindings[max_rt];
902 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
903 int rt_to_bindings[max_rt];
904 memset(rt_to_bindings, -1, sizeof(rt_to_bindings));
905 bool rt_used[max_rt];
906 memset(rt_used, 0, sizeof(rt_used));
907
908 /* Flag used render targets */
909 nir_foreach_variable_safe(var, &stage->nir->outputs) {
910 if (var->data.location < FRAG_RESULT_DATA0)
911 continue;
912
913 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
914 /* Out-of-bounds */
915 if (rt >= MAX_RTS)
916 continue;
917
918 const unsigned array_len =
919 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
920 assert(rt + array_len <= max_rt);
921
922 /* Unused */
923 if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) {
924 /* If this is the RT at location 0 and we have alpha to coverage
925 * enabled we will have to create a null RT for it, so mark it as
926 * used.
927 */
928 if (rt > 0 || !stage->key.wm.alpha_to_coverage)
929 continue;
930 }
931
932 for (unsigned i = 0; i < array_len; i++)
933 rt_used[rt + i] = true;
934 }
935
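/* Example of the compaction below: if only locations 0 and 2 are written
 * and valid, rt_to_bindings ends up { 0, -1, 1, ... }, the output at
 * FRAG_RESULT_DATA2 is remapped to FRAG_RESULT_DATA1, and surface binding 1
 * points at color attachment index 2.
 */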
936 /* Set new, compacted, location */
937 for (unsigned i = 0; i < max_rt; i++) {
938 if (!rt_used[i])
939 continue;
940
941 rt_to_bindings[i] = num_rts;
942
943 if (stage->key.wm.color_outputs_valid & (1 << i)) {
944 rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
945 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
946 .binding = 0,
947 .index = i,
948 };
949 } else {
950 /* Setup a null render target */
951 rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
952 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
953 .binding = 0,
954 .index = UINT32_MAX,
955 };
956 }
957
958 num_rts++;
959 }
960
961 bool deleted_output = false;
962 nir_foreach_variable_safe(var, &stage->nir->outputs) {
963 if (var->data.location < FRAG_RESULT_DATA0)
964 continue;
965
966 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
967
968 if (rt >= MAX_RTS || !rt_used[rt]) {
969 /* Unused or out-of-bounds, throw it away, unless it is the first
970 * RT and we have alpha to coverage enabled.
971 */
972 deleted_output = true;
973 var->data.mode = nir_var_function_temp;
974 exec_node_remove(&var->node);
975 exec_list_push_tail(&impl->locals, &var->node);
976 continue;
977 }
978
979 /* Give it the new location */
980 assert(rt_to_bindings[rt] != -1);
981 var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0;
982 }
983
984 if (deleted_output)
985 nir_fixup_deref_modes(stage->nir);
986
987 if (num_rts == 0) {
988 /* If we have no render targets, we need a null render target */
989 rt_bindings[0] = (struct anv_pipeline_binding) {
990 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
991 .binding = 0,
992 .index = UINT32_MAX,
993 };
994 num_rts = 1;
995 }
996
997 /* Now that we've determined the actual number of render targets, adjust
998 * the key accordingly.
999 */
1000 stage->key.wm.nr_color_regions = num_rts;
1001 stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
1002
1003 assert(num_rts <= max_rt);
1004 assert(stage->bind_map.surface_count == 0);
1005 typed_memcpy(stage->bind_map.surface_to_descriptor,
1006 rt_bindings, num_rts);
1007 stage->bind_map.surface_count += num_rts;
1008 }
1009
1010 static const unsigned *
1011 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1012 void *mem_ctx,
1013 struct anv_device *device,
1014 struct anv_pipeline_stage *fs_stage,
1015 struct anv_pipeline_stage *prev_stage)
1016 {
1017 /* TODO: we could set this to 0 based on the information in nir_shader, but
1018 * we need this before we call spirv_to_nir.
1019 */
1020 assert(prev_stage);
1021 fs_stage->key.wm.input_slots_valid =
1022 prev_stage->prog_data.vue.vue_map.slots_valid;
1023
1024 const unsigned *code =
1025 brw_compile_fs(compiler, device, mem_ctx, &fs_stage->key.wm,
1026 &fs_stage->prog_data.wm, fs_stage->nir,
1027 NULL, -1, -1, -1, true, false, NULL, NULL);
1028
1029 if (fs_stage->key.wm.nr_color_regions == 0 &&
1030 !fs_stage->prog_data.wm.has_side_effects &&
1031 !fs_stage->prog_data.wm.uses_kill &&
1032 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1033 !fs_stage->prog_data.wm.computed_stencil) {
1034 /* This fragment shader has no outputs and no side effects. Go ahead
1035 * and return the code pointer so we don't accidentally think the
1036 * compile failed, but zero out prog_data, which will set program_size
1037 * to zero and disable the stage.
1038 */
1039 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1040 }
1041
1042 return code;
1043 }
1044
1045 static VkResult
1046 anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
1047 struct anv_pipeline_cache *cache,
1048 const VkGraphicsPipelineCreateInfo *info)
1049 {
1050 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1051 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1052 };
1053 int64_t pipeline_start = os_time_get_nano();
1054
1055 const struct brw_compiler *compiler =
1056 pipeline->device->instance->physicalDevice.compiler;
1057 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1058
1059 pipeline->active_stages = 0;
1060
1061 VkResult result;
1062 for (uint32_t i = 0; i < info->stageCount; i++) {
1063 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1064 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1065
1066 pipeline->active_stages |= sinfo->stage;
1067
1068 int64_t stage_start = os_time_get_nano();
1069
1070 stages[stage].stage = stage;
1071 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
1072 stages[stage].entrypoint = sinfo->pName;
1073 stages[stage].spec_info = sinfo->pSpecializationInfo;
1074 anv_pipeline_hash_shader(stages[stage].module,
1075 stages[stage].entrypoint,
1076 stage,
1077 stages[stage].spec_info,
1078 stages[stage].shader_sha1);
1079
1080 const struct gen_device_info *devinfo = &pipeline->device->info;
1081 switch (stage) {
1082 case MESA_SHADER_VERTEX:
1083 populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
1084 break;
1085 case MESA_SHADER_TESS_CTRL:
1086 populate_tcs_prog_key(devinfo, sinfo->flags,
1087 info->pTessellationState->patchControlPoints,
1088 &stages[stage].key.tcs);
1089 break;
1090 case MESA_SHADER_TESS_EVAL:
1091 populate_tes_prog_key(devinfo, sinfo->flags, &stages[stage].key.tes);
1092 break;
1093 case MESA_SHADER_GEOMETRY:
1094 populate_gs_prog_key(devinfo, sinfo->flags, &stages[stage].key.gs);
1095 break;
1096 case MESA_SHADER_FRAGMENT:
1097 populate_wm_prog_key(devinfo, sinfo->flags,
1098 pipeline->subpass,
1099 info->pMultisampleState,
1100 &stages[stage].key.wm);
1101 break;
1102 default:
1103 unreachable("Invalid graphics shader stage");
1104 }
1105
1106 stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1107 stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1108 }
1109
1110 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1111 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1112
1113 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1114
1115 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1116
1117 unsigned char sha1[20];
1118 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1119
1120 unsigned found = 0;
1121 unsigned cache_hits = 0;
1122 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1123 if (!stages[s].entrypoint)
1124 continue;
1125
1126 int64_t stage_start = os_time_get_nano();
1127
1128 stages[s].cache_key.stage = s;
1129 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1130
1131 bool cache_hit;
1132 struct anv_shader_bin *bin =
1133 anv_device_search_for_kernel(pipeline->device, cache,
1134 &stages[s].cache_key,
1135 sizeof(stages[s].cache_key), &cache_hit);
1136 if (bin) {
1137 found++;
1138 pipeline->shaders[s] = bin;
1139 }
1140
1141 if (cache_hit) {
1142 cache_hits++;
1143 stages[s].feedback.flags |=
1144 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1145 }
1146 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1147 }
1148
1149 if (found == __builtin_popcount(pipeline->active_stages)) {
1150 if (cache_hits == found) {
1151 pipeline_feedback.flags |=
1152 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1153 }
1154 /* We found all our shaders in the cache. We're done. */
1155 goto done;
1156 } else if (found > 0) {
1157 /* We found some but not all of our shaders. This shouldn't happen
1158 * most of the time but it can if we have a partially populated
1159 * pipeline cache.
1160 */
1161 assert(found < __builtin_popcount(pipeline->active_stages));
1162
1163 vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
1164 VK_DEBUG_REPORT_WARNING_BIT_EXT |
1165 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1166 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1167 (uint64_t)(uintptr_t)cache,
1168 0, 0, "anv",
1169 "Found a partial pipeline in the cache. This is "
1170 "most likely caused by an incomplete pipeline cache "
1171 "import or export");
1172
1173 /* We're going to have to recompile anyway, so just throw away our
1174 * references to the shaders in the cache. We'll get them out of the
1175 * cache again as part of the compilation process.
1176 */
1177 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1178 stages[s].feedback.flags = 0;
1179 if (pipeline->shaders[s]) {
1180 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1181 pipeline->shaders[s] = NULL;
1182 }
1183 }
1184 }
1185
1186 void *pipeline_ctx = ralloc_context(NULL);
1187
1188 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1189 if (!stages[s].entrypoint)
1190 continue;
1191
1192 int64_t stage_start = os_time_get_nano();
1193
1194 assert(stages[s].stage == s);
1195 assert(pipeline->shaders[s] == NULL);
1196
1197 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1198 .surface_to_descriptor = stages[s].surface_to_descriptor,
1199 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1200 };
1201
1202 stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache,
1203 pipeline_ctx,
1204 &stages[s]);
1205 if (stages[s].nir == NULL) {
1206 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1207 goto fail;
1208 }
1209
1210 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1211 }
1212
1213 /* Walk backwards to link */
1214 struct anv_pipeline_stage *next_stage = NULL;
1215 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1216 if (!stages[s].entrypoint)
1217 continue;
1218
1219 switch (s) {
1220 case MESA_SHADER_VERTEX:
1221 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1222 break;
1223 case MESA_SHADER_TESS_CTRL:
1224 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1225 break;
1226 case MESA_SHADER_TESS_EVAL:
1227 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1228 break;
1229 case MESA_SHADER_GEOMETRY:
1230 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1231 break;
1232 case MESA_SHADER_FRAGMENT:
1233 anv_pipeline_link_fs(compiler, &stages[s]);
1234 break;
1235 default:
1236 unreachable("Invalid graphics shader stage");
1237 }
1238
1239 next_stage = &stages[s];
1240 }
1241
1242 struct anv_pipeline_stage *prev_stage = NULL;
1243 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1244 if (!stages[s].entrypoint)
1245 continue;
1246
1247 int64_t stage_start = os_time_get_nano();
1248
1249 void *stage_ctx = ralloc_context(NULL);
1250
1251 nir_xfb_info *xfb_info = NULL;
1252 if (s == MESA_SHADER_VERTEX ||
1253 s == MESA_SHADER_TESS_EVAL ||
1254 s == MESA_SHADER_GEOMETRY)
1255 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1256
1257 anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
1258
1259 const unsigned *code;
1260 switch (s) {
1261 case MESA_SHADER_VERTEX:
1262 code = anv_pipeline_compile_vs(compiler, stage_ctx, pipeline->device,
1263 &stages[s]);
1264 break;
1265 case MESA_SHADER_TESS_CTRL:
1266 code = anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->device,
1267 &stages[s], prev_stage);
1268 break;
1269 case MESA_SHADER_TESS_EVAL:
1270 code = anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->device,
1271 &stages[s], prev_stage);
1272 break;
1273 case MESA_SHADER_GEOMETRY:
1274 code = anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->device,
1275 &stages[s], prev_stage);
1276 break;
1277 case MESA_SHADER_FRAGMENT:
1278 code = anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->device,
1279 &stages[s], prev_stage);
1280 break;
1281 default:
1282 unreachable("Invalid graphics shader stage");
1283 }
1284 if (code == NULL) {
1285 ralloc_free(stage_ctx);
1286 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1287 goto fail;
1288 }
1289
1290 struct anv_shader_bin *bin =
1291 anv_device_upload_kernel(pipeline->device, cache,
1292 &stages[s].cache_key,
1293 sizeof(stages[s].cache_key),
1294 code, stages[s].prog_data.base.program_size,
1295 stages[s].nir->constant_data,
1296 stages[s].nir->constant_data_size,
1297 &stages[s].prog_data.base,
1298 brw_prog_data_size(s),
1299 xfb_info, &stages[s].bind_map);
1300 if (!bin) {
1301 ralloc_free(stage_ctx);
1302 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1303 goto fail;
1304 }
1305
1306 pipeline->shaders[s] = bin;
1307 ralloc_free(stage_ctx);
1308
1309 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1310
1311 prev_stage = &stages[s];
1312 }
1313
1314 ralloc_free(pipeline_ctx);
1315
1316 done:
1317
1318 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1319 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1320 /* This can happen if we decided to implicitly disable the fragment
1321 * shader. See anv_pipeline_compile_fs().
1322 */
1323 anv_shader_bin_unref(pipeline->device,
1324 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1325 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1326 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1327 }
1328
1329 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1330
1331 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1332 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1333 if (create_feedback) {
1334 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1335
1336 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1337 for (uint32_t i = 0; i < info->stageCount; i++) {
1338 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1339 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1340 }
1341 }
1342
1343 return VK_SUCCESS;
1344
1345 fail:
1346 ralloc_free(pipeline_ctx);
1347
1348 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1349 if (pipeline->shaders[s])
1350 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1351 }
1352
1353 return result;
1354 }
1355
1356 VkResult
1357 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
1358 struct anv_pipeline_cache *cache,
1359 const VkComputePipelineCreateInfo *info,
1360 const struct anv_shader_module *module,
1361 const char *entrypoint,
1362 const VkSpecializationInfo *spec_info)
1363 {
1364 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1365 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1366 };
1367 int64_t pipeline_start = os_time_get_nano();
1368
1369 const struct brw_compiler *compiler =
1370 pipeline->device->instance->physicalDevice.compiler;
1371
1372 struct anv_pipeline_stage stage = {
1373 .stage = MESA_SHADER_COMPUTE,
1374 .module = module,
1375 .entrypoint = entrypoint,
1376 .spec_info = spec_info,
1377 .cache_key = {
1378 .stage = MESA_SHADER_COMPUTE,
1379 },
1380 .feedback = {
1381 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1382 },
1383 };
1384 anv_pipeline_hash_shader(stage.module,
1385 stage.entrypoint,
1386 MESA_SHADER_COMPUTE,
1387 stage.spec_info,
1388 stage.shader_sha1);
1389
1390 struct anv_shader_bin *bin = NULL;
1391
1392 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1393 vk_find_struct_const(info->stage.pNext,
1394 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1395
1396 populate_cs_prog_key(&pipeline->device->info, info->stage.flags,
1397 rss_info, &stage.key.cs);
1398
1399 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1400
1401 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1402 bool cache_hit;
1403 bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
1404 sizeof(stage.cache_key), &cache_hit);
1405
1406 if (bin == NULL) {
1407 int64_t stage_start = os_time_get_nano();
1408
1409 stage.bind_map = (struct anv_pipeline_bind_map) {
1410 .surface_to_descriptor = stage.surface_to_descriptor,
1411 .sampler_to_descriptor = stage.sampler_to_descriptor
1412 };
1413
1414 /* Set up a binding for the gl_NumWorkGroups */
1415 stage.bind_map.surface_count = 1;
1416 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1417 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1418 };
1419
1420 void *mem_ctx = ralloc_context(NULL);
1421
1422 stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
1423 if (stage.nir == NULL) {
1424 ralloc_free(mem_ctx);
1425 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1426 }
1427
1428 anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
1429
1430 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
1431 &stage.prog_data.cs);
1432
1433 const unsigned *shader_code =
1434 brw_compile_cs(compiler, pipeline->device, mem_ctx, &stage.key.cs,
1435 &stage.prog_data.cs, stage.nir, -1, NULL);
1436 if (shader_code == NULL) {
1437 ralloc_free(mem_ctx);
1438 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1439 }
1440
1441 const unsigned code_size = stage.prog_data.base.program_size;
1442 bin = anv_device_upload_kernel(pipeline->device, cache,
1443 &stage.cache_key, sizeof(stage.cache_key),
1444 shader_code, code_size,
1445 stage.nir->constant_data,
1446 stage.nir->constant_data_size,
1447 &stage.prog_data.base,
1448 sizeof(stage.prog_data.cs),
1449 NULL, &stage.bind_map);
1450 if (!bin) {
1451 ralloc_free(mem_ctx);
1452 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1453 }
1454
1455 ralloc_free(mem_ctx);
1456
1457 stage.feedback.duration = os_time_get_nano() - stage_start;
1458 }
1459
1460 if (cache_hit) {
1461 stage.feedback.flags |=
1462 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1463 pipeline_feedback.flags |=
1464 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1465 }
1466 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1467
1468 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1469 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1470 if (create_feedback) {
1471 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1472
1473 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1474 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1475 }
1476
1477 pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
1478 pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
1479
1480 return VK_SUCCESS;
1481 }
1482
1483 /**
1484 * Copy pipeline state not marked as dynamic.
1485 * Dynamic state is pipeline state which hasn't been provided at pipeline
1486 * creation time, but is dynamically provided afterwards using various
1487 * vkCmdSet* functions.
1488 *
1489 * The set of state considered "non_dynamic" is determined by the pieces of
1490 * state that have their corresponding VkDynamicState enums omitted from
1491 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1492 *
1493 * @param[out] pipeline Destination non_dynamic state.
1494 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1495 */
1496 static void
1497 copy_non_dynamic_state(struct anv_pipeline *pipeline,
1498 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1499 {
1500 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1501 struct anv_subpass *subpass = pipeline->subpass;
1502
1503 pipeline->dynamic_state = default_dynamic_state;
1504
1505 if (pCreateInfo->pDynamicState) {
1506 /* Remove all of the states that are marked as dynamic */
1507 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1508 for (uint32_t s = 0; s < count; s++) {
1509 states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
1510 pCreateInfo->pDynamicState->pDynamicStates[s]);
1511 }
1512 }
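/* For example, if VK_DYNAMIC_STATE_LINE_WIDTH appears in pDynamicStates,
 * its dirty bit is cleared from `states` above, the lineWidth copy below is
 * skipped, and the value must instead be supplied at record time with
 * vkCmdSetLineWidth().
 */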
1513
1514 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1515
1516 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1517 *
1518 * pViewportState is [...] NULL if the pipeline
1519 * has rasterization disabled.
1520 */
1521 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1522 assert(pCreateInfo->pViewportState);
1523
1524 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1525 if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
1526 typed_memcpy(dynamic->viewport.viewports,
1527 pCreateInfo->pViewportState->pViewports,
1528 pCreateInfo->pViewportState->viewportCount);
1529 }
1530
1531 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1532 if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
1533 typed_memcpy(dynamic->scissor.scissors,
1534 pCreateInfo->pViewportState->pScissors,
1535 pCreateInfo->pViewportState->scissorCount);
1536 }
1537 }
1538
1539 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
1540 assert(pCreateInfo->pRasterizationState);
1541 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1542 }
1543
1544 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
1545 assert(pCreateInfo->pRasterizationState);
1546 dynamic->depth_bias.bias =
1547 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1548 dynamic->depth_bias.clamp =
1549 pCreateInfo->pRasterizationState->depthBiasClamp;
1550 dynamic->depth_bias.slope =
1551 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1552 }
1553
1554 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1555 *
1556 * pColorBlendState is [...] NULL if the pipeline has rasterization
1557 * disabled or if the subpass of the render pass the pipeline is
1558 * created against does not use any color attachments.
1559 */
1560 bool uses_color_att = false;
1561 for (unsigned i = 0; i < subpass->color_count; ++i) {
1562 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1563 uses_color_att = true;
1564 break;
1565 }
1566 }
1567
1568 if (uses_color_att &&
1569 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1570 assert(pCreateInfo->pColorBlendState);
1571
1572 if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
1573 typed_memcpy(dynamic->blend_constants,
1574 pCreateInfo->pColorBlendState->blendConstants, 4);
1575 }
1576
1577 /* If there is no depthstencil attachment, then don't read
1578 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1579 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1580 * no need to override the depthstencil defaults in
1581 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1582 *
1583 * Section 9.2 of the Vulkan 1.0.15 spec says:
1584 *
1585 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1586 * disabled or if the subpass of the render pass the pipeline is created
1587 * against does not use a depth/stencil attachment.
1588 */
1589 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1590 subpass->depth_stencil_attachment) {
1591 assert(pCreateInfo->pDepthStencilState);
1592
1593 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
1594 dynamic->depth_bounds.min =
1595 pCreateInfo->pDepthStencilState->minDepthBounds;
1596 dynamic->depth_bounds.max =
1597 pCreateInfo->pDepthStencilState->maxDepthBounds;
1598 }
1599
1600 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
1601 dynamic->stencil_compare_mask.front =
1602 pCreateInfo->pDepthStencilState->front.compareMask;
1603 dynamic->stencil_compare_mask.back =
1604 pCreateInfo->pDepthStencilState->back.compareMask;
1605 }
1606
1607 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
1608 dynamic->stencil_write_mask.front =
1609 pCreateInfo->pDepthStencilState->front.writeMask;
1610 dynamic->stencil_write_mask.back =
1611 pCreateInfo->pDepthStencilState->back.writeMask;
1612 }
1613
1614 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
1615 dynamic->stencil_reference.front =
1616 pCreateInfo->pDepthStencilState->front.reference;
1617 dynamic->stencil_reference.back =
1618 pCreateInfo->pDepthStencilState->back.reference;
1619 }
1620 }
1621
1622 const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
1623 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
1624 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
1625 if (line_state) {
1626 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
1627 dynamic->line_stipple.factor = line_state->lineStippleFactor;
1628 dynamic->line_stipple.pattern = line_state->lineStipplePattern;
1629 }
1630 }
1631
1632 pipeline->dynamic_state_mask = states;
1633 }
1634
1635 static void
1636 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1637 {
1638 #ifdef DEBUG
1639 struct anv_render_pass *renderpass = NULL;
1640 struct anv_subpass *subpass = NULL;
1641
1642 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1643 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1644 */
1645 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1646
1647 renderpass = anv_render_pass_from_handle(info->renderPass);
1648 assert(renderpass);
1649
1650 assert(info->subpass < renderpass->subpass_count);
1651 subpass = &renderpass->subpasses[info->subpass];
1652
1653 assert(info->stageCount >= 1);
1654 assert(info->pVertexInputState);
1655 assert(info->pInputAssemblyState);
1656 assert(info->pRasterizationState);
1657 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1658 assert(info->pViewportState);
1659 assert(info->pMultisampleState);
1660
1661 if (subpass && subpass->depth_stencil_attachment)
1662 assert(info->pDepthStencilState);
1663
1664 if (subpass && subpass->color_count > 0) {
1665 bool all_color_unused = true;
1666 for (int i = 0; i < subpass->color_count; i++) {
1667 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1668 all_color_unused = false;
1669 }
1670 /* pColorBlendState is ignored if the pipeline has rasterization
1671 * disabled or if the subpass of the render pass the pipeline is
1672 * created against does not use any color attachments.
1673 */
1674 assert(info->pColorBlendState || all_color_unused);
1675 }
1676 }
1677
1678 for (uint32_t i = 0; i < info->stageCount; ++i) {
1679 switch (info->pStages[i].stage) {
1680 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1681 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1682 assert(info->pTessellationState);
1683 break;
1684 default:
1685 break;
1686 }
1687 }
1688 #endif
1689 }
1690
1691 /**
1692 * Calculate the desired L3 partitioning based on the current state of the
1693 * pipeline. For now this simply returns the conservative defaults calculated
1694 * by get_default_l3_weights(), but we could probably do better by gathering
1695 * more statistics from the pipeline state (e.g. guess of expected URB usage
1696 * and bound surfaces), or by using feed-back from performance counters.
1697 */
1698 void
1699 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1700 {
1701 const struct gen_device_info *devinfo = &pipeline->device->info;
1702
1703 const struct gen_l3_weights w =
1704 gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
1705
1706 pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
1707 pipeline->urb.total_size =
1708 gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
1709 }
1710
1711 VkResult
1712 anv_pipeline_init(struct anv_pipeline *pipeline,
1713 struct anv_device *device,
1714 struct anv_pipeline_cache *cache,
1715 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1716 const VkAllocationCallbacks *alloc)
1717 {
1718 VkResult result;
1719
1720 anv_pipeline_validate_create_info(pCreateInfo);
1721
1722 if (alloc == NULL)
1723 alloc = &device->alloc;
1724
1725 pipeline->device = device;
1726
1727 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1728 assert(pCreateInfo->subpass < render_pass->subpass_count);
1729 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1730
1731 result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
1732 if (result != VK_SUCCESS)
1733 return result;
1734
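/* The pipeline's batch is backed by the fixed batch_data storage embedded in
 * the pipeline object; the gen-specific pipeline emit code records its
 * 3DSTATE packets into this batch at creation time.
 */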
1735 pipeline->batch.alloc = alloc;
1736 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
1737 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
1738 pipeline->batch.relocs = &pipeline->batch_relocs;
1739 pipeline->batch.status = VK_SUCCESS;
1740
1741 copy_non_dynamic_state(pipeline, pCreateInfo);
1742 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1743 pCreateInfo->pRasterizationState->depthClampEnable;
1744
1745 /* Previously we enabled depth clipping when !depthClampEnable.
1746 * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
1747 * clipping info is chained in, use its enable value to determine clipping;
1748 * otherwise fall back to the previous !depthClampEnable logic.
1749 */
1750 const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
1751 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
1752 PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
1753 pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
1754
1755 pipeline->sample_shading_enable = pCreateInfo->pMultisampleState &&
1756 pCreateInfo->pMultisampleState->sampleShadingEnable;
1757
1758 pipeline->needs_data_cache = false;
1759
1760 /* When we free the pipeline, we detect stages based on the NULL status
1761 * of various prog_data pointers. Make them NULL by default.
1762 */
1763 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1764
1765 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
1766 if (result != VK_SUCCESS) {
1767 anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1768 return result;
1769 }
1770
1771 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
1772
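/* Graphics pipelines do not use shared local memory, so no SLM space is
 * requested here; the compute path passes true when the shader declares
 * shared variables.
 */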
1773 anv_pipeline_setup_l3_config(pipeline, false);
1774
1775 const VkPipelineVertexInputStateCreateInfo *vi_info =
1776 pCreateInfo->pVertexInputState;
1777
1778 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1779
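/* vb_used is a bitmask of the vertex-buffer bindings that feed at least one
 * attribute the vertex shader actually reads; bindings that only feed unused
 * attributes do not get a bit.
 */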
1780 pipeline->vb_used = 0;
1781 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1782 const VkVertexInputAttributeDescription *desc =
1783 &vi_info->pVertexAttributeDescriptions[i];
1784
1785 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
1786 pipeline->vb_used |= 1 << desc->binding;
1787 }
1788
1789 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1790 const VkVertexInputBindingDescription *desc =
1791 &vi_info->pVertexBindingDescriptions[i];
1792
1793 pipeline->vb[desc->binding].stride = desc->stride;
1794
1795 /* Step rate is programmed per vertex element (attribute), not
1796 * binding. Set up a map of which bindings step per instance, for
1797 * reference by vertex element setup. */
1798 switch (desc->inputRate) {
1799 default:
1800 case VK_VERTEX_INPUT_RATE_VERTEX:
1801 pipeline->vb[desc->binding].instanced = false;
1802 break;
1803 case VK_VERTEX_INPUT_RATE_INSTANCE:
1804 pipeline->vb[desc->binding].instanced = true;
1805 break;
1806 }
1807
1808 pipeline->vb[desc->binding].instance_divisor = 1;
1809 }
1810
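/* VK_EXT_vertex_attribute_divisor: override the default divisor of 1 for any
 * binding listed in the divisor-state chain.
 */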
1811 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
1812 vk_find_struct_const(vi_info->pNext,
1813 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1814 if (vi_div_state) {
1815 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
1816 const VkVertexInputBindingDivisorDescriptionEXT *desc =
1817 &vi_div_state->pVertexBindingDivisors[i];
1818
1819 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
1820 }
1821 }
1822
1823 /* Our implementation of VK_KHR_multiview uses instancing to draw the
1824 * different views. If the client asks for instancing, we need to multiply
1825 * the instance divisor by the number of views to ensure that we repeat
1826 * the client's per-instance data once for each view.
1827 */
1828 if (pipeline->subpass->view_mask) {
1829 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
1830 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
1831 if (pipeline->vb[vb].instanced)
1832 pipeline->vb[vb].instance_divisor *= view_count;
1833 }
1834 }
1835
1836 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1837 pCreateInfo->pInputAssemblyState;
1838 const VkPipelineTessellationStateCreateInfo *tess_info =
1839 pCreateInfo->pTessellationState;
1840 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1841
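/* With tessellation enabled, the hardware topology is a patch list sized by
 * patchControlPoints; otherwise translate the Vulkan input-assembly topology
 * directly.
 */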
1842 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
1843 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
1844 else
1845 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1846
1847 return VK_SUCCESS;
1848 }