[mesa.git] src/intel/vulkan/anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/gen_l3_config.h"
33 #include "common/gen_disasm.h"
34 #include "anv_private.h"
35 #include "compiler/brw_nir.h"
36 #include "anv_nir.h"
37 #include "nir/nir_xfb_info.h"
38 #include "spirv/nir_spirv.h"
39 #include "vk_util.h"
40
41 /* Needed for SWIZZLE macros */
42 #include "program/prog_instruction.h"
43
44 // Shader functions
45
46 VkResult anv_CreateShaderModule(
47 VkDevice _device,
48 const VkShaderModuleCreateInfo* pCreateInfo,
49 const VkAllocationCallbacks* pAllocator,
50 VkShaderModule* pShaderModule)
51 {
52 ANV_FROM_HANDLE(anv_device, device, _device);
53 struct anv_shader_module *module;
54
55 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
56 assert(pCreateInfo->flags == 0);
57
58 module = vk_alloc2(&device->vk.alloc, pAllocator,
59 sizeof(*module) + pCreateInfo->codeSize, 8,
60 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
61 if (module == NULL)
62 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
63
64 vk_object_base_init(&device->vk, &module->base,
65 VK_OBJECT_TYPE_SHADER_MODULE);
66 module->size = pCreateInfo->codeSize;
67 memcpy(module->data, pCreateInfo->pCode, module->size);
68
69 _mesa_sha1_compute(module->data, module->size, module->sha1);
70
71 *pShaderModule = anv_shader_module_to_handle(module);
72
73 return VK_SUCCESS;
74 }
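/* Illustrative usage sketch (not part of this file): this entry point is
 * reached through the Vulkan loader when an application calls
 * vkCreateShaderModule().  The `spirv_words` / `spirv_size_bytes` names are
 * hypothetical; codeSize is in bytes and must be a multiple of 4.
 *
 *    VkShaderModuleCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
 *       .codeSize = spirv_size_bytes,
 *       .pCode = spirv_words,
 *    };
 *    VkShaderModule module;
 *    VkResult result = vkCreateShaderModule(device, &info, NULL, &module);
 */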
75
76 void anv_DestroyShaderModule(
77 VkDevice _device,
78 VkShaderModule _module,
79 const VkAllocationCallbacks* pAllocator)
80 {
81 ANV_FROM_HANDLE(anv_device, device, _device);
82 ANV_FROM_HANDLE(anv_shader_module, module, _module);
83
84 if (!module)
85 return;
86
87 vk_object_base_finish(&module->base);
88 vk_free2(&device->vk.alloc, pAllocator, module);
89 }
90
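/* The first word of every SPIR-V binary is this magic number; it is used
 * below as a sanity check that the module really contains SPIR-V before the
 * words are handed to spirv_to_nir().
 */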
91 #define SPIR_V_MAGIC_NUMBER 0x07230203
92
93 struct anv_spirv_debug_data {
94 struct anv_device *device;
95 const struct anv_shader_module *module;
96 };
97
98 static void anv_spirv_nir_debug(void *private_data,
99 enum nir_spirv_debug_level level,
100 size_t spirv_offset,
101 const char *message)
102 {
103 struct anv_spirv_debug_data *debug_data = private_data;
104 struct anv_instance *instance = debug_data->device->physical->instance;
105
106 static const VkDebugReportFlagsEXT vk_flags[] = {
107 [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
108 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
109 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
110 };
111 char buffer[256];
112
113 snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
114
115 vk_debug_report(&instance->debug_report_callbacks,
116 vk_flags[level],
117 VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
118 (uint64_t) (uintptr_t) debug_data->module,
119 0, 0, "anv", buffer);
120 }
121
122 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
123 * we can't do that yet because we don't have the ability to copy nir.
124 */
125 static nir_shader *
126 anv_shader_compile_to_nir(struct anv_device *device,
127 void *mem_ctx,
128 const struct anv_shader_module *module,
129 const char *entrypoint_name,
130 gl_shader_stage stage,
131 const VkSpecializationInfo *spec_info)
132 {
133 const struct anv_physical_device *pdevice = device->physical;
134 const struct brw_compiler *compiler = pdevice->compiler;
135 const nir_shader_compiler_options *nir_options =
136 compiler->glsl_compiler_options[stage].NirOptions;
137
138 uint32_t *spirv = (uint32_t *) module->data;
139 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
140 assert(module->size % 4 == 0);
141
142 uint32_t num_spec_entries = 0;
143 struct nir_spirv_specialization *spec_entries = NULL;
144 if (spec_info && spec_info->mapEntryCount > 0) {
145 num_spec_entries = spec_info->mapEntryCount;
146 spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
147 for (uint32_t i = 0; i < num_spec_entries; i++) {
148 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
149 const void *data = spec_info->pData + entry.offset;
150 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
151
152 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
153 switch (entry.size) {
154 case 8:
155 spec_entries[i].value.u64 = *(const uint64_t *)data;
156 break;
157 case 4:
158 spec_entries[i].value.u32 = *(const uint32_t *)data;
159 break;
160 case 2:
161 spec_entries[i].value.u16 = *(const uint16_t *)data;
162 break;
163 case 1:
164 spec_entries[i].value.u8 = *(const uint8_t *)data;
165 break;
166 default:
167 assert(!"Invalid spec constant size");
168 break;
169 }
170 }
171 }
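/* Illustrative sketch (not part of this file) of the client-side data the
 * loop above consumes.  The names are hypothetical; what matters is that
 * each VkSpecializationMapEntry points at a 1-, 2-, 4-, or 8-byte value
 * inside pData:
 *
 *    uint32_t value = 64;
 *    VkSpecializationMapEntry entry = {
 *       .constantID = 0, .offset = 0, .size = sizeof(value),
 *    };
 *    VkSpecializationInfo spec_info = {
 *       .mapEntryCount = 1, .pMapEntries = &entry,
 *       .dataSize = sizeof(value), .pData = &value,
 *    };
 */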
172
173 struct anv_spirv_debug_data spirv_debug_data = {
174 .device = device,
175 .module = module,
176 };
177 struct spirv_to_nir_options spirv_options = {
178 .frag_coord_is_sysval = true,
179 .caps = {
180 .demote_to_helper_invocation = true,
181 .derivative_group = true,
182 .descriptor_array_dynamic_indexing = true,
183 .descriptor_array_non_uniform_indexing = true,
184 .descriptor_indexing = true,
185 .device_group = true,
186 .draw_parameters = true,
187 .float16 = pdevice->info.gen >= 8,
188 .float64 = pdevice->info.gen >= 8,
189 .fragment_shader_sample_interlock = pdevice->info.gen >= 9,
190 .fragment_shader_pixel_interlock = pdevice->info.gen >= 9,
191 .geometry_streams = true,
192 .image_write_without_format = true,
193 .int8 = pdevice->info.gen >= 8,
194 .int16 = pdevice->info.gen >= 8,
195 .int64 = pdevice->info.gen >= 8,
196 .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
197 .integer_functions2 = pdevice->info.gen >= 8,
198 .min_lod = true,
199 .multiview = true,
200 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
201 .post_depth_coverage = pdevice->info.gen >= 9,
202 .runtime_descriptor_array = true,
203 .float_controls = pdevice->info.gen >= 8,
204 .shader_clock = true,
205 .shader_viewport_index_layer = true,
206 .stencil_export = pdevice->info.gen >= 9,
207 .storage_8bit = pdevice->info.gen >= 8,
208 .storage_16bit = pdevice->info.gen >= 8,
209 .subgroup_arithmetic = true,
210 .subgroup_basic = true,
211 .subgroup_ballot = true,
212 .subgroup_quad = true,
213 .subgroup_shuffle = true,
214 .subgroup_vote = true,
215 .tessellation = true,
216 .transform_feedback = pdevice->info.gen >= 8,
217 .variable_pointers = true,
218 .vk_memory_model = true,
219 .vk_memory_model_device_scope = true,
220 },
221 .ubo_addr_format = nir_address_format_32bit_index_offset,
222 .ssbo_addr_format =
223 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
224 .phys_ssbo_addr_format = nir_address_format_64bit_global,
225 .push_const_addr_format = nir_address_format_logical,
226
227 /* TODO: Consider changing this to an address format where the NULL
228 * pointer equals 0. That might be a better format to play nicely
229 * with certain code / code generators.
230 */
231 .shared_addr_format = nir_address_format_32bit_offset,
232 .debug = {
233 .func = anv_spirv_nir_debug,
234 .private_data = &spirv_debug_data,
235 },
236 };
237
238
239 nir_shader *nir =
240 spirv_to_nir(spirv, module->size / 4,
241 spec_entries, num_spec_entries,
242 stage, entrypoint_name, &spirv_options, nir_options);
243 assert(nir->info.stage == stage);
244 nir_validate_shader(nir, "after spirv_to_nir");
245 ralloc_steal(mem_ctx, nir);
246
247 free(spec_entries);
248
249 if (unlikely(INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage))) {
250 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
251 gl_shader_stage_name(stage));
252 nir_print_shader(nir, stderr);
253 }
254
255 /* We have to lower away local constant initializers right before we
256 * inline functions. That way they get properly initialized at the top
257 * of the function and not at the top of its caller.
258 */
259 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
260 NIR_PASS_V(nir, nir_lower_returns);
261 NIR_PASS_V(nir, nir_inline_functions);
262 NIR_PASS_V(nir, nir_opt_deref);
263
264 /* Pick off the single entrypoint that we want */
265 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
266 if (!func->is_entrypoint)
267 exec_node_remove(&func->node);
268 }
269 assert(exec_list_length(&nir->functions) == 1);
270
271 /* Now that we've deleted all but the main function, we can go ahead and
272 * lower the rest of the constant initializers. We do this here so that
273 * nir_remove_dead_variables and split_per_member_structs below see the
274 * corresponding stores.
275 */
276 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
277
278 /* Split member structs. We do this before lower_io_to_temporaries so that
279 * it doesn't lower system values to temporaries by accident.
280 */
281 NIR_PASS_V(nir, nir_split_var_copies);
282 NIR_PASS_V(nir, nir_split_per_member_structs);
283
284 NIR_PASS_V(nir, nir_remove_dead_variables,
285 nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
286 NULL);
287
288 NIR_PASS_V(nir, nir_propagate_invariant);
289 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
290 nir_shader_get_entrypoint(nir), true, false);
291
292 NIR_PASS_V(nir, nir_lower_frexp);
293
294 /* Vulkan uses the separate-shader linking model */
295 nir->info.separate_shader = true;
296
297 brw_preprocess_nir(compiler, nir, NULL);
298
299 return nir;
300 }
301
302 VkResult
303 anv_pipeline_init(struct anv_pipeline *pipeline,
304 struct anv_device *device,
305 enum anv_pipeline_type type,
306 VkPipelineCreateFlags flags,
307 const VkAllocationCallbacks *pAllocator)
308 {
309 VkResult result;
310
311 memset(pipeline, 0, sizeof(*pipeline));
312
313 vk_object_base_init(&device->vk, &pipeline->base,
314 VK_OBJECT_TYPE_PIPELINE);
315 pipeline->device = device;
316
317 /* It's the job of the child class to provide actual backing storage for
318 * the batch by setting batch.start, batch.next, and batch.end.
319 */
320 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
321 pipeline->batch.relocs = &pipeline->batch_relocs;
322 pipeline->batch.status = VK_SUCCESS;
323
324 result = anv_reloc_list_init(&pipeline->batch_relocs,
325 pipeline->batch.alloc);
326 if (result != VK_SUCCESS)
327 return result;
328
329 pipeline->mem_ctx = ralloc_context(NULL);
330
331 pipeline->type = type;
332 pipeline->flags = flags;
333
334 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
335
336 return VK_SUCCESS;
337 }
338
339 void
340 anv_pipeline_finish(struct anv_pipeline *pipeline,
341 struct anv_device *device,
342 const VkAllocationCallbacks *pAllocator)
343 {
344 anv_reloc_list_finish(&pipeline->batch_relocs,
345 pAllocator ? pAllocator : &device->vk.alloc);
346 ralloc_free(pipeline->mem_ctx);
347 vk_object_base_finish(&pipeline->base);
348 }
349
350 void anv_DestroyPipeline(
351 VkDevice _device,
352 VkPipeline _pipeline,
353 const VkAllocationCallbacks* pAllocator)
354 {
355 ANV_FROM_HANDLE(anv_device, device, _device);
356 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
357
358 if (!pipeline)
359 return;
360
361 switch (pipeline->type) {
362 case ANV_PIPELINE_GRAPHICS: {
363 struct anv_graphics_pipeline *gfx_pipeline =
364 anv_pipeline_to_graphics(pipeline);
365
366 if (gfx_pipeline->blend_state.map)
367 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
368
369 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
370 if (gfx_pipeline->shaders[s])
371 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
372 }
373 break;
374 }
375
376 case ANV_PIPELINE_COMPUTE: {
377 struct anv_compute_pipeline *compute_pipeline =
378 anv_pipeline_to_compute(pipeline);
379
380 if (compute_pipeline->cs)
381 anv_shader_bin_unref(device, compute_pipeline->cs);
382
383 break;
384 }
385
386 default:
387 unreachable("invalid pipeline type");
388 }
389
390 anv_pipeline_finish(pipeline, device, pAllocator);
391 vk_free2(&device->vk.alloc, pAllocator, pipeline);
392 }
393
394 static const uint32_t vk_to_gen_primitive_type[] = {
395 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
396 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
397 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
398 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
399 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
400 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
401 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
402 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
403 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
404 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
405 };
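/* Note: VK_PRIMITIVE_TOPOLOGY_PATCH_LIST has no entry in this table on
 * purpose; patch topologies are handled elsewhere in the driver, based on
 * the tessellation state's patchControlPoints, rather than through a single
 * 3DPRIM value.
 */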
406
407 static void
408 populate_sampler_prog_key(const struct gen_device_info *devinfo,
409 struct brw_sampler_prog_key_data *key)
410 {
411 /* Almost all multisampled textures are compressed. The only time when we
412 * don't compress a multisampled texture is for 16x MSAA with a surface
413 * width greater than 8k which is a bit of an edge case. Since the sampler
414 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
415 * to tell the compiler to always assume compression.
416 */
417 key->compressed_multisample_layout_mask = ~0;
418
419 /* SkyLake added support for 16x MSAA. With this came a new message for
420 * reading from a 16x MSAA surface with compression. The new message was
421 * needed because now the MCS data is 64 bits instead of 32 or lower as is
422 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
423 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
424 * so we can just use it unconditionally. This may not be quite as
425 * efficient but it saves us from recompiling.
426 */
427 if (devinfo->gen >= 9)
428 key->msaa_16 = ~0;
429
430 /* XXX: Handle texture swizzle on HSW- */
431 for (int i = 0; i < MAX_SAMPLERS; i++) {
432 /* Assume color sampler, no swizzling. (Works for BDW+) */
433 key->swizzles[i] = SWIZZLE_XYZW;
434 }
435 }
436
437 static void
438 populate_base_prog_key(const struct gen_device_info *devinfo,
439 VkPipelineShaderStageCreateFlags flags,
440 struct brw_base_prog_key *key)
441 {
442 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
443 key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
444 else
445 key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
446
447 populate_sampler_prog_key(devinfo, &key->tex);
448 }
449
450 static void
451 populate_vs_prog_key(const struct gen_device_info *devinfo,
452 VkPipelineShaderStageCreateFlags flags,
453 struct brw_vs_prog_key *key)
454 {
455 memset(key, 0, sizeof(*key));
456
457 populate_base_prog_key(devinfo, flags, &key->base);
458
459 /* XXX: Handle vertex input work-arounds */
460
461 /* XXX: Handle sampler_prog_key */
462 }
463
464 static void
465 populate_tcs_prog_key(const struct gen_device_info *devinfo,
466 VkPipelineShaderStageCreateFlags flags,
467 unsigned input_vertices,
468 struct brw_tcs_prog_key *key)
469 {
470 memset(key, 0, sizeof(*key));
471
472 populate_base_prog_key(devinfo, flags, &key->base);
473
474 key->input_vertices = input_vertices;
475 }
476
477 static void
478 populate_tes_prog_key(const struct gen_device_info *devinfo,
479 VkPipelineShaderStageCreateFlags flags,
480 struct brw_tes_prog_key *key)
481 {
482 memset(key, 0, sizeof(*key));
483
484 populate_base_prog_key(devinfo, flags, &key->base);
485 }
486
487 static void
488 populate_gs_prog_key(const struct gen_device_info *devinfo,
489 VkPipelineShaderStageCreateFlags flags,
490 struct brw_gs_prog_key *key)
491 {
492 memset(key, 0, sizeof(*key));
493
494 populate_base_prog_key(devinfo, flags, &key->base);
495 }
496
497 static void
498 populate_wm_prog_key(const struct gen_device_info *devinfo,
499 VkPipelineShaderStageCreateFlags flags,
500 const struct anv_subpass *subpass,
501 const VkPipelineMultisampleStateCreateInfo *ms_info,
502 struct brw_wm_prog_key *key)
503 {
504 memset(key, 0, sizeof(*key));
505
506 populate_base_prog_key(devinfo, flags, &key->base);
507
508 /* We set this to 0 here and set it to the actual value right before we
509 * call brw_compile_fs.
510 */
511 key->input_slots_valid = 0;
512
513 /* Vulkan doesn't specify a default */
514 key->high_quality_derivatives = false;
515
516 /* XXX Vulkan doesn't appear to specify */
517 key->clamp_fragment_color = false;
518
519 key->ignore_sample_mask_out = false;
520
521 assert(subpass->color_count <= MAX_RTS);
522 for (uint32_t i = 0; i < subpass->color_count; i++) {
523 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
524 key->color_outputs_valid |= (1 << i);
525 }
526
527 key->nr_color_regions = subpass->color_count;
528
529 /* To reduce possible shader recompilations we would need to know whether
530 * the shader writes a SampleMask output; only then could we decide whether
531 * to emit the workaround for the hardware disabling alpha-to-coverage
532 * when a SampleMask output is present.
533 */
534 key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
535
536 /* Vulkan doesn't support fixed-function alpha test */
537 key->alpha_test_replicate_alpha = false;
538
539 if (ms_info) {
540 /* We should probably pull this out of the shader, but it's fairly
541 * harmless to compute it and then let dead-code take care of it.
542 */
543 if (ms_info->rasterizationSamples > 1) {
544 key->persample_interp = ms_info->sampleShadingEnable &&
545 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
546 key->multisample_fbo = true;
547 }
548
549 key->frag_coord_adds_sample_pos = key->persample_interp;
550 }
551 }
552
553 static void
554 populate_cs_prog_key(const struct gen_device_info *devinfo,
555 VkPipelineShaderStageCreateFlags flags,
556 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
557 struct brw_cs_prog_key *key)
558 {
559 memset(key, 0, sizeof(*key));
560
561 populate_base_prog_key(devinfo, flags, &key->base);
562
563 if (rss_info) {
564 assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
565
566 /* These enum values are expressly chosen to be equal to the subgroup
567 * size that they require.
568 */
569 assert(rss_info->requiredSubgroupSize == 8 ||
570 rss_info->requiredSubgroupSize == 16 ||
571 rss_info->requiredSubgroupSize == 32);
572 key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
573 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
574 /* If the client expressly requests full subgroups and they don't
575 * specify a subgroup size, we need to pick one. If they've requested
576 * varying subgroup sizes, we set it to UNIFORM and let the back-end
577 * compiler pick. Otherwise, we specify the API value of 32.
578 * Performance will likely be terrible in this case but there's nothing
579 * we can do about that. The client should have chosen a size.
580 */
581 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
582 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
583 else
584 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
585 }
586 }
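/* Illustrative sketch (not part of this file): the rss_info handled above
 * comes from VK_EXT_subgroup_size_control.  A client opting into an exact
 * subgroup size would chain something like this onto its compute stage
 * (the `rss` and `stage_create_info` names are hypothetical):
 *
 *    VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT rss = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
 *       .requiredSubgroupSize = 16,  (must be 8, 16, or 32 on this hardware)
 *    };
 *    stage_create_info.pNext = &rss;
 */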
587
588 struct anv_pipeline_stage {
589 gl_shader_stage stage;
590
591 const struct anv_shader_module *module;
592 const char *entrypoint;
593 const VkSpecializationInfo *spec_info;
594
595 unsigned char shader_sha1[20];
596
597 union brw_any_prog_key key;
598
599 struct {
600 gl_shader_stage stage;
601 unsigned char sha1[20];
602 } cache_key;
603
604 nir_shader *nir;
605
606 struct anv_pipeline_binding surface_to_descriptor[256];
607 struct anv_pipeline_binding sampler_to_descriptor[256];
608 struct anv_pipeline_bind_map bind_map;
609
610 union brw_any_prog_data prog_data;
611
612 uint32_t num_stats;
613 struct brw_compile_stats stats[3];
614 char *disasm[3];
615
616 VkPipelineCreationFeedbackEXT feedback;
617
618 const unsigned *code;
619 };
620
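/* This hash identifies a single shader stage's inputs (SPIR-V module,
 * entry point, stage, and specialization data).  It is used both as the key
 * for the NIR cache lookup in anv_pipeline_stage_get_nir() and as one
 * ingredient of the whole-pipeline hashes computed below.
 */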
621 static void
622 anv_pipeline_hash_shader(const struct anv_shader_module *module,
623 const char *entrypoint,
624 gl_shader_stage stage,
625 const VkSpecializationInfo *spec_info,
626 unsigned char *sha1_out)
627 {
628 struct mesa_sha1 ctx;
629 _mesa_sha1_init(&ctx);
630
631 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
632 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
633 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
634 if (spec_info) {
635 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
636 spec_info->mapEntryCount *
637 sizeof(*spec_info->pMapEntries));
638 _mesa_sha1_update(&ctx, spec_info->pData,
639 spec_info->dataSize);
640 }
641
642 _mesa_sha1_final(&ctx, sha1_out);
643 }
644
645 static void
646 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
647 struct anv_pipeline_layout *layout,
648 struct anv_pipeline_stage *stages,
649 unsigned char *sha1_out)
650 {
651 struct mesa_sha1 ctx;
652 _mesa_sha1_init(&ctx);
653
654 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
655 sizeof(pipeline->subpass->view_mask));
656
657 if (layout)
658 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
659
660 const bool rba = pipeline->base.device->robust_buffer_access;
661 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
662
663 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
664 if (stages[s].entrypoint) {
665 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
666 sizeof(stages[s].shader_sha1));
667 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
668 }
669 }
670
671 _mesa_sha1_final(&ctx, sha1_out);
672 }
673
674 static void
675 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
676 struct anv_pipeline_layout *layout,
677 struct anv_pipeline_stage *stage,
678 unsigned char *sha1_out)
679 {
680 struct mesa_sha1 ctx;
681 _mesa_sha1_init(&ctx);
682
683 if (layout)
684 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
685
686 const bool rba = pipeline->base.device->robust_buffer_access;
687 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
688
689 _mesa_sha1_update(&ctx, stage->shader_sha1,
690 sizeof(stage->shader_sha1));
691 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
692
693 _mesa_sha1_final(&ctx, sha1_out);
694 }
695
696 static nir_shader *
697 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
698 struct anv_pipeline_cache *cache,
699 void *mem_ctx,
700 struct anv_pipeline_stage *stage)
701 {
702 const struct brw_compiler *compiler =
703 pipeline->device->physical->compiler;
704 const nir_shader_compiler_options *nir_options =
705 compiler->glsl_compiler_options[stage->stage].NirOptions;
706 nir_shader *nir;
707
708 nir = anv_device_search_for_nir(pipeline->device, cache,
709 nir_options,
710 stage->shader_sha1,
711 mem_ctx);
712 if (nir) {
713 assert(nir->info.stage == stage->stage);
714 return nir;
715 }
716
717 nir = anv_shader_compile_to_nir(pipeline->device,
718 mem_ctx,
719 stage->module,
720 stage->entrypoint,
721 stage->stage,
722 stage->spec_info);
723 if (nir) {
724 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
725 return nir;
726 }
727
728 return NULL;
729 }
730
731 static void
732 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
733 void *mem_ctx,
734 struct anv_pipeline_stage *stage,
735 struct anv_pipeline_layout *layout)
736 {
737 const struct anv_physical_device *pdevice = pipeline->device->physical;
738 const struct brw_compiler *compiler = pdevice->compiler;
739
740 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
741 nir_shader *nir = stage->nir;
742
743 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
744 NIR_PASS_V(nir, nir_lower_wpos_center,
745 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
746 NIR_PASS_V(nir, nir_lower_input_attachments, true);
747 }
748
749 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
750
751 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
752 NIR_PASS_V(nir, anv_nir_lower_multiview,
753 anv_pipeline_to_graphics(pipeline));
754 }
755
756 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
757
758 NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL);
759
760 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
761 nir_address_format_64bit_global);
762
763 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
764 anv_nir_apply_pipeline_layout(pdevice,
765 pipeline->device->robust_buffer_access,
766 layout, nir, &stage->bind_map);
767
768 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
769 nir_address_format_32bit_index_offset);
770 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
771 anv_nir_ssbo_addr_format(pdevice,
772 pipeline->device->robust_buffer_access));
773
774 NIR_PASS_V(nir, nir_opt_constant_folding);
775
776 /* We don't support non-uniform UBOs, and non-uniform SSBO access is
777 * handled naturally by falling back to A64 messages.
778 */
779 NIR_PASS_V(nir, nir_lower_non_uniform_access,
780 nir_lower_non_uniform_texture_access |
781 nir_lower_non_uniform_image_access);
782
783 anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
784 nir, prog_data, &stage->bind_map, mem_ctx);
785
786 stage->nir = nir;
787 }
788
789 static void
790 anv_pipeline_link_vs(const struct brw_compiler *compiler,
791 struct anv_pipeline_stage *vs_stage,
792 struct anv_pipeline_stage *next_stage)
793 {
794 if (next_stage)
795 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
796 }
797
798 static void
799 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
800 void *mem_ctx,
801 struct anv_graphics_pipeline *pipeline,
802 struct anv_pipeline_stage *vs_stage)
803 {
804 /* When using Primitive Replication for multiview, each view gets its own
805 * position slot.
806 */
807 uint32_t pos_slots = pipeline->use_primitive_replication ?
808 anv_subpass_view_count(pipeline->subpass) : 1;
809
810 brw_compute_vue_map(compiler->devinfo,
811 &vs_stage->prog_data.vs.base.vue_map,
812 vs_stage->nir->info.outputs_written,
813 vs_stage->nir->info.separate_shader,
814 pos_slots);
815
816 vs_stage->num_stats = 1;
817 vs_stage->code = brw_compile_vs(compiler, pipeline->base.device, mem_ctx,
818 &vs_stage->key.vs,
819 &vs_stage->prog_data.vs,
820 vs_stage->nir, -1,
821 vs_stage->stats, NULL);
822 }
823
824 static void
825 merge_tess_info(struct shader_info *tes_info,
826 const struct shader_info *tcs_info)
827 {
828 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
829 *
830 * "PointMode. Controls generation of points rather than triangles
831 * or lines. This functionality defaults to disabled, and is
832 * enabled if either shader stage includes the execution mode.
833 *
834 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
835 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
836 * and OutputVertices, it says:
837 *
838 * "One mode must be set in at least one of the tessellation
839 * shader stages."
840 *
841 * So, the fields can be set in either the TCS or TES, but they must
842 * agree if set in both. Our backend looks at TES, so bitwise-or in
843 * the values from the TCS.
844 */
845 assert(tcs_info->tess.tcs_vertices_out == 0 ||
846 tes_info->tess.tcs_vertices_out == 0 ||
847 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
848 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
849
850 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
851 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
852 tcs_info->tess.spacing == tes_info->tess.spacing);
853 tes_info->tess.spacing |= tcs_info->tess.spacing;
854
855 assert(tcs_info->tess.primitive_mode == 0 ||
856 tes_info->tess.primitive_mode == 0 ||
857 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
858 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
859 tes_info->tess.ccw |= tcs_info->tess.ccw;
860 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
861 }
862
863 static void
864 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
865 struct anv_pipeline_stage *tcs_stage,
866 struct anv_pipeline_stage *tes_stage)
867 {
868 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
869
870 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
871
872 nir_lower_patch_vertices(tes_stage->nir,
873 tcs_stage->nir->info.tess.tcs_vertices_out,
874 NULL);
875
876 /* Copy TCS info into the TES info */
877 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
878
879 /* Whacking the key after cache lookup is a bit sketchy, but all of
880 * this comes from the SPIR-V, which is part of the hash used for the
881 * pipeline cache. So it should be safe.
882 */
883 tcs_stage->key.tcs.tes_primitive_mode =
884 tes_stage->nir->info.tess.primitive_mode;
885 tcs_stage->key.tcs.quads_workaround =
886 compiler->devinfo->gen < 9 &&
887 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
888 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
889 }
890
891 static void
892 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
893 void *mem_ctx,
894 struct anv_device *device,
895 struct anv_pipeline_stage *tcs_stage,
896 struct anv_pipeline_stage *prev_stage)
897 {
898 tcs_stage->key.tcs.outputs_written =
899 tcs_stage->nir->info.outputs_written;
900 tcs_stage->key.tcs.patch_outputs_written =
901 tcs_stage->nir->info.patch_outputs_written;
902
903 tcs_stage->num_stats = 1;
904 tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
905 &tcs_stage->key.tcs,
906 &tcs_stage->prog_data.tcs,
907 tcs_stage->nir, -1,
908 tcs_stage->stats, NULL);
909 }
910
911 static void
912 anv_pipeline_link_tes(const struct brw_compiler *compiler,
913 struct anv_pipeline_stage *tes_stage,
914 struct anv_pipeline_stage *next_stage)
915 {
916 if (next_stage)
917 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
918 }
919
920 static void
921 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
922 void *mem_ctx,
923 struct anv_device *device,
924 struct anv_pipeline_stage *tes_stage,
925 struct anv_pipeline_stage *tcs_stage)
926 {
927 tes_stage->key.tes.inputs_read =
928 tcs_stage->nir->info.outputs_written;
929 tes_stage->key.tes.patch_inputs_read =
930 tcs_stage->nir->info.patch_outputs_written;
931
932 tes_stage->num_stats = 1;
933 tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
934 &tes_stage->key.tes,
935 &tcs_stage->prog_data.tcs.base.vue_map,
936 &tes_stage->prog_data.tes,
937 tes_stage->nir, -1,
938 tes_stage->stats, NULL);
939 }
940
941 static void
942 anv_pipeline_link_gs(const struct brw_compiler *compiler,
943 struct anv_pipeline_stage *gs_stage,
944 struct anv_pipeline_stage *next_stage)
945 {
946 if (next_stage)
947 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
948 }
949
950 static void
951 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
952 void *mem_ctx,
953 struct anv_device *device,
954 struct anv_pipeline_stage *gs_stage,
955 struct anv_pipeline_stage *prev_stage)
956 {
957 brw_compute_vue_map(compiler->devinfo,
958 &gs_stage->prog_data.gs.base.vue_map,
959 gs_stage->nir->info.outputs_written,
960 gs_stage->nir->info.separate_shader, 1);
961
962 gs_stage->num_stats = 1;
963 gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
964 &gs_stage->key.gs,
965 &gs_stage->prog_data.gs,
966 gs_stage->nir, NULL, -1,
967 gs_stage->stats, NULL);
968 }
969
970 static void
971 anv_pipeline_link_fs(const struct brw_compiler *compiler,
972 struct anv_pipeline_stage *stage)
973 {
974 unsigned num_rt_bindings;
975 struct anv_pipeline_binding rt_bindings[MAX_RTS];
976 if (stage->key.wm.nr_color_regions > 0) {
977 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
978 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
979 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
980 rt_bindings[rt] = (struct anv_pipeline_binding) {
981 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
982 .index = rt,
983 };
984 } else {
985 /* Setup a null render target */
986 rt_bindings[rt] = (struct anv_pipeline_binding) {
987 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
988 .index = UINT32_MAX,
989 };
990 }
991 }
992 num_rt_bindings = stage->key.wm.nr_color_regions;
993 } else {
994 /* Setup a null render target */
995 rt_bindings[0] = (struct anv_pipeline_binding) {
996 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
997 .index = UINT32_MAX,
998 };
999 num_rt_bindings = 1;
1000 }
1001
1002 assert(num_rt_bindings <= MAX_RTS);
1003 assert(stage->bind_map.surface_count == 0);
1004 typed_memcpy(stage->bind_map.surface_to_descriptor,
1005 rt_bindings, num_rt_bindings);
1006 stage->bind_map.surface_count += num_rt_bindings;
1007
1008 /* Now that we've set up the color attachments, we can go through and
1009 * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
1010 * hopes that dead code can clean them up in this and any earlier shader
1011 * stages.
1012 */
1013 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
1014 bool deleted_output = false;
1015 nir_foreach_variable_safe(var, &stage->nir->outputs) {
1016 /* TODO: We don't delete depth/stencil writes. We probably could if the
1017 * subpass doesn't have a depth/stencil attachment.
1018 */
1019 if (var->data.location < FRAG_RESULT_DATA0)
1020 continue;
1021
1022 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1023
1024 /* If this is the RT at location 0 and we have alpha to coverage
1025 * enabled we still need that write because it will affect the coverage
1026 * mask even if it's never written to a color target.
1027 */
1028 if (rt == 0 && stage->key.wm.alpha_to_coverage)
1029 continue;
1030
1031 const unsigned array_len =
1032 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1033 assert(rt + array_len <= MAX_RTS);
1034
1035 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
1036 BITFIELD_RANGE(rt, array_len))) {
1037 deleted_output = true;
1038 var->data.mode = nir_var_function_temp;
1039 exec_node_remove(&var->node);
1040 exec_list_push_tail(&impl->locals, &var->node);
1041 }
1042 }
1043
1044 if (deleted_output)
1045 nir_fixup_deref_modes(stage->nir);
1046
1047 /* We stored the number of subpass color attachments in nr_color_regions
1048 * when calculating the key for caching. Now that we've computed the bind
1049 * map, we can reduce this to the actual max before we go into the back-end
1050 * compiler.
1051 */
1052 stage->key.wm.nr_color_regions =
1053 util_last_bit(stage->key.wm.color_outputs_valid);
1054 }
1055
1056 static void
1057 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1058 void *mem_ctx,
1059 struct anv_device *device,
1060 struct anv_pipeline_stage *fs_stage,
1061 struct anv_pipeline_stage *prev_stage)
1062 {
1063 /* TODO: we could set this to 0 based on the information in nir_shader, but
1064 * we need this before we call spirv_to_nir.
1065 */
1066 assert(prev_stage);
1067 fs_stage->key.wm.input_slots_valid =
1068 prev_stage->prog_data.vue.vue_map.slots_valid;
1069
1070 fs_stage->code = brw_compile_fs(compiler, device, mem_ctx,
1071 &fs_stage->key.wm,
1072 &fs_stage->prog_data.wm,
1073 fs_stage->nir, -1, -1, -1,
1074 true, false, NULL,
1075 fs_stage->stats, NULL);
1076
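/* brw_compile_fs may emit SIMD8, SIMD16, and/or SIMD32 variants of the
 * fragment shader; count one statistics entry per variant that was
 * actually produced.
 */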
1077 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1078 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1079 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1080
1081 if (fs_stage->key.wm.color_outputs_valid == 0 &&
1082 !fs_stage->prog_data.wm.has_side_effects &&
1083 !fs_stage->prog_data.wm.uses_omask &&
1084 !fs_stage->key.wm.alpha_to_coverage &&
1085 !fs_stage->prog_data.wm.uses_kill &&
1086 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1087 !fs_stage->prog_data.wm.computed_stencil) {
1088 /* This fragment shader has no outputs and no side effects. Go ahead
1089 * and return the code pointer so we don't accidentally think the
1090 * compile failed, but zero out prog_data, which will set program_size
1091 * to zero and disable the stage.
1092 */
1093 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1094 }
1095 }
1096
1097 static void
1098 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1099 struct anv_pipeline_stage *stage,
1100 struct brw_compile_stats *stats,
1101 uint32_t code_offset)
1102 {
1103 char *nir = NULL;
1104 if (stage->nir &&
1105 (pipeline->flags &
1106 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1107 char *stream_data = NULL;
1108 size_t stream_size = 0;
1109 FILE *stream = open_memstream(&stream_data, &stream_size);
1110
1111 nir_print_shader(stage->nir, stream);
1112
1113 fclose(stream);
1114
1115 /* Copy it to a ralloc'd thing */
1116 nir = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1117 memcpy(nir, stream_data, stream_size);
1118 nir[stream_size] = 0;
1119
1120 free(stream_data);
1121 }
1122
1123 char *disasm = NULL;
1124 if (stage->code &&
1125 (pipeline->flags &
1126 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1127 char *stream_data = NULL;
1128 size_t stream_size = 0;
1129 FILE *stream = open_memstream(&stream_data, &stream_size);
1130
1131 uint32_t push_size = 0;
1132 for (unsigned i = 0; i < 4; i++)
1133 push_size += stage->bind_map.push_ranges[i].length;
1134 if (push_size > 0) {
1135 fprintf(stream, "Push constant ranges:\n");
1136 for (unsigned i = 0; i < 4; i++) {
1137 if (stage->bind_map.push_ranges[i].length == 0)
1138 continue;
1139
1140 fprintf(stream, " RANGE%d (%dB): ", i,
1141 stage->bind_map.push_ranges[i].length * 32);
1142
1143 switch (stage->bind_map.push_ranges[i].set) {
1144 case ANV_DESCRIPTOR_SET_NULL:
1145 fprintf(stream, "NULL");
1146 break;
1147
1148 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1149 fprintf(stream, "Vulkan push constants and API params");
1150 break;
1151
1152 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1153 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1154 stage->bind_map.push_ranges[i].index,
1155 stage->bind_map.push_ranges[i].start * 32);
1156 break;
1157
1158 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1159 unreachable("gl_NumWorkgroups is never pushed");
1160
1161 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1162 fprintf(stream, "Inline shader constant data (start=%dB)",
1163 stage->bind_map.push_ranges[i].start * 32);
1164 break;
1165
1166 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1167 unreachable("Color attachments can't be pushed");
1168
1169 default:
1170 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1171 stage->bind_map.push_ranges[i].set,
1172 stage->bind_map.push_ranges[i].index,
1173 stage->bind_map.push_ranges[i].start * 32);
1174 break;
1175 }
1176 fprintf(stream, "\n");
1177 }
1178 fprintf(stream, "\n");
1179 }
1180
1181 /* Creating this is far cheaper than it looks. It's perfectly fine to
1182 * do it for every binary.
1183 */
1184 struct gen_disasm *d = gen_disasm_create(&pipeline->device->info);
1185 gen_disasm_disassemble(d, stage->code, code_offset, stream);
1186 gen_disasm_destroy(d);
1187
1188 fclose(stream);
1189
1190 /* Copy it to a ralloc'd thing */
1191 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1192 memcpy(disasm, stream_data, stream_size);
1193 disasm[stream_size] = 0;
1194
1195 free(stream_data);
1196 }
1197
1198 const struct anv_pipeline_executable exe = {
1199 .stage = stage->stage,
1200 .stats = *stats,
1201 .nir = nir,
1202 .disasm = disasm,
1203 };
1204 util_dynarray_append(&pipeline->executables,
1205 struct anv_pipeline_executable, exe);
1206 }
1207
1208 static void
1209 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1210 struct anv_pipeline_stage *stage,
1211 struct anv_shader_bin *bin)
1212 {
1213 if (stage->stage == MESA_SHADER_FRAGMENT) {
1214 /* We pull the prog data and stats out of the anv_shader_bin because
1215 * the anv_pipeline_stage may not be fully populated if we successfully
1216 * looked up the shader in a cache.
1217 */
1218 const struct brw_wm_prog_data *wm_prog_data =
1219 (const struct brw_wm_prog_data *)bin->prog_data;
1220 struct brw_compile_stats *stats = bin->stats;
1221
1222 if (wm_prog_data->dispatch_8) {
1223 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1224 }
1225
1226 if (wm_prog_data->dispatch_16) {
1227 anv_pipeline_add_executable(pipeline, stage, stats++,
1228 wm_prog_data->prog_offset_16);
1229 }
1230
1231 if (wm_prog_data->dispatch_32) {
1232 anv_pipeline_add_executable(pipeline, stage, stats++,
1233 wm_prog_data->prog_offset_32);
1234 }
1235 } else {
1236 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1237 }
1238 }
1239
1240 static void
1241 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1242 {
1243 /* TODO: Cache this pipeline-wide information. */
1244
1245 /* Primitive replication depends on information from all the shaders.
1246 * Recover this bit from the fact that the vertex shader has more than one
1247 * position slot when primitive replication is in use.
1248 */
1249 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1250 int pos_slots = 0;
1251 const struct brw_vue_prog_data *vue_prog_data =
1252 (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1253 const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1254 for (int i = 0; i < vue_map->num_slots; i++) {
1255 if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1256 pos_slots++;
1257 }
1258 pipeline->use_primitive_replication = pos_slots > 1;
1259 }
1260
1261 static VkResult
1262 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1263 struct anv_pipeline_cache *cache,
1264 const VkGraphicsPipelineCreateInfo *info)
1265 {
1266 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1267 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1268 };
1269 int64_t pipeline_start = os_time_get_nano();
1270
1271 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1272 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1273
1274 pipeline->active_stages = 0;
1275
1276 VkResult result;
1277 for (uint32_t i = 0; i < info->stageCount; i++) {
1278 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1279 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1280
1281 pipeline->active_stages |= sinfo->stage;
1282
1283 int64_t stage_start = os_time_get_nano();
1284
1285 stages[stage].stage = stage;
1286 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
1287 stages[stage].entrypoint = sinfo->pName;
1288 stages[stage].spec_info = sinfo->pSpecializationInfo;
1289 anv_pipeline_hash_shader(stages[stage].module,
1290 stages[stage].entrypoint,
1291 stage,
1292 stages[stage].spec_info,
1293 stages[stage].shader_sha1);
1294
1295 const struct gen_device_info *devinfo = &pipeline->base.device->info;
1296 switch (stage) {
1297 case MESA_SHADER_VERTEX:
1298 populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
1299 break;
1300 case MESA_SHADER_TESS_CTRL:
1301 populate_tcs_prog_key(devinfo, sinfo->flags,
1302 info->pTessellationState->patchControlPoints,
1303 &stages[stage].key.tcs);
1304 break;
1305 case MESA_SHADER_TESS_EVAL:
1306 populate_tes_prog_key(devinfo, sinfo->flags, &stages[stage].key.tes);
1307 break;
1308 case MESA_SHADER_GEOMETRY:
1309 populate_gs_prog_key(devinfo, sinfo->flags, &stages[stage].key.gs);
1310 break;
1311 case MESA_SHADER_FRAGMENT: {
1312 const bool raster_enabled =
1313 !info->pRasterizationState->rasterizerDiscardEnable;
1314 populate_wm_prog_key(devinfo, sinfo->flags,
1315 pipeline->subpass,
1316 raster_enabled ? info->pMultisampleState : NULL,
1317 &stages[stage].key.wm);
1318 break;
1319 }
1320 default:
1321 unreachable("Invalid graphics shader stage");
1322 }
1323
1324 stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1325 stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1326 }
1327
1328 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1329 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1330
1331 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1332
1333 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1334
1335 unsigned char sha1[20];
1336 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1337
1338 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1339 if (!stages[s].entrypoint)
1340 continue;
1341
1342 stages[s].cache_key.stage = s;
1343 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1344 }
1345
1346 const bool skip_cache_lookup =
1347 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1348
1349 if (!skip_cache_lookup) {
1350 unsigned found = 0;
1351 unsigned cache_hits = 0;
1352 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1353 if (!stages[s].entrypoint)
1354 continue;
1355
1356 int64_t stage_start = os_time_get_nano();
1357
1358 bool cache_hit;
1359 struct anv_shader_bin *bin =
1360 anv_device_search_for_kernel(pipeline->base.device, cache,
1361 &stages[s].cache_key,
1362 sizeof(stages[s].cache_key), &cache_hit);
1363 if (bin) {
1364 found++;
1365 pipeline->shaders[s] = bin;
1366 }
1367
1368 if (cache_hit) {
1369 cache_hits++;
1370 stages[s].feedback.flags |=
1371 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1372 }
1373 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1374 }
1375
1376 if (found == __builtin_popcount(pipeline->active_stages)) {
1377 if (cache_hits == found) {
1378 pipeline_feedback.flags |=
1379 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1380 }
1381 /* We found all our shaders in the cache. We're done. */
1382 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1383 if (!stages[s].entrypoint)
1384 continue;
1385
1386 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1387 pipeline->shaders[s]);
1388 }
1389 anv_pipeline_init_from_cached_graphics(pipeline);
1390 goto done;
1391 } else if (found > 0) {
1392 /* We found some but not all of our shaders. This shouldn't happen
1393 * most of the time but it can if we have a partially populated
1394 * pipeline cache.
1395 */
1396 assert(found < __builtin_popcount(pipeline->active_stages));
1397
1398 vk_debug_report(&pipeline->base.device->physical->instance->debug_report_callbacks,
1399 VK_DEBUG_REPORT_WARNING_BIT_EXT |
1400 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1401 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1402 (uint64_t)(uintptr_t)cache,
1403 0, 0, "anv",
1404 "Found a partial pipeline in the cache. This is "
1405 "most likely caused by an incomplete pipeline cache "
1406 "import or export");
1407
1408 /* We're going to have to recompile anyway, so just throw away our
1409 * references to the shaders in the cache. We'll get them out of the
1410 * cache again as part of the compilation process.
1411 */
1412 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1413 stages[s].feedback.flags = 0;
1414 if (pipeline->shaders[s]) {
1415 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1416 pipeline->shaders[s] = NULL;
1417 }
1418 }
1419 }
1420 }
1421
1422 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1423 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1424
1425 void *pipeline_ctx = ralloc_context(NULL);
1426
1427 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1428 if (!stages[s].entrypoint)
1429 continue;
1430
1431 int64_t stage_start = os_time_get_nano();
1432
1433 assert(stages[s].stage == s);
1434 assert(pipeline->shaders[s] == NULL);
1435
1436 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1437 .surface_to_descriptor = stages[s].surface_to_descriptor,
1438 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1439 };
1440
1441 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1442 pipeline_ctx,
1443 &stages[s]);
1444 if (stages[s].nir == NULL) {
1445 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1446 goto fail;
1447 }
1448
1449 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1450 }
1451
1452 /* Walk backwards to link */
1453 struct anv_pipeline_stage *next_stage = NULL;
1454 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1455 if (!stages[s].entrypoint)
1456 continue;
1457
1458 switch (s) {
1459 case MESA_SHADER_VERTEX:
1460 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1461 break;
1462 case MESA_SHADER_TESS_CTRL:
1463 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1464 break;
1465 case MESA_SHADER_TESS_EVAL:
1466 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1467 break;
1468 case MESA_SHADER_GEOMETRY:
1469 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1470 break;
1471 case MESA_SHADER_FRAGMENT:
1472 anv_pipeline_link_fs(compiler, &stages[s]);
1473 break;
1474 default:
1475 unreachable("Invalid graphics shader stage");
1476 }
1477
1478 next_stage = &stages[s];
1479 }
1480
1481 if (pipeline->base.device->info.gen >= 12 &&
1482 pipeline->subpass->view_mask != 0) {
1483 /* For some pipelines HW Primitive Replication can be used instead of
1484 * instancing to implement Multiview. This depends on how viewIndex is
1485 * used in all the active shaders, so this check can't be done per
1486 * individual shader.
1487 */
1488 nir_shader *shaders[MESA_SHADER_STAGES] = {};
1489 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1490 shaders[s] = stages[s].nir;
1491
1492 pipeline->use_primitive_replication =
1493 anv_check_for_primitive_replication(shaders, pipeline);
1494 } else {
1495 pipeline->use_primitive_replication = false;
1496 }
1497
1498 struct anv_pipeline_stage *prev_stage = NULL;
1499 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1500 if (!stages[s].entrypoint)
1501 continue;
1502
1503 int64_t stage_start = os_time_get_nano();
1504
1505 void *stage_ctx = ralloc_context(NULL);
1506
1507 nir_xfb_info *xfb_info = NULL;
1508 if (s == MESA_SHADER_VERTEX ||
1509 s == MESA_SHADER_TESS_EVAL ||
1510 s == MESA_SHADER_GEOMETRY)
1511 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1512
1513 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1514
1515 switch (s) {
1516 case MESA_SHADER_VERTEX:
1517 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1518 &stages[s]);
1519 break;
1520 case MESA_SHADER_TESS_CTRL:
1521 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1522 &stages[s], prev_stage);
1523 break;
1524 case MESA_SHADER_TESS_EVAL:
1525 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1526 &stages[s], prev_stage);
1527 break;
1528 case MESA_SHADER_GEOMETRY:
1529 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1530 &stages[s], prev_stage);
1531 break;
1532 case MESA_SHADER_FRAGMENT:
1533 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1534 &stages[s], prev_stage);
1535 break;
1536 default:
1537 unreachable("Invalid graphics shader stage");
1538 }
1539 if (stages[s].code == NULL) {
1540 ralloc_free(stage_ctx);
1541 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1542 goto fail;
1543 }
1544
1545 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1546 &stages[s].bind_map);
1547
1548 struct anv_shader_bin *bin =
1549 anv_device_upload_kernel(pipeline->base.device, cache, s,
1550 &stages[s].cache_key,
1551 sizeof(stages[s].cache_key),
1552 stages[s].code,
1553 stages[s].prog_data.base.program_size,
1554 stages[s].nir->constant_data,
1555 stages[s].nir->constant_data_size,
1556 &stages[s].prog_data.base,
1557 brw_prog_data_size(s),
1558 stages[s].stats, stages[s].num_stats,
1559 xfb_info, &stages[s].bind_map);
1560 if (!bin) {
1561 ralloc_free(stage_ctx);
1562 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1563 goto fail;
1564 }
1565
1566 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1567
1568 pipeline->shaders[s] = bin;
1569 ralloc_free(stage_ctx);
1570
1571 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1572
1573 prev_stage = &stages[s];
1574 }
1575
1576 ralloc_free(pipeline_ctx);
1577
1578 done:
1579
1580 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1581 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1582 /* This can happen if we decided to implicitly disable the fragment
1583 * shader. See anv_pipeline_compile_fs().
1584 */
1585 anv_shader_bin_unref(pipeline->base.device,
1586 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1587 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1588 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1589 }
1590
1591 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1592
1593 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1594 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1595 if (create_feedback) {
1596 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1597
1598 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1599 for (uint32_t i = 0; i < info->stageCount; i++) {
1600 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1601 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1602 }
1603 }
1604
1605 return VK_SUCCESS;
1606
1607 fail:
1608 ralloc_free(pipeline_ctx);
1609
1610 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1611 if (pipeline->shaders[s])
1612 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1613 }
1614
1615 return result;
1616 }
1617
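/* Size/alignment callback used when lowering shared variables to explicit
 * offsets.  For example, a vec3 of 32-bit values gets size 12 but alignment
 * 16, since three-component vectors are padded out to four components.
 */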
1618 static void
1619 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
1620 {
1621 assert(glsl_type_is_vector_or_scalar(type));
1622
1623 uint32_t comp_size = glsl_type_is_boolean(type)
1624 ? 4 : glsl_get_bit_size(type) / 8;
1625 unsigned length = glsl_get_vector_elements(type);
1626 *size = comp_size * length;
1627 *align = comp_size * (length == 3 ? 4 : length);
1628 }
1629
1630 VkResult
1631 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1632 struct anv_pipeline_cache *cache,
1633 const VkComputePipelineCreateInfo *info,
1634 const struct anv_shader_module *module,
1635 const char *entrypoint,
1636 const VkSpecializationInfo *spec_info)
1637 {
1638 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1639 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1640 };
1641 int64_t pipeline_start = os_time_get_nano();
1642
1643 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1644
1645 struct anv_pipeline_stage stage = {
1646 .stage = MESA_SHADER_COMPUTE,
1647 .module = module,
1648 .entrypoint = entrypoint,
1649 .spec_info = spec_info,
1650 .cache_key = {
1651 .stage = MESA_SHADER_COMPUTE,
1652 },
1653 .feedback = {
1654 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1655 },
1656 };
1657 anv_pipeline_hash_shader(stage.module,
1658 stage.entrypoint,
1659 MESA_SHADER_COMPUTE,
1660 stage.spec_info,
1661 stage.shader_sha1);
1662
1663 struct anv_shader_bin *bin = NULL;
1664
1665 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1666 vk_find_struct_const(info->stage.pNext,
1667 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1668
1669 populate_cs_prog_key(&pipeline->base.device->info, info->stage.flags,
1670 rss_info, &stage.key.cs);
1671
1672 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1673
1674 const bool skip_cache_lookup =
1675 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1676
1677 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1678
1679 bool cache_hit = false;
1680 if (!skip_cache_lookup) {
1681 bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1682 &stage.cache_key,
1683 sizeof(stage.cache_key),
1684 &cache_hit);
1685 }
1686
1687 if (bin == NULL &&
1688 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1689 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1690
1691 void *mem_ctx = ralloc_context(NULL);
1692 if (bin == NULL) {
1693 int64_t stage_start = os_time_get_nano();
1694
1695 stage.bind_map = (struct anv_pipeline_bind_map) {
1696 .surface_to_descriptor = stage.surface_to_descriptor,
1697 .sampler_to_descriptor = stage.sampler_to_descriptor
1698 };
1699
1700 /* Set up a binding for the gl_NumWorkGroups */
1701 stage.bind_map.surface_count = 1;
1702 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1703 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1704 };
1705
1706 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1707 if (stage.nir == NULL) {
1708 ralloc_free(mem_ctx);
1709 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1710 }
1711
1712 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1713
1714 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1715
1716 NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
1717 nir_var_mem_shared, shared_type_info);
1718 NIR_PASS_V(stage.nir, nir_lower_explicit_io,
1719 nir_var_mem_shared, nir_address_format_32bit_offset);
1720 NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1721
1722 stage.num_stats = 1;
1723 stage.code = brw_compile_cs(compiler, pipeline->base.device, mem_ctx,
1724 &stage.key.cs, &stage.prog_data.cs,
1725 stage.nir, -1, stage.stats, NULL);
1726 if (stage.code == NULL) {
1727 ralloc_free(mem_ctx);
1728 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1729 }
1730
1731 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1732
1733 if (!stage.prog_data.cs.uses_num_work_groups) {
1734 assert(stage.bind_map.surface_to_descriptor[0].set ==
1735 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1736 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1737 }
1738
1739 const unsigned code_size = stage.prog_data.base.program_size;
1740 bin = anv_device_upload_kernel(pipeline->base.device, cache,
1741 MESA_SHADER_COMPUTE,
1742 &stage.cache_key, sizeof(stage.cache_key),
1743 stage.code, code_size,
1744 stage.nir->constant_data,
1745 stage.nir->constant_data_size,
1746 &stage.prog_data.base,
1747 sizeof(stage.prog_data.cs),
1748 stage.stats, stage.num_stats,
1749 NULL, &stage.bind_map);
1750 if (!bin) {
1751 ralloc_free(mem_ctx);
1752 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1753 }
1754
1755 stage.feedback.duration = os_time_get_nano() - stage_start;
1756 }
1757
1758 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1759
1760 ralloc_free(mem_ctx);
1761
1762 if (cache_hit) {
1763 stage.feedback.flags |=
1764 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1765 pipeline_feedback.flags |=
1766 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1767 }
1768 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1769
1770 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1771 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1772 if (create_feedback) {
1773 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1774
1775 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1776 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1777 }
1778
1779 pipeline->cs = bin;
1780
1781 return VK_SUCCESS;
1782 }
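/* Illustration only (editor's note, application-side usage, not driver
 * code): the creation-feedback handling above is exercised by chaining
 * VkPipelineCreationFeedbackCreateInfoEXT into the compute pipeline create
 * info. All variable names below are placeholders.
 *
 *    VkPipelineCreationFeedbackEXT pipe_fb = {0}, stage_fb = {0};
 *    const VkPipelineCreationFeedbackCreateInfoEXT fb_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT,
 *       .pPipelineCreationFeedback = &pipe_fb,
 *       .pipelineStageCreationFeedbackCount = 1,
 *       .pPipelineStageCreationFeedbacks = &stage_fb,
 *    };
 *
 * After chaining fb_info into VkComputePipelineCreateInfo::pNext and calling
 * vkCreateComputePipelines(), pipe_fb.duration holds the compile time in
 * nanoseconds and the flags indicate whether the pipeline cache was hit.
 */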
1783
1784 struct anv_cs_parameters
1785 anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
1786 {
1787 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1788
1789 struct anv_cs_parameters cs_params = {};
1790
1791 cs_params.group_size = cs_prog_data->local_size[0] *
1792 cs_prog_data->local_size[1] *
1793 cs_prog_data->local_size[2];
1794 cs_params.simd_size =
1795 brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
1796 cs_prog_data, cs_params.group_size);
1797 cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
1798
1799 return cs_params;
1800 }
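/* Worked example for anv_cs_parameters() above (editor's illustration, not
 * driver code): a compute shader with local_size 8x8x1 has group_size = 64;
 * if brw_cs_simd_size_for_group_size() selects SIMD16, then
 * threads = DIV_ROUND_UP(64, 16) = 4 hardware threads per workgroup.
 */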
1801
1802 /**
1803 * Copy pipeline state not marked as dynamic.
1804 * Dynamic state is pipeline state that is not provided at pipeline
1805 * creation time but is instead set afterwards through the various
1806 * vkCmdSet* functions.
1807 *
1808 * The set of state considered "non_dynamic" is determined by the pieces of
1809 * state that have their corresponding VkDynamicState enums omitted from
1810 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1811 *
1812 * @param[out] pipeline Destination non_dynamic state.
1813 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1814 */
1815 static void
1816 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
1817 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1818 {
1819 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1820 struct anv_subpass *subpass = pipeline->subpass;
1821
1822 pipeline->dynamic_state = default_dynamic_state;
1823
1824 if (pCreateInfo->pDynamicState) {
1825 /* Remove all of the states that are marked as dynamic */
1826 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1827 for (uint32_t s = 0; s < count; s++) {
1828 states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
1829 pCreateInfo->pDynamicState->pDynamicStates[s]);
1830 }
1831 }
1832
1833 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1834
1835 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1836 *
1837 * pViewportState is [...] NULL if the pipeline
1838 * has rasterization disabled.
1839 */
1840 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1841 assert(pCreateInfo->pViewportState);
1842
1843 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1844 if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
1845 typed_memcpy(dynamic->viewport.viewports,
1846 pCreateInfo->pViewportState->pViewports,
1847 pCreateInfo->pViewportState->viewportCount);
1848 }
1849
1850 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1851 if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
1852 typed_memcpy(dynamic->scissor.scissors,
1853 pCreateInfo->pViewportState->pScissors,
1854 pCreateInfo->pViewportState->scissorCount);
1855 }
1856 }
1857
1858 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
1859 assert(pCreateInfo->pRasterizationState);
1860 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1861 }
1862
1863 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
1864 assert(pCreateInfo->pRasterizationState);
1865 dynamic->depth_bias.bias =
1866 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1867 dynamic->depth_bias.clamp =
1868 pCreateInfo->pRasterizationState->depthBiasClamp;
1869 dynamic->depth_bias.slope =
1870 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1871 }
1872
1873 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1874 *
1875 * pColorBlendState is [...] NULL if the pipeline has rasterization
1876 * disabled or if the subpass of the render pass the pipeline is
1877 * created against does not use any color attachments.
1878 */
1879 bool uses_color_att = false;
1880 for (unsigned i = 0; i < subpass->color_count; ++i) {
1881 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1882 uses_color_att = true;
1883 break;
1884 }
1885 }
1886
1887 if (uses_color_att &&
1888 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1889 assert(pCreateInfo->pColorBlendState);
1890
1891 if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
1892 typed_memcpy(dynamic->blend_constants,
1893 pCreateInfo->pColorBlendState->blendConstants, 4);
1894 }
1895
1896 /* If there is no depth/stencil attachment, then don't read
1897 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1898 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1899 * no need to override the depth/stencil defaults in
1900 * anv_pipeline::dynamic_state when there is no depth/stencil attachment.
1901 *
1902 * Section 9.2 of the Vulkan 1.0.15 spec says:
1903 *
1904 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1905 * disabled or if the subpass of the render pass the pipeline is created
1906 * against does not use a depth/stencil attachment.
1907 */
1908 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1909 subpass->depth_stencil_attachment) {
1910 assert(pCreateInfo->pDepthStencilState);
1911
1912 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
1913 dynamic->depth_bounds.min =
1914 pCreateInfo->pDepthStencilState->minDepthBounds;
1915 dynamic->depth_bounds.max =
1916 pCreateInfo->pDepthStencilState->maxDepthBounds;
1917 }
1918
1919 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
1920 dynamic->stencil_compare_mask.front =
1921 pCreateInfo->pDepthStencilState->front.compareMask;
1922 dynamic->stencil_compare_mask.back =
1923 pCreateInfo->pDepthStencilState->back.compareMask;
1924 }
1925
1926 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
1927 dynamic->stencil_write_mask.front =
1928 pCreateInfo->pDepthStencilState->front.writeMask;
1929 dynamic->stencil_write_mask.back =
1930 pCreateInfo->pDepthStencilState->back.writeMask;
1931 }
1932
1933 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
1934 dynamic->stencil_reference.front =
1935 pCreateInfo->pDepthStencilState->front.reference;
1936 dynamic->stencil_reference.back =
1937 pCreateInfo->pDepthStencilState->back.reference;
1938 }
1939 }
1940
1941 const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
1942 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
1943 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
1944 if (line_state) {
1945 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
1946 dynamic->line_stipple.factor = line_state->lineStippleFactor;
1947 dynamic->line_stipple.pattern = line_state->lineStipplePattern;
1948 }
1949 }
1950
1951 pipeline->dynamic_state_mask = states;
1952 }
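/* Illustration only (editor's note, application-side usage, not driver
 * code): an application removes state from the "non-dynamic" set handled by
 * copy_non_dynamic_state() above by listing it in
 * VkPipelineDynamicStateCreateInfo. Variable names are placeholders.
 *
 *    const VkDynamicState dyn_states[] = {
 *       VK_DYNAMIC_STATE_VIEWPORT,
 *       VK_DYNAMIC_STATE_SCISSOR,
 *    };
 *    const VkPipelineDynamicStateCreateInfo dyn_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 *       .dynamicStateCount = 2,
 *       .pDynamicStates = dyn_states,
 *    };
 *
 * With this chained into VkGraphicsPipelineCreateInfo::pDynamicState, the
 * viewports and scissors are not copied here and are instead expected later
 * via vkCmdSetViewport()/vkCmdSetScissor().
 */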
1953
1954 static void
1955 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1956 {
1957 #ifdef DEBUG
1958 struct anv_render_pass *renderpass = NULL;
1959 struct anv_subpass *subpass = NULL;
1960
1961 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1962 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1963 */
1964 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1965
1966 renderpass = anv_render_pass_from_handle(info->renderPass);
1967 assert(renderpass);
1968
1969 assert(info->subpass < renderpass->subpass_count);
1970 subpass = &renderpass->subpasses[info->subpass];
1971
1972 assert(info->stageCount >= 1);
1973 assert(info->pVertexInputState);
1974 assert(info->pInputAssemblyState);
1975 assert(info->pRasterizationState);
1976 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1977 assert(info->pViewportState);
1978 assert(info->pMultisampleState);
1979
1980 if (subpass && subpass->depth_stencil_attachment)
1981 assert(info->pDepthStencilState);
1982
1983 if (subpass && subpass->color_count > 0) {
1984 bool all_color_unused = true;
1985 for (int i = 0; i < subpass->color_count; i++) {
1986 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1987 all_color_unused = false;
1988 }
1989 /* pColorBlendState is ignored if the pipeline has rasterization
1990 * disabled or if the subpass of the render pass the pipeline is
1991 * created against does not use any color attachments.
1992 */
1993 assert(info->pColorBlendState || all_color_unused);
1994 }
1995 }
1996
1997 for (uint32_t i = 0; i < info->stageCount; ++i) {
1998 switch (info->pStages[i].stage) {
1999 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2000 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2001 assert(info->pTessellationState);
2002 break;
2003 default:
2004 break;
2005 }
2006 }
2007 #endif
2008 }
2009
2010 /**
2011 * Calculate the desired L3 partitioning based on the current state of the
2012 * pipeline. For now this simply returns the conservative defaults calculated
2013 * by gen_get_default_l3_weights(), but we could probably do better by gathering
2014 * more statistics from the pipeline state (e.g. a guess of expected URB usage
2015 * and bound surfaces), or by using feedback from performance counters.
2016 */
2017 void
2018 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2019 {
2020 const struct gen_device_info *devinfo = &pipeline->device->info;
2021
2022 const struct gen_l3_weights w =
2023 gen_get_default_l3_weights(devinfo, true, needs_slm);
2024
2025 pipeline->l3_config = gen_get_l3_config(devinfo, w);
2026 }
2027
2028 VkResult
2029 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2030 struct anv_device *device,
2031 struct anv_pipeline_cache *cache,
2032 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2033 const VkAllocationCallbacks *alloc)
2034 {
2035 VkResult result;
2036
2037 anv_pipeline_validate_create_info(pCreateInfo);
2038
2039 result = anv_pipeline_init(&pipeline->base, device,
2040 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2041 alloc);
2042 if (result != VK_SUCCESS)
2043 return result;
2044
2045 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2046 pipeline->batch_data, sizeof(pipeline->batch_data));
2047
2048 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2049 assert(pCreateInfo->subpass < render_pass->subpass_count);
2050 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2051
2052 assert(pCreateInfo->pRasterizationState);
2053
2054 copy_non_dynamic_state(pipeline, pCreateInfo);
2055 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2056
2057 /* Previously we enabled depth clipping when !depthClampEnable.
2058 * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
2059 * clipping info is available, use its enable value to determine clipping;
2060 * otherwise fall back to the previous !depthClampEnable logic.
2061 */
2062 const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2063 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2064 PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2065 pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2066
2067 pipeline->sample_shading_enable =
2068 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2069 pCreateInfo->pMultisampleState &&
2070 pCreateInfo->pMultisampleState->sampleShadingEnable;
2071
2072 /* When we free the pipeline, we detect stages based on the NULL status
2073 * of various prog_data pointers. Make them NULL by default.
2074 */
2075 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
2076
2077 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2078 if (result != VK_SUCCESS) {
2079 anv_pipeline_finish(&pipeline->base, device, alloc);
2080 return result;
2081 }
2082
2083 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
2084
2085 anv_pipeline_setup_l3_config(&pipeline->base, false);
2086
2087 const VkPipelineVertexInputStateCreateInfo *vi_info =
2088 pCreateInfo->pVertexInputState;
2089
2090 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2091
2092 pipeline->vb_used = 0;
2093 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2094 const VkVertexInputAttributeDescription *desc =
2095 &vi_info->pVertexAttributeDescriptions[i];
2096
2097 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2098 pipeline->vb_used |= 1 << desc->binding;
2099 }
2100
2101 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2102 const VkVertexInputBindingDescription *desc =
2103 &vi_info->pVertexBindingDescriptions[i];
2104
2105 pipeline->vb[desc->binding].stride = desc->stride;
2106
2107 /* Step rate is programmed per vertex element (attribute), not
2108 * binding. Set up a map of which bindings step per instance, for
2109 * reference by vertex element setup. */
2110 switch (desc->inputRate) {
2111 default:
2112 case VK_VERTEX_INPUT_RATE_VERTEX:
2113 pipeline->vb[desc->binding].instanced = false;
2114 break;
2115 case VK_VERTEX_INPUT_RATE_INSTANCE:
2116 pipeline->vb[desc->binding].instanced = true;
2117 break;
2118 }
2119
2120 pipeline->vb[desc->binding].instance_divisor = 1;
2121 }
2122
2123 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2124 vk_find_struct_const(vi_info->pNext,
2125 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2126 if (vi_div_state) {
2127 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2128 const VkVertexInputBindingDivisorDescriptionEXT *desc =
2129 &vi_div_state->pVertexBindingDivisors[i];
2130
2131 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2132 }
2133 }
2134
2135 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2136 * different views. If the client asks for instancing, we need to multiply
2137 * the instance divisor by the number of views to ensure that we repeat the
2138 * client's per-instance data once for each view.
2139 */
2140 if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2141 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2142 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2143 if (pipeline->vb[vb].instanced)
2144 pipeline->vb[vb].instance_divisor *= view_count;
2145 }
2146 }
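   /* Worked example (editor's illustration): with view_mask = 0x3 (two
    * views) and a client-supplied divisor of 3, the effective divisor
    * becomes 6, so the same per-instance element serves three client
    * instances in each of the two views.
    */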
2147
2148 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2149 pCreateInfo->pInputAssemblyState;
2150 const VkPipelineTessellationStateCreateInfo *tess_info =
2151 pCreateInfo->pTessellationState;
2152 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
2153
2154 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2155 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2156 else
2157 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
2158
2159 return VK_SUCCESS;
2160 }
2161
2162 #define WRITE_STR(field, ...) ({ \
2163 memset(field, 0, sizeof(field)); \
2164 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
2165 assert(i > 0 && i < sizeof(field)); \
2166 })
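/* Usage illustration (editor's note): WRITE_STR(props->name, "SIMD%d %s",
 * 16, "fragment") zeroes the fixed-size field, writes "SIMD16 fragment",
 * and asserts in debug builds that the formatted string was not truncated.
 */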
2167
2168 VkResult anv_GetPipelineExecutablePropertiesKHR(
2169 VkDevice device,
2170 const VkPipelineInfoKHR* pPipelineInfo,
2171 uint32_t* pExecutableCount,
2172 VkPipelineExecutablePropertiesKHR* pProperties)
2173 {
2174 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
2175 VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
2176
2177 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
2178 vk_outarray_append(&out, props) {
2179 gl_shader_stage stage = exe->stage;
2180 props->stages = mesa_to_vk_shader_stage(stage);
2181
2182 unsigned simd_width = exe->stats.dispatch_width;
2183 if (stage == MESA_SHADER_FRAGMENT) {
2184 WRITE_STR(props->name, "%s%d %s",
2185 simd_width ? "SIMD" : "vec",
2186 simd_width ? simd_width : 4,
2187 _mesa_shader_stage_to_string(stage));
2188 } else {
2189 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
2190 }
2191 WRITE_STR(props->description, "%s%d %s shader",
2192 simd_width ? "SIMD" : "vec",
2193 simd_width ? simd_width : 4,
2194 _mesa_shader_stage_to_string(stage));
2195
2196 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
2197 * wants a subgroup size of 1.
2198 */
2199 props->subgroupSize = MAX2(simd_width, 1);
2200 }
2201 }
2202
2203 return vk_outarray_status(&out);
2204 }
2205
2206 static const struct anv_pipeline_executable *
2207 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
2208 {
2209 assert(index < util_dynarray_num_elements(&pipeline->executables,
2210 struct anv_pipeline_executable));
2211 return util_dynarray_element(
2212 &pipeline->executables, struct anv_pipeline_executable, index);
2213 }
2214
2215 VkResult anv_GetPipelineExecutableStatisticsKHR(
2216 VkDevice device,
2217 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2218 uint32_t* pStatisticCount,
2219 VkPipelineExecutableStatisticKHR* pStatistics)
2220 {
2221 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2222 VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
2223
2224 const struct anv_pipeline_executable *exe =
2225 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2226
2227 const struct brw_stage_prog_data *prog_data;
2228 switch (pipeline->type) {
2229 case ANV_PIPELINE_GRAPHICS: {
2230 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
2231 break;
2232 }
2233 case ANV_PIPELINE_COMPUTE: {
2234 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
2235 break;
2236 }
2237 default:
2238 unreachable("invalid pipeline type");
2239 }
2240
2241 vk_outarray_append(&out, stat) {
2242 WRITE_STR(stat->name, "Instruction Count");
2243 WRITE_STR(stat->description,
2244 "Number of GEN instructions in the final generated "
2245 "shader executable.");
2246 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2247 stat->value.u64 = exe->stats.instructions;
2248 }
2249
2250 vk_outarray_append(&out, stat) {
2251 WRITE_STR(stat->name, "SEND Count");
2252 WRITE_STR(stat->description,
2253 "Number of instructions in the final generated shader "
2254 "executable which access external units such as the "
2255 "constant cache or the sampler.");
2256 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2257 stat->value.u64 = exe->stats.sends;
2258 }
2259
2260 vk_outarray_append(&out, stat) {
2261 WRITE_STR(stat->name, "Loop Count");
2262 WRITE_STR(stat->description,
2263 "Number of loops (not unrolled) in the final generated "
2264 "shader executable.");
2265 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2266 stat->value.u64 = exe->stats.loops;
2267 }
2268
2269 vk_outarray_append(&out, stat) {
2270 WRITE_STR(stat->name, "Cycle Count");
2271 WRITE_STR(stat->description,
2272 "Estimate of the number of EU cycles required to execute "
2273 "the final generated executable. This is an estimate only "
2274 "and may vary greatly from actual run-time performance.");
2275 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2276 stat->value.u64 = exe->stats.cycles;
2277 }
2278
2279 vk_outarray_append(&out, stat) {
2280 WRITE_STR(stat->name, "Spill Count");
2281 WRITE_STR(stat->description,
2282 "Number of scratch spill operations. This gives a rough "
2283 "estimate of the cost incurred due to spilling temporary "
2284 "values to memory. If this is non-zero, you may want to "
2285 "adjust your shader to reduce register pressure.");
2286 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2287 stat->value.u64 = exe->stats.spills;
2288 }
2289
2290 vk_outarray_append(&out, stat) {
2291 WRITE_STR(stat->name, "Fill Count");
2292 WRITE_STR(stat->description,
2293 "Number of scratch fill operations. This gives a rough "
2294 "estimate of the cost incurred due to spilling temporary "
2295 "values to memory. If this is non-zero, you may want to "
2296 "adjust your shader to reduce register pressure.");
2297 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2298 stat->value.u64 = exe->stats.fills;
2299 }
2300
2301 vk_outarray_append(&out, stat) {
2302 WRITE_STR(stat->name, "Scratch Memory Size");
2303 WRITE_STR(stat->description,
2304 "Number of bytes of scratch memory required by the "
2305 "generated shader executable. If this is non-zero, you "
2306 "may want to adjust your shader to reduce register "
2307 "pressure.");
2308 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2309 stat->value.u64 = prog_data->total_scratch;
2310 }
2311
2312 if (exe->stage == MESA_SHADER_COMPUTE) {
2313 vk_outarray_append(&out, stat) {
2314 WRITE_STR(stat->name, "Workgroup Memory Size");
2315 WRITE_STR(stat->description,
2316 "Number of bytes of workgroup shared memory used by this "
2317 "compute shader including any padding.");
2318 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2319 stat->value.u64 = brw_cs_prog_data_const(prog_data)->slm_size;
2320 }
2321 }
2322
2323 return vk_outarray_status(&out);
2324 }
2325
2326 static bool
2327 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2328 const char *data)
2329 {
2330 ir->isText = VK_TRUE;
2331
2332 size_t data_len = strlen(data) + 1;
2333
2334 if (ir->pData == NULL) {
2335 ir->dataSize = data_len;
2336 return true;
2337 }
2338
2339 strncpy(ir->pData, data, ir->dataSize);
2340 if (ir->dataSize < data_len)
2341 return false;
2342
2343 ir->dataSize = data_len;
2344 return true;
2345 }
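/* Illustration only (editor's note, application-side usage, not driver
 * code): write_ir_text() implements the usual Vulkan two-call idiom, so an
 * application queries sizes first and then fetches the text. The pipeline
 * must have been created with
 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; dev and
 * exe_info below are placeholder names for the application's VkDevice and
 * VkPipelineExecutableInfoKHR.
 *
 *    uint32_t count = 0;
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &exe_info,
 *                                                      &count, NULL);
 *    VkPipelineExecutableInternalRepresentationKHR *irs =
 *       calloc(count, sizeof(*irs));
 *    for (uint32_t i = 0; i < count; i++) {
 *       irs[i].sType =
 *          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR;
 *    }
 *    // pData == NULL on this pass, so only name/description/dataSize are set.
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &exe_info,
 *                                                      &count, irs);
 *    // Allocate irs[i].pData of irs[i].dataSize bytes and call once more to
 *    // receive the text; a too-small buffer yields VK_INCOMPLETE.
 */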
2346
2347 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2348 VkDevice device,
2349 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2350 uint32_t* pInternalRepresentationCount,
2351 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2352 {
2353 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2354 VK_OUTARRAY_MAKE(out, pInternalRepresentations,
2355 pInternalRepresentationCount);
2356 bool incomplete_text = false;
2357
2358 const struct anv_pipeline_executable *exe =
2359 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2360
2361 if (exe->nir) {
2362 vk_outarray_append(&out, ir) {
2363 WRITE_STR(ir->name, "Final NIR");
2364 WRITE_STR(ir->description,
2365 "Final NIR before going into the back-end compiler");
2366
2367 if (!write_ir_text(ir, exe->nir))
2368 incomplete_text = true;
2369 }
2370 }
2371
2372 if (exe->disasm) {
2373 vk_outarray_append(&out, ir) {
2374 WRITE_STR(ir->name, "GEN Assembly");
2375 WRITE_STR(ir->description,
2376 "Final GEN assembly for the generated shader binary");
2377
2378 if (!write_ir_text(ir, exe->disasm))
2379 incomplete_text = true;
2380 }
2381 }
2382
2383 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2384 }