anv: Refactor setting descriptors with immutable sampler
mesa.git: src/intel/vulkan/anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/gen_l3_config.h"
33 #include "common/gen_disasm.h"
34 #include "anv_private.h"
35 #include "compiler/brw_nir.h"
36 #include "anv_nir.h"
37 #include "nir/nir_xfb_info.h"
38 #include "spirv/nir_spirv.h"
39 #include "vk_util.h"
40
41 /* Needed for SWIZZLE macros */
42 #include "program/prog_instruction.h"
43
44 // Shader functions
45
46 VkResult anv_CreateShaderModule(
47 VkDevice _device,
48 const VkShaderModuleCreateInfo* pCreateInfo,
49 const VkAllocationCallbacks* pAllocator,
50 VkShaderModule* pShaderModule)
51 {
52 ANV_FROM_HANDLE(anv_device, device, _device);
53 struct anv_shader_module *module;
54
55 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
56 assert(pCreateInfo->flags == 0);
57
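/* Note (assumption based on the sizeof(*module) + codeSize allocation below):
 * the SPIR-V words are stored inline right after the module struct, with
 * module->data declared as a trailing array in anv_private.h, so a single
 * allocation covers both the header and the code.
 */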
58 module = vk_alloc2(&device->vk.alloc, pAllocator,
59 sizeof(*module) + pCreateInfo->codeSize, 8,
60 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
61 if (module == NULL)
62 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
63
64 vk_object_base_init(&device->vk, &module->base,
65 VK_OBJECT_TYPE_SHADER_MODULE);
66 module->size = pCreateInfo->codeSize;
67 memcpy(module->data, pCreateInfo->pCode, module->size);
68
69 _mesa_sha1_compute(module->data, module->size, module->sha1);
70
71 *pShaderModule = anv_shader_module_to_handle(module);
72
73 return VK_SUCCESS;
74 }
75
76 void anv_DestroyShaderModule(
77 VkDevice _device,
78 VkShaderModule _module,
79 const VkAllocationCallbacks* pAllocator)
80 {
81 ANV_FROM_HANDLE(anv_device, device, _device);
82 ANV_FROM_HANDLE(anv_shader_module, module, _module);
83
84 if (!module)
85 return;
86
87 vk_object_base_finish(&module->base);
88 vk_free2(&device->vk.alloc, pAllocator, module);
89 }
90
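/* First word of every valid SPIR-V binary, as defined by the SPIR-V spec;
 * used below to sanity-check the module before it is handed to spirv_to_nir().
 */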
91 #define SPIR_V_MAGIC_NUMBER 0x07230203
92
93 struct anv_spirv_debug_data {
94 struct anv_device *device;
95 const struct anv_shader_module *module;
96 };
97
98 static void anv_spirv_nir_debug(void *private_data,
99 enum nir_spirv_debug_level level,
100 size_t spirv_offset,
101 const char *message)
102 {
103 struct anv_spirv_debug_data *debug_data = private_data;
104 struct anv_instance *instance = debug_data->device->physical->instance;
105
106 static const VkDebugReportFlagsEXT vk_flags[] = {
107 [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
108 [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
109 [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
110 };
111 char buffer[256];
112
113 snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
114
115 vk_debug_report(&instance->debug_report_callbacks,
116 vk_flags[level],
117 VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
118 (uint64_t) (uintptr_t) debug_data->module,
119 0, 0, "anv", buffer);
120 }
121
122 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
123 * we can't do that yet because we don't have the ability to copy nir.
124 */
125 static nir_shader *
126 anv_shader_compile_to_nir(struct anv_device *device,
127 void *mem_ctx,
128 const struct anv_shader_module *module,
129 const char *entrypoint_name,
130 gl_shader_stage stage,
131 const VkSpecializationInfo *spec_info)
132 {
133 const struct anv_physical_device *pdevice = device->physical;
134 const struct brw_compiler *compiler = pdevice->compiler;
135 const nir_shader_compiler_options *nir_options =
136 compiler->glsl_compiler_options[stage].NirOptions;
137
138 uint32_t *spirv = (uint32_t *) module->data;
139 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
140 assert(module->size % 4 == 0);
141
142 uint32_t num_spec_entries = 0;
143 struct nir_spirv_specialization *spec_entries = NULL;
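/* Translate the Vulkan specialization info into the array that spirv_to_nir()
 * consumes. Illustrative example (values made up): a VkSpecializationMapEntry
 * with constantID = 3, offset = 4, size = 4 copies the 32-bit word at
 * pData + 4 into the entry for specialization constant 3.
 */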
144 if (spec_info && spec_info->mapEntryCount > 0) {
145 num_spec_entries = spec_info->mapEntryCount;
146 spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
147 for (uint32_t i = 0; i < num_spec_entries; i++) {
148 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
149 const void *data = spec_info->pData + entry.offset;
150 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
151
152 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
153 switch (entry.size) {
154 case 8:
155 spec_entries[i].value.u64 = *(const uint64_t *)data;
156 break;
157 case 4:
158 spec_entries[i].value.u32 = *(const uint32_t *)data;
159 break;
160 case 2:
161 spec_entries[i].value.u16 = *(const uint16_t *)data;
162 break;
163 case 1:
164 spec_entries[i].value.u8 = *(const uint8_t *)data;
165 break;
166 default:
167 assert(!"Invalid spec constant size");
168 break;
169 }
170 }
171 }
172
173 struct anv_spirv_debug_data spirv_debug_data = {
174 .device = device,
175 .module = module,
176 };
177 struct spirv_to_nir_options spirv_options = {
178 .frag_coord_is_sysval = true,
179 .caps = {
180 .demote_to_helper_invocation = true,
181 .derivative_group = true,
182 .descriptor_array_dynamic_indexing = true,
183 .descriptor_array_non_uniform_indexing = true,
184 .descriptor_indexing = true,
185 .device_group = true,
186 .draw_parameters = true,
187 .float16 = pdevice->info.gen >= 8,
188 .float64 = pdevice->info.gen >= 8,
189 .fragment_shader_sample_interlock = pdevice->info.gen >= 9,
190 .fragment_shader_pixel_interlock = pdevice->info.gen >= 9,
191 .geometry_streams = true,
192 .image_write_without_format = true,
193 .int8 = pdevice->info.gen >= 8,
194 .int16 = pdevice->info.gen >= 8,
195 .int64 = pdevice->info.gen >= 8,
196 .int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
197 .integer_functions2 = pdevice->info.gen >= 8,
198 .min_lod = true,
199 .multiview = true,
200 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
201 .post_depth_coverage = pdevice->info.gen >= 9,
202 .runtime_descriptor_array = true,
203 .float_controls = pdevice->info.gen >= 8,
204 .shader_clock = true,
205 .shader_viewport_index_layer = true,
206 .stencil_export = pdevice->info.gen >= 9,
207 .storage_8bit = pdevice->info.gen >= 8,
208 .storage_16bit = pdevice->info.gen >= 8,
209 .subgroup_arithmetic = true,
210 .subgroup_basic = true,
211 .subgroup_ballot = true,
212 .subgroup_quad = true,
213 .subgroup_shuffle = true,
214 .subgroup_vote = true,
215 .tessellation = true,
216 .transform_feedback = pdevice->info.gen >= 8,
217 .variable_pointers = true,
218 .vk_memory_model = true,
219 .vk_memory_model_device_scope = true,
220 },
221 .ubo_addr_format = nir_address_format_32bit_index_offset,
222 .ssbo_addr_format =
223 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
224 .phys_ssbo_addr_format = nir_address_format_64bit_global,
225 .push_const_addr_format = nir_address_format_logical,
226
227 /* TODO: Consider changing this to an address format that has the NULL
228 * pointer equal to 0. That might be a better format to play nice
229 * with certain code / code generators.
230 */
231 .shared_addr_format = nir_address_format_32bit_offset,
232 .debug = {
233 .func = anv_spirv_nir_debug,
234 .private_data = &spirv_debug_data,
235 },
236 };
237
238
239 nir_shader *nir =
240 spirv_to_nir(spirv, module->size / 4,
241 spec_entries, num_spec_entries,
242 stage, entrypoint_name, &spirv_options, nir_options);
243 assert(nir->info.stage == stage);
244 nir_validate_shader(nir, "after spirv_to_nir");
245 ralloc_steal(mem_ctx, nir);
246
247 free(spec_entries);
248
249 if (unlikely(INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage))) {
250 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
251 gl_shader_stage_name(stage));
252 nir_print_shader(nir, stderr);
253 }
254
255 /* We have to lower away local constant initializers right before we
256 * inline functions. That way they get properly initialized at the top
257 * of the function and not at the top of its caller.
258 */
259 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
260 NIR_PASS_V(nir, nir_lower_returns);
261 NIR_PASS_V(nir, nir_inline_functions);
262 NIR_PASS_V(nir, nir_opt_deref);
263
264 /* Pick off the single entrypoint that we want */
265 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
266 if (!func->is_entrypoint)
267 exec_node_remove(&func->node);
268 }
269 assert(exec_list_length(&nir->functions) == 1);
270
271 /* Now that we've deleted all but the main function, we can go ahead and
272 * lower the rest of the constant initializers. We do this here so that
273 * nir_remove_dead_variables and split_per_member_structs below see the
274 * corresponding stores.
275 */
276 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
277
278 /* Split member structs. We do this before lower_io_to_temporaries so that
279 * it doesn't lower system values to temporaries by accident.
280 */
281 NIR_PASS_V(nir, nir_split_var_copies);
282 NIR_PASS_V(nir, nir_split_per_member_structs);
283
284 NIR_PASS_V(nir, nir_remove_dead_variables,
285 nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
286
287 NIR_PASS_V(nir, nir_propagate_invariant);
288 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
289 nir_shader_get_entrypoint(nir), true, false);
290
291 NIR_PASS_V(nir, nir_lower_frexp);
292
293 /* Vulkan uses the separate-shader linking model */
294 nir->info.separate_shader = true;
295
296 brw_preprocess_nir(compiler, nir, NULL);
297
298 return nir;
299 }
300
301 void anv_DestroyPipeline(
302 VkDevice _device,
303 VkPipeline _pipeline,
304 const VkAllocationCallbacks* pAllocator)
305 {
306 ANV_FROM_HANDLE(anv_device, device, _device);
307 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
308
309 if (!pipeline)
310 return;
311
312 anv_reloc_list_finish(&pipeline->batch_relocs,
313 pAllocator ? pAllocator : &device->vk.alloc);
314
315 ralloc_free(pipeline->mem_ctx);
316
317 switch (pipeline->type) {
318 case ANV_PIPELINE_GRAPHICS: {
319 struct anv_graphics_pipeline *gfx_pipeline =
320 anv_pipeline_to_graphics(pipeline);
321
322 if (gfx_pipeline->blend_state.map)
323 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
324
325 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
326 if (gfx_pipeline->shaders[s])
327 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
328 }
329 break;
330 }
331
332 case ANV_PIPELINE_COMPUTE: {
333 struct anv_compute_pipeline *compute_pipeline =
334 anv_pipeline_to_compute(pipeline);
335
336 if (compute_pipeline->cs)
337 anv_shader_bin_unref(device, compute_pipeline->cs);
338
339 break;
340 }
341
342 default:
343 unreachable("invalid pipeline type");
344 }
345
346 vk_object_base_finish(&pipeline->base);
347 vk_free2(&device->vk.alloc, pAllocator, pipeline);
348 }
349
350 static const uint32_t vk_to_gen_primitive_type[] = {
351 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
352 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
353 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
354 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
355 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
356 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
357 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
358 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
359 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
360 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
361 };
362
363 static void
364 populate_sampler_prog_key(const struct gen_device_info *devinfo,
365 struct brw_sampler_prog_key_data *key)
366 {
367 /* Almost all multisampled textures are compressed. The only time when we
368 * don't compress a multisampled texture is for 16x MSAA with a surface
369 * width greater than 8k which is a bit of an edge case. Since the sampler
370 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
371 * to tell the compiler to always assume compression.
372 */
373 key->compressed_multisample_layout_mask = ~0;
374
375 /* SkyLake added support for 16x MSAA. With this came a new message for
376 * reading from a 16x MSAA surface with compression. The new message was
377 * needed because now the MCS data is 64 bits instead of 32 or lower as is
378 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
379 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
380 * so we can just use it unconditionally. This may not be quite as
381 * efficient but it saves us from recompiling.
382 */
383 if (devinfo->gen >= 9)
384 key->msaa_16 = ~0;
385
386 /* XXX: Handle texture swizzle on HSW- */
387 for (int i = 0; i < MAX_SAMPLERS; i++) {
388 /* Assume color sampler, no swizzling. (Works for BDW+) */
389 key->swizzles[i] = SWIZZLE_XYZW;
390 }
391 }
392
393 static void
394 populate_base_prog_key(const struct gen_device_info *devinfo,
395 VkPipelineShaderStageCreateFlags flags,
396 struct brw_base_prog_key *key)
397 {
398 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
399 key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
400 else
401 key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
402
403 populate_sampler_prog_key(devinfo, &key->tex);
404 }
405
406 static void
407 populate_vs_prog_key(const struct gen_device_info *devinfo,
408 VkPipelineShaderStageCreateFlags flags,
409 struct brw_vs_prog_key *key)
410 {
411 memset(key, 0, sizeof(*key));
412
413 populate_base_prog_key(devinfo, flags, &key->base);
414
415 /* XXX: Handle vertex input work-arounds */
416
417 /* XXX: Handle sampler_prog_key */
418 }
419
420 static void
421 populate_tcs_prog_key(const struct gen_device_info *devinfo,
422 VkPipelineShaderStageCreateFlags flags,
423 unsigned input_vertices,
424 struct brw_tcs_prog_key *key)
425 {
426 memset(key, 0, sizeof(*key));
427
428 populate_base_prog_key(devinfo, flags, &key->base);
429
430 key->input_vertices = input_vertices;
431 }
432
433 static void
434 populate_tes_prog_key(const struct gen_device_info *devinfo,
435 VkPipelineShaderStageCreateFlags flags,
436 struct brw_tes_prog_key *key)
437 {
438 memset(key, 0, sizeof(*key));
439
440 populate_base_prog_key(devinfo, flags, &key->base);
441 }
442
443 static void
444 populate_gs_prog_key(const struct gen_device_info *devinfo,
445 VkPipelineShaderStageCreateFlags flags,
446 struct brw_gs_prog_key *key)
447 {
448 memset(key, 0, sizeof(*key));
449
450 populate_base_prog_key(devinfo, flags, &key->base);
451 }
452
453 static void
454 populate_wm_prog_key(const struct gen_device_info *devinfo,
455 VkPipelineShaderStageCreateFlags flags,
456 const struct anv_subpass *subpass,
457 const VkPipelineMultisampleStateCreateInfo *ms_info,
458 struct brw_wm_prog_key *key)
459 {
460 memset(key, 0, sizeof(*key));
461
462 populate_base_prog_key(devinfo, flags, &key->base);
463
464 /* We set this to 0 here and set it to the actual value before we call
465 * brw_compile_fs.
466 */
467 key->input_slots_valid = 0;
468
469 /* Vulkan doesn't specify a default */
470 key->high_quality_derivatives = false;
471
472 /* XXX Vulkan doesn't appear to specify */
473 key->clamp_fragment_color = false;
474
475 assert(subpass->color_count <= MAX_RTS);
476 for (uint32_t i = 0; i < subpass->color_count; i++) {
477 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
478 key->color_outputs_valid |= (1 << i);
479 }
480
481 key->nr_color_regions = subpass->color_count;
482
483 /* To reduce possible shader recompilations we would need to know whether
484 * there is a SampleMask output variable, so we could decide whether to emit
485 * code to work around the hardware disabling alpha-to-coverage
486 * when SampleMask is written.
487 */
488 key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
489
490 /* Vulkan doesn't support fixed-function alpha test */
491 key->alpha_test_replicate_alpha = false;
492
493 if (ms_info) {
494 /* We should probably pull this out of the shader, but it's fairly
495 * harmless to compute it and then let dead-code elimination take care of it.
496 */
497 if (ms_info->rasterizationSamples > 1) {
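/* Worked example: with rasterizationSamples = 4 and minSampleShading = 0.5,
 * the product below is 2 > 1, so persample_interp is set and the shader is
 * compiled for per-sample interpolation.
 */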
498 key->persample_interp = ms_info->sampleShadingEnable &&
499 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
500 key->multisample_fbo = true;
501 }
502
503 key->frag_coord_adds_sample_pos = key->persample_interp;
504 }
505 }
506
507 static void
508 populate_cs_prog_key(const struct gen_device_info *devinfo,
509 VkPipelineShaderStageCreateFlags flags,
510 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
511 struct brw_cs_prog_key *key)
512 {
513 memset(key, 0, sizeof(*key));
514
515 populate_base_prog_key(devinfo, flags, &key->base);
516
517 if (rss_info) {
518 assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
519
520 /* These enum values are expressly chosen to be equal to the subgroup
521 * size that they require.
522 */
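/* For example, a requiredSubgroupSize of 16 is stored as the value 16, which
 * the back-end interprets as BRW_SUBGROUP_SIZE_REQUIRE_16.
 */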
523 assert(rss_info->requiredSubgroupSize == 8 ||
524 rss_info->requiredSubgroupSize == 16 ||
525 rss_info->requiredSubgroupSize == 32);
526 key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
527 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
528 /* If the client expressly requests full subgroups and they don't
529 * specify a subgroup size, we need to pick one. If they've requested
530 * varying subgroup sizes, we set it to UNIFORM and let the back-end
531 * compiler pick. Otherwise, we specify the API value of 32.
532 * Performance will likely be terrible in this case but there's nothing
533 * we can do about that. The client should have chosen a size.
534 */
535 if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
536 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
537 else
538 key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
539 }
540 }
541
542 struct anv_pipeline_stage {
543 gl_shader_stage stage;
544
545 const struct anv_shader_module *module;
546 const char *entrypoint;
547 const VkSpecializationInfo *spec_info;
548
549 unsigned char shader_sha1[20];
550
551 union brw_any_prog_key key;
552
553 struct {
554 gl_shader_stage stage;
555 unsigned char sha1[20];
556 } cache_key;
557
558 nir_shader *nir;
559
560 struct anv_pipeline_binding surface_to_descriptor[256];
561 struct anv_pipeline_binding sampler_to_descriptor[256];
562 struct anv_pipeline_bind_map bind_map;
563
564 union brw_any_prog_data prog_data;
565
566 uint32_t num_stats;
567 struct brw_compile_stats stats[3];
568 char *disasm[3];
569
570 VkPipelineCreationFeedbackEXT feedback;
571
572 const unsigned *code;
573 };
574
575 static void
576 anv_pipeline_hash_shader(const struct anv_shader_module *module,
577 const char *entrypoint,
578 gl_shader_stage stage,
579 const VkSpecializationInfo *spec_info,
580 unsigned char *sha1_out)
581 {
582 struct mesa_sha1 ctx;
583 _mesa_sha1_init(&ctx);
584
585 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
586 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
587 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
588 if (spec_info) {
589 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
590 spec_info->mapEntryCount *
591 sizeof(*spec_info->pMapEntries));
592 _mesa_sha1_update(&ctx, spec_info->pData,
593 spec_info->dataSize);
594 }
595
596 _mesa_sha1_final(&ctx, sha1_out);
597 }
598
599 static void
600 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
601 struct anv_pipeline_layout *layout,
602 struct anv_pipeline_stage *stages,
603 unsigned char *sha1_out)
604 {
605 struct mesa_sha1 ctx;
606 _mesa_sha1_init(&ctx);
607
608 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
609 sizeof(pipeline->subpass->view_mask));
610
611 if (layout)
612 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
613
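/* Robust buffer access changes how SSBO access is lowered (see
 * anv_nir_ssbo_addr_format above), so it has to be folded into the hash.
 */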
614 const bool rba = pipeline->base.device->robust_buffer_access;
615 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
616
617 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
618 if (stages[s].entrypoint) {
619 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
620 sizeof(stages[s].shader_sha1));
621 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
622 }
623 }
624
625 _mesa_sha1_final(&ctx, sha1_out);
626 }
627
628 static void
629 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
630 struct anv_pipeline_layout *layout,
631 struct anv_pipeline_stage *stage,
632 unsigned char *sha1_out)
633 {
634 struct mesa_sha1 ctx;
635 _mesa_sha1_init(&ctx);
636
637 if (layout)
638 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
639
640 const bool rba = pipeline->base.device->robust_buffer_access;
641 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
642
643 _mesa_sha1_update(&ctx, stage->shader_sha1,
644 sizeof(stage->shader_sha1));
645 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
646
647 _mesa_sha1_final(&ctx, sha1_out);
648 }
649
650 static nir_shader *
651 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
652 struct anv_pipeline_cache *cache,
653 void *mem_ctx,
654 struct anv_pipeline_stage *stage)
655 {
656 const struct brw_compiler *compiler =
657 pipeline->device->physical->compiler;
658 const nir_shader_compiler_options *nir_options =
659 compiler->glsl_compiler_options[stage->stage].NirOptions;
660 nir_shader *nir;
661
662 nir = anv_device_search_for_nir(pipeline->device, cache,
663 nir_options,
664 stage->shader_sha1,
665 mem_ctx);
666 if (nir) {
667 assert(nir->info.stage == stage->stage);
668 return nir;
669 }
670
671 nir = anv_shader_compile_to_nir(pipeline->device,
672 mem_ctx,
673 stage->module,
674 stage->entrypoint,
675 stage->stage,
676 stage->spec_info);
677 if (nir) {
678 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
679 return nir;
680 }
681
682 return NULL;
683 }
684
685 static void
686 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
687 void *mem_ctx,
688 struct anv_pipeline_stage *stage,
689 struct anv_pipeline_layout *layout)
690 {
691 const struct anv_physical_device *pdevice = pipeline->device->physical;
692 const struct brw_compiler *compiler = pdevice->compiler;
693
694 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
695 nir_shader *nir = stage->nir;
696
697 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
698 NIR_PASS_V(nir, nir_lower_wpos_center,
699 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
700 NIR_PASS_V(nir, nir_lower_input_attachments, true);
701 }
702
703 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
704
705 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
706 NIR_PASS_V(nir, anv_nir_lower_multiview,
707 anv_pipeline_to_graphics(pipeline));
708 }
709
710 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
711
712 NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL);
713
714 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
715 nir_address_format_64bit_global);
716
717 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
718 anv_nir_apply_pipeline_layout(pdevice,
719 pipeline->device->robust_buffer_access,
720 layout, nir, &stage->bind_map);
721
722 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
723 nir_address_format_32bit_index_offset);
724 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
725 anv_nir_ssbo_addr_format(pdevice,
726 pipeline->device->robust_buffer_access));
727
728 NIR_PASS_V(nir, nir_opt_constant_folding);
729
730 /* We don't support non-uniform UBOs and non-uniform SSBO access is
731 * handled naturally by falling back to A64 messages.
732 */
733 NIR_PASS_V(nir, nir_lower_non_uniform_access,
734 nir_lower_non_uniform_texture_access |
735 nir_lower_non_uniform_image_access);
736
737 anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
738 nir, prog_data, &stage->bind_map, mem_ctx);
739
740 stage->nir = nir;
741 }
742
743 static void
744 anv_pipeline_link_vs(const struct brw_compiler *compiler,
745 struct anv_pipeline_stage *vs_stage,
746 struct anv_pipeline_stage *next_stage)
747 {
748 if (next_stage)
749 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
750 }
751
752 static void
753 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
754 void *mem_ctx,
755 struct anv_graphics_pipeline *pipeline,
756 struct anv_pipeline_stage *vs_stage)
757 {
758 /* When using Primitive Replication for multiview, each view gets its own
759 * position slot.
760 */
761 uint32_t pos_slots = pipeline->use_primitive_replication ?
762 anv_subpass_view_count(pipeline->subpass) : 1;
763
764 brw_compute_vue_map(compiler->devinfo,
765 &vs_stage->prog_data.vs.base.vue_map,
766 vs_stage->nir->info.outputs_written,
767 vs_stage->nir->info.separate_shader,
768 pos_slots);
769
770 vs_stage->num_stats = 1;
771 vs_stage->code = brw_compile_vs(compiler, pipeline->base.device, mem_ctx,
772 &vs_stage->key.vs,
773 &vs_stage->prog_data.vs,
774 vs_stage->nir, -1,
775 vs_stage->stats, NULL);
776 }
777
778 static void
779 merge_tess_info(struct shader_info *tes_info,
780 const struct shader_info *tcs_info)
781 {
782 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
783 *
784 * "PointMode. Controls generation of points rather than triangles
785 * or lines. This functionality defaults to disabled, and is
786 * enabled if either shader stage includes the execution mode."
787 *
788 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
789 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
790 * and OutputVertices, it says:
791 *
792 * "One mode must be set in at least one of the tessellation
793 * shader stages."
794 *
795 * So, the fields can be set in either the TCS or TES, but they must
796 * agree if set in both. Our backend looks at TES, so bitwise-or in
797 * the values from the TCS.
798 */
799 assert(tcs_info->tess.tcs_vertices_out == 0 ||
800 tes_info->tess.tcs_vertices_out == 0 ||
801 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
802 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
803
804 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
805 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
806 tcs_info->tess.spacing == tes_info->tess.spacing);
807 tes_info->tess.spacing |= tcs_info->tess.spacing;
808
809 assert(tcs_info->tess.primitive_mode == 0 ||
810 tes_info->tess.primitive_mode == 0 ||
811 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
812 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
813 tes_info->tess.ccw |= tcs_info->tess.ccw;
814 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
815 }
816
817 static void
818 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
819 struct anv_pipeline_stage *tcs_stage,
820 struct anv_pipeline_stage *tes_stage)
821 {
822 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
823
824 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
825
826 nir_lower_patch_vertices(tes_stage->nir,
827 tcs_stage->nir->info.tess.tcs_vertices_out,
828 NULL);
829
830 /* Copy TCS info into the TES info */
831 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
832
833 /* Whacking the key after cache lookup is a bit sketchy, but all of
834 * this comes from the SPIR-V, which is part of the hash used for the
835 * pipeline cache. So it should be safe.
836 */
837 tcs_stage->key.tcs.tes_primitive_mode =
838 tes_stage->nir->info.tess.primitive_mode;
839 tcs_stage->key.tcs.quads_workaround =
840 compiler->devinfo->gen < 9 &&
841 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
842 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
843 }
844
845 static void
846 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
847 void *mem_ctx,
848 struct anv_device *device,
849 struct anv_pipeline_stage *tcs_stage,
850 struct anv_pipeline_stage *prev_stage)
851 {
852 tcs_stage->key.tcs.outputs_written =
853 tcs_stage->nir->info.outputs_written;
854 tcs_stage->key.tcs.patch_outputs_written =
855 tcs_stage->nir->info.patch_outputs_written;
856
857 tcs_stage->num_stats = 1;
858 tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
859 &tcs_stage->key.tcs,
860 &tcs_stage->prog_data.tcs,
861 tcs_stage->nir, -1,
862 tcs_stage->stats, NULL);
863 }
864
865 static void
866 anv_pipeline_link_tes(const struct brw_compiler *compiler,
867 struct anv_pipeline_stage *tes_stage,
868 struct anv_pipeline_stage *next_stage)
869 {
870 if (next_stage)
871 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
872 }
873
874 static void
875 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
876 void *mem_ctx,
877 struct anv_device *device,
878 struct anv_pipeline_stage *tes_stage,
879 struct anv_pipeline_stage *tcs_stage)
880 {
881 tes_stage->key.tes.inputs_read =
882 tcs_stage->nir->info.outputs_written;
883 tes_stage->key.tes.patch_inputs_read =
884 tcs_stage->nir->info.patch_outputs_written;
885
886 tes_stage->num_stats = 1;
887 tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
888 &tes_stage->key.tes,
889 &tcs_stage->prog_data.tcs.base.vue_map,
890 &tes_stage->prog_data.tes,
891 tes_stage->nir, -1,
892 tes_stage->stats, NULL);
893 }
894
895 static void
896 anv_pipeline_link_gs(const struct brw_compiler *compiler,
897 struct anv_pipeline_stage *gs_stage,
898 struct anv_pipeline_stage *next_stage)
899 {
900 if (next_stage)
901 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
902 }
903
904 static void
905 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
906 void *mem_ctx,
907 struct anv_device *device,
908 struct anv_pipeline_stage *gs_stage,
909 struct anv_pipeline_stage *prev_stage)
910 {
911 brw_compute_vue_map(compiler->devinfo,
912 &gs_stage->prog_data.gs.base.vue_map,
913 gs_stage->nir->info.outputs_written,
914 gs_stage->nir->info.separate_shader, 1);
915
916 gs_stage->num_stats = 1;
917 gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
918 &gs_stage->key.gs,
919 &gs_stage->prog_data.gs,
920 gs_stage->nir, NULL, -1,
921 gs_stage->stats, NULL);
922 }
923
924 static void
925 anv_pipeline_link_fs(const struct brw_compiler *compiler,
926 struct anv_pipeline_stage *stage)
927 {
928 unsigned num_rt_bindings;
929 struct anv_pipeline_binding rt_bindings[MAX_RTS];
930 if (stage->key.wm.nr_color_regions > 0) {
931 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
932 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
933 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
934 rt_bindings[rt] = (struct anv_pipeline_binding) {
935 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
936 .index = rt,
937 };
938 } else {
939 /* Set up a null render target */
940 rt_bindings[rt] = (struct anv_pipeline_binding) {
941 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
942 .index = UINT32_MAX,
943 };
944 }
945 }
946 num_rt_bindings = stage->key.wm.nr_color_regions;
947 } else {
948 /* Set up a null render target */
949 rt_bindings[0] = (struct anv_pipeline_binding) {
950 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
951 .index = UINT32_MAX,
952 };
953 num_rt_bindings = 1;
954 }
955
956 assert(num_rt_bindings <= MAX_RTS);
957 assert(stage->bind_map.surface_count == 0);
958 typed_memcpy(stage->bind_map.surface_to_descriptor,
959 rt_bindings, num_rt_bindings);
960 stage->bind_map.surface_count += num_rt_bindings;
961
962 /* Now that we've set up the color attachments, we can go through and
963 * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
964 * hopes that dead code can clean them up in this and any earlier shader
965 * stages.
966 */
967 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
968 bool deleted_output = false;
969 nir_foreach_variable_safe(var, &stage->nir->outputs) {
970 /* TODO: We don't delete depth/stencil writes. We probably could if the
971 * subpass doesn't have a depth/stencil attachment.
972 */
973 if (var->data.location < FRAG_RESULT_DATA0)
974 continue;
975
976 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
977
978 /* If this is the RT at location 0 and we have alpha to coverage
979 * enabled we still need that write because it will affect the coverage
980 * mask even if it's never written to a color target.
981 */
982 if (rt == 0 && stage->key.wm.alpha_to_coverage)
983 continue;
984
985 const unsigned array_len =
986 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
987 assert(rt + array_len <= MAX_RTS);
988
989 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
990 BITFIELD_RANGE(rt, array_len))) {
991 deleted_output = true;
992 var->data.mode = nir_var_function_temp;
993 exec_node_remove(&var->node);
994 exec_list_push_tail(&impl->locals, &var->node);
995 }
996 }
997
998 if (deleted_output)
999 nir_fixup_deref_modes(stage->nir);
1000
1001 /* We stored the number of subpass color attachments in nr_color_regions
1002 * when calculating the key for caching. Now that we've computed the bind
1003 * map, we can reduce this to the actual max before we go into the back-end
1004 * compiler.
1005 */
1006 stage->key.wm.nr_color_regions =
1007 util_last_bit(stage->key.wm.color_outputs_valid);
1008 }
1009
1010 static void
1011 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1012 void *mem_ctx,
1013 struct anv_device *device,
1014 struct anv_pipeline_stage *fs_stage,
1015 struct anv_pipeline_stage *prev_stage)
1016 {
1017 /* TODO: we could set this to 0 based on the information in nir_shader, but
1018 * we need this before we call spirv_to_nir.
1019 */
1020 assert(prev_stage);
1021 fs_stage->key.wm.input_slots_valid =
1022 prev_stage->prog_data.vue.vue_map.slots_valid;
1023
1024 fs_stage->code = brw_compile_fs(compiler, device, mem_ctx,
1025 &fs_stage->key.wm,
1026 &fs_stage->prog_data.wm,
1027 fs_stage->nir, -1, -1, -1,
1028 true, false, NULL,
1029 fs_stage->stats, NULL);
1030
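/* brw_compile_fs can emit SIMD8, SIMD16 and SIMD32 variants; each enabled
 * dispatch width contributes one entry to the stats array.
 */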
1031 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1032 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1033 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1034
1035 if (fs_stage->key.wm.color_outputs_valid == 0 &&
1036 !fs_stage->prog_data.wm.has_side_effects &&
1037 !fs_stage->prog_data.wm.uses_omask &&
1038 !fs_stage->key.wm.alpha_to_coverage &&
1039 !fs_stage->prog_data.wm.uses_kill &&
1040 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1041 !fs_stage->prog_data.wm.computed_stencil) {
1042 /* This fragment shader has no outputs and no side effects. Go ahead
1043 * and return the code pointer so we don't accidentally think the
1044 * compile failed, but zero out prog_data, which sets program_size to
1045 * zero and disables the stage.
1046 */
1047 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1048 }
1049 }
1050
1051 static void
1052 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1053 struct anv_pipeline_stage *stage,
1054 struct brw_compile_stats *stats,
1055 uint32_t code_offset)
1056 {
1057 char *nir = NULL;
1058 if (stage->nir &&
1059 (pipeline->flags &
1060 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1061 char *stream_data = NULL;
1062 size_t stream_size = 0;
1063 FILE *stream = open_memstream(&stream_data, &stream_size);
1064
1065 nir_print_shader(stage->nir, stream);
1066
1067 fclose(stream);
1068
1069 /* Copy it to a ralloc'd thing */
1070 nir = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1071 memcpy(nir, stream_data, stream_size);
1072 nir[stream_size] = 0;
1073
1074 free(stream_data);
1075 }
1076
1077 char *disasm = NULL;
1078 if (stage->code &&
1079 (pipeline->flags &
1080 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1081 char *stream_data = NULL;
1082 size_t stream_size = 0;
1083 FILE *stream = open_memstream(&stream_data, &stream_size);
1084
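/* Push range starts and lengths in the bind map appear to be in 32-byte
 * units, hence the "* 32" conversions when printing byte sizes below.
 */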
1085 uint32_t push_size = 0;
1086 for (unsigned i = 0; i < 4; i++)
1087 push_size += stage->bind_map.push_ranges[i].length;
1088 if (push_size > 0) {
1089 fprintf(stream, "Push constant ranges:\n");
1090 for (unsigned i = 0; i < 4; i++) {
1091 if (stage->bind_map.push_ranges[i].length == 0)
1092 continue;
1093
1094 fprintf(stream, " RANGE%d (%dB): ", i,
1095 stage->bind_map.push_ranges[i].length * 32);
1096
1097 switch (stage->bind_map.push_ranges[i].set) {
1098 case ANV_DESCRIPTOR_SET_NULL:
1099 fprintf(stream, "NULL");
1100 break;
1101
1102 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1103 fprintf(stream, "Vulkan push constants and API params");
1104 break;
1105
1106 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1107 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1108 stage->bind_map.push_ranges[i].index,
1109 stage->bind_map.push_ranges[i].start * 32);
1110 break;
1111
1112 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1113 unreachable("gl_NumWorkgroups is never pushed");
1114
1115 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1116 fprintf(stream, "Inline shader constant data (start=%dB)",
1117 stage->bind_map.push_ranges[i].start * 32);
1118 break;
1119
1120 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1121 unreachable("Color attachments can't be pushed");
1122
1123 default:
1124 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1125 stage->bind_map.push_ranges[i].set,
1126 stage->bind_map.push_ranges[i].index,
1127 stage->bind_map.push_ranges[i].start * 32);
1128 break;
1129 }
1130 fprintf(stream, "\n");
1131 }
1132 fprintf(stream, "\n");
1133 }
1134
1135 /* Creating this is far cheaper than it looks. It's perfectly fine to
1136 * do it for every binary.
1137 */
1138 struct gen_disasm *d = gen_disasm_create(&pipeline->device->info);
1139 gen_disasm_disassemble(d, stage->code, code_offset, stream);
1140 gen_disasm_destroy(d);
1141
1142 fclose(stream);
1143
1144 /* Copy it to a ralloc'd thing */
1145 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1146 memcpy(disasm, stream_data, stream_size);
1147 disasm[stream_size] = 0;
1148
1149 free(stream_data);
1150 }
1151
1152 const struct anv_pipeline_executable exe = {
1153 .stage = stage->stage,
1154 .stats = *stats,
1155 .nir = nir,
1156 .disasm = disasm,
1157 };
1158 util_dynarray_append(&pipeline->executables,
1159 struct anv_pipeline_executable, exe);
1160 }
1161
1162 static void
1163 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1164 struct anv_pipeline_stage *stage,
1165 struct anv_shader_bin *bin)
1166 {
1167 if (stage->stage == MESA_SHADER_FRAGMENT) {
1168 /* We pull the prog data and stats out of the anv_shader_bin because
1169 * the anv_pipeline_stage may not be fully populated if we successfully
1170 * looked up the shader in a cache.
1171 */
1172 const struct brw_wm_prog_data *wm_prog_data =
1173 (const struct brw_wm_prog_data *)bin->prog_data;
1174 struct brw_compile_stats *stats = bin->stats;
1175
1176 if (wm_prog_data->dispatch_8) {
1177 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1178 }
1179
1180 if (wm_prog_data->dispatch_16) {
1181 anv_pipeline_add_executable(pipeline, stage, stats++,
1182 wm_prog_data->prog_offset_16);
1183 }
1184
1185 if (wm_prog_data->dispatch_32) {
1186 anv_pipeline_add_executable(pipeline, stage, stats++,
1187 wm_prog_data->prog_offset_32);
1188 }
1189 } else {
1190 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1191 }
1192 }
1193
1194 static void
1195 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1196 {
1197 /* TODO: Cache this pipeline-wide information. */
1198
1199 /* Primitive replication depends on information from all the shaders.
1200 * Recover this bit from the fact that we have more than one position slot
1201 * in the vertex shader when using it.
1202 */
1203 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1204 int pos_slots = 0;
1205 const struct brw_vue_prog_data *vue_prog_data =
1206 (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1207 const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1208 for (int i = 0; i < vue_map->num_slots; i++) {
1209 if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1210 pos_slots++;
1211 }
1212 pipeline->use_primitive_replication = pos_slots > 1;
1213 }
1214
1215 static VkResult
1216 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1217 struct anv_pipeline_cache *cache,
1218 const VkGraphicsPipelineCreateInfo *info)
1219 {
1220 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1221 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1222 };
1223 int64_t pipeline_start = os_time_get_nano();
1224
1225 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1226 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1227
1228 pipeline->active_stages = 0;
1229
1230 VkResult result;
1231 for (uint32_t i = 0; i < info->stageCount; i++) {
1232 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1233 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1234
1235 pipeline->active_stages |= sinfo->stage;
1236
1237 int64_t stage_start = os_time_get_nano();
1238
1239 stages[stage].stage = stage;
1240 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
1241 stages[stage].entrypoint = sinfo->pName;
1242 stages[stage].spec_info = sinfo->pSpecializationInfo;
1243 anv_pipeline_hash_shader(stages[stage].module,
1244 stages[stage].entrypoint,
1245 stage,
1246 stages[stage].spec_info,
1247 stages[stage].shader_sha1);
1248
1249 const struct gen_device_info *devinfo = &pipeline->base.device->info;
1250 switch (stage) {
1251 case MESA_SHADER_VERTEX:
1252 populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
1253 break;
1254 case MESA_SHADER_TESS_CTRL:
1255 populate_tcs_prog_key(devinfo, sinfo->flags,
1256 info->pTessellationState->patchControlPoints,
1257 &stages[stage].key.tcs);
1258 break;
1259 case MESA_SHADER_TESS_EVAL:
1260 populate_tes_prog_key(devinfo, sinfo->flags, &stages[stage].key.tes);
1261 break;
1262 case MESA_SHADER_GEOMETRY:
1263 populate_gs_prog_key(devinfo, sinfo->flags, &stages[stage].key.gs);
1264 break;
1265 case MESA_SHADER_FRAGMENT: {
1266 const bool raster_enabled =
1267 !info->pRasterizationState->rasterizerDiscardEnable;
1268 populate_wm_prog_key(devinfo, sinfo->flags,
1269 pipeline->subpass,
1270 raster_enabled ? info->pMultisampleState : NULL,
1271 &stages[stage].key.wm);
1272 break;
1273 }
1274 default:
1275 unreachable("Invalid graphics shader stage");
1276 }
1277
1278 stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1279 stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1280 }
1281
1282 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1283 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1284
1285 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1286
1287 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1288
1289 unsigned char sha1[20];
1290 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1291
1292 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1293 if (!stages[s].entrypoint)
1294 continue;
1295
1296 stages[s].cache_key.stage = s;
1297 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1298 }
1299
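/* Skip the shader cache when the app asks to capture internal
 * representations: a cache hit would not leave us with the NIR or assembly
 * that anv_pipeline_add_executable() wants to report.
 */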
1300 const bool skip_cache_lookup =
1301 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1302
1303 if (!skip_cache_lookup) {
1304 unsigned found = 0;
1305 unsigned cache_hits = 0;
1306 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1307 if (!stages[s].entrypoint)
1308 continue;
1309
1310 int64_t stage_start = os_time_get_nano();
1311
1312 bool cache_hit;
1313 struct anv_shader_bin *bin =
1314 anv_device_search_for_kernel(pipeline->base.device, cache,
1315 &stages[s].cache_key,
1316 sizeof(stages[s].cache_key), &cache_hit);
1317 if (bin) {
1318 found++;
1319 pipeline->shaders[s] = bin;
1320 }
1321
1322 if (cache_hit) {
1323 cache_hits++;
1324 stages[s].feedback.flags |=
1325 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1326 }
1327 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1328 }
1329
1330 if (found == __builtin_popcount(pipeline->active_stages)) {
1331 if (cache_hits == found) {
1332 pipeline_feedback.flags |=
1333 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1334 }
1335 /* We found all our shaders in the cache. We're done. */
1336 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1337 if (!stages[s].entrypoint)
1338 continue;
1339
1340 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1341 pipeline->shaders[s]);
1342 }
1343 anv_pipeline_init_from_cached_graphics(pipeline);
1344 goto done;
1345 } else if (found > 0) {
1346 /* We found some but not all of our shaders. This shouldn't happen
1347 * most of the time but it can if we have a partially populated
1348 * pipeline cache.
1349 */
1350 assert(found < __builtin_popcount(pipeline->active_stages));
1351
1352 vk_debug_report(&pipeline->base.device->physical->instance->debug_report_callbacks,
1353 VK_DEBUG_REPORT_WARNING_BIT_EXT |
1354 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1355 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1356 (uint64_t)(uintptr_t)cache,
1357 0, 0, "anv",
1358 "Found a partial pipeline in the cache. This is "
1359 "most likely caused by an incomplete pipeline cache "
1360 "import or export");
1361
1362 /* We're going to have to recompile anyway, so just throw away our
1363 * references to the shaders in the cache. We'll get them out of the
1364 * cache again as part of the compilation process.
1365 */
1366 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1367 stages[s].feedback.flags = 0;
1368 if (pipeline->shaders[s]) {
1369 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1370 pipeline->shaders[s] = NULL;
1371 }
1372 }
1373 }
1374 }
1375
1376 void *pipeline_ctx = ralloc_context(NULL);
1377
1378 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1379 if (!stages[s].entrypoint)
1380 continue;
1381
1382 int64_t stage_start = os_time_get_nano();
1383
1384 assert(stages[s].stage == s);
1385 assert(pipeline->shaders[s] == NULL);
1386
1387 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1388 .surface_to_descriptor = stages[s].surface_to_descriptor,
1389 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1390 };
1391
1392 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1393 pipeline_ctx,
1394 &stages[s]);
1395 if (stages[s].nir == NULL) {
1396 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1397 goto fail;
1398 }
1399
1400 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1401 }
1402
1403 /* Walk backwards to link */
1404 struct anv_pipeline_stage *next_stage = NULL;
1405 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1406 if (!stages[s].entrypoint)
1407 continue;
1408
1409 switch (s) {
1410 case MESA_SHADER_VERTEX:
1411 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1412 break;
1413 case MESA_SHADER_TESS_CTRL:
1414 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1415 break;
1416 case MESA_SHADER_TESS_EVAL:
1417 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1418 break;
1419 case MESA_SHADER_GEOMETRY:
1420 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1421 break;
1422 case MESA_SHADER_FRAGMENT:
1423 anv_pipeline_link_fs(compiler, &stages[s]);
1424 break;
1425 default:
1426 unreachable("Invalid graphics shader stage");
1427 }
1428
1429 next_stage = &stages[s];
1430 }
1431
1432 if (pipeline->base.device->info.gen >= 12 &&
1433 pipeline->subpass->view_mask != 0) {
1434 /* For some pipelines HW Primitive Replication can be used instead of
1435 * instancing to implement Multiview. This depends on how viewIndex is
1436 * used in all the active shaders, so the check can't be done per
1437 * individual shader.
1438 */
1439 nir_shader *shaders[MESA_SHADER_STAGES] = {};
1440 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1441 shaders[s] = stages[s].nir;
1442
1443 pipeline->use_primitive_replication =
1444 anv_check_for_primitive_replication(shaders, pipeline);
1445 } else {
1446 pipeline->use_primitive_replication = false;
1447 }
1448
1449 struct anv_pipeline_stage *prev_stage = NULL;
1450 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1451 if (!stages[s].entrypoint)
1452 continue;
1453
1454 int64_t stage_start = os_time_get_nano();
1455
1456 void *stage_ctx = ralloc_context(NULL);
1457
1458 nir_xfb_info *xfb_info = NULL;
1459 if (s == MESA_SHADER_VERTEX ||
1460 s == MESA_SHADER_TESS_EVAL ||
1461 s == MESA_SHADER_GEOMETRY)
1462 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1463
1464 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1465
1466 switch (s) {
1467 case MESA_SHADER_VERTEX:
1468 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1469 &stages[s]);
1470 break;
1471 case MESA_SHADER_TESS_CTRL:
1472 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1473 &stages[s], prev_stage);
1474 break;
1475 case MESA_SHADER_TESS_EVAL:
1476 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1477 &stages[s], prev_stage);
1478 break;
1479 case MESA_SHADER_GEOMETRY:
1480 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1481 &stages[s], prev_stage);
1482 break;
1483 case MESA_SHADER_FRAGMENT:
1484 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1485 &stages[s], prev_stage);
1486 break;
1487 default:
1488 unreachable("Invalid graphics shader stage");
1489 }
1490 if (stages[s].code == NULL) {
1491 ralloc_free(stage_ctx);
1492 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1493 goto fail;
1494 }
1495
1496 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1497 &stages[s].bind_map);
1498
1499 struct anv_shader_bin *bin =
1500 anv_device_upload_kernel(pipeline->base.device, cache, s,
1501 &stages[s].cache_key,
1502 sizeof(stages[s].cache_key),
1503 stages[s].code,
1504 stages[s].prog_data.base.program_size,
1505 stages[s].nir->constant_data,
1506 stages[s].nir->constant_data_size,
1507 &stages[s].prog_data.base,
1508 brw_prog_data_size(s),
1509 stages[s].stats, stages[s].num_stats,
1510 xfb_info, &stages[s].bind_map);
1511 if (!bin) {
1512 ralloc_free(stage_ctx);
1513 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1514 goto fail;
1515 }
1516
1517 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1518
1519 pipeline->shaders[s] = bin;
1520 ralloc_free(stage_ctx);
1521
1522 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1523
1524 prev_stage = &stages[s];
1525 }
1526
1527 ralloc_free(pipeline_ctx);
1528
1529 done:
1530
1531 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1532 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1533 /* This can happen if we decided to implicitly disable the fragment
1534 * shader. See anv_pipeline_compile_fs().
1535 */
1536 anv_shader_bin_unref(pipeline->base.device,
1537 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1538 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1539 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1540 }
1541
1542 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1543
1544 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1545 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1546 if (create_feedback) {
1547 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1548
1549 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1550 for (uint32_t i = 0; i < info->stageCount; i++) {
1551 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1552 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1553 }
1554 }
1555
1556 return VK_SUCCESS;
1557
1558 fail:
1559 ralloc_free(pipeline_ctx);
1560
1561 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1562 if (pipeline->shaders[s])
1563 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1564 }
1565
1566 return result;
1567 }
1568
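/* std430-like size/alignment rule used when laying out workgroup-shared
 * variables below. Illustrative example: a 3-component vector of 32-bit
 * values has comp_size = 4 and length = 3, giving *size = 12 and *align = 16
 * (vec3 aligns like vec4).
 */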
1569 static void
1570 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
1571 {
1572 assert(glsl_type_is_vector_or_scalar(type));
1573
1574 uint32_t comp_size = glsl_type_is_boolean(type)
1575 ? 4 : glsl_get_bit_size(type) / 8;
1576 unsigned length = glsl_get_vector_elements(type);
1577 *size = comp_size * length;
1578 *align = comp_size * (length == 3 ? 4 : length);
1579 }
1580
1581 VkResult
1582 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1583 struct anv_pipeline_cache *cache,
1584 const VkComputePipelineCreateInfo *info,
1585 const struct anv_shader_module *module,
1586 const char *entrypoint,
1587 const VkSpecializationInfo *spec_info)
1588 {
1589 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1590 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1591 };
1592 int64_t pipeline_start = os_time_get_nano();
1593
1594 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1595
1596 struct anv_pipeline_stage stage = {
1597 .stage = MESA_SHADER_COMPUTE,
1598 .module = module,
1599 .entrypoint = entrypoint,
1600 .spec_info = spec_info,
1601 .cache_key = {
1602 .stage = MESA_SHADER_COMPUTE,
1603 },
1604 .feedback = {
1605 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1606 },
1607 };
1608 anv_pipeline_hash_shader(stage.module,
1609 stage.entrypoint,
1610 MESA_SHADER_COMPUTE,
1611 stage.spec_info,
1612 stage.shader_sha1);
1613
1614 struct anv_shader_bin *bin = NULL;
1615
1616 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1617 vk_find_struct_const(info->stage.pNext,
1618 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1619
1620 populate_cs_prog_key(&pipeline->base.device->info, info->stage.flags,
1621 rss_info, &stage.key.cs);
1622
1623 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1624
1625 const bool skip_cache_lookup =
1626 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1627
1628 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1629
1630 bool cache_hit = false;
1631 if (!skip_cache_lookup) {
1632 bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1633 &stage.cache_key,
1634 sizeof(stage.cache_key),
1635 &cache_hit);
1636 }
1637
1638 void *mem_ctx = ralloc_context(NULL);
1639 if (bin == NULL) {
1640 int64_t stage_start = os_time_get_nano();
1641
1642 stage.bind_map = (struct anv_pipeline_bind_map) {
1643 .surface_to_descriptor = stage.surface_to_descriptor,
1644 .sampler_to_descriptor = stage.sampler_to_descriptor
1645 };
1646
1647 /* Set up a binding for the gl_NumWorkGroups */
1648 stage.bind_map.surface_count = 1;
1649 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1650 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1651 };
1652
1653 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1654 if (stage.nir == NULL) {
1655 ralloc_free(mem_ctx);
1656 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1657 }
1658
1659 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1660
1661 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1662
1663 NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
1664 nir_var_mem_shared, shared_type_info);
1665 NIR_PASS_V(stage.nir, nir_lower_explicit_io,
1666 nir_var_mem_shared, nir_address_format_32bit_offset);
1667 NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1668
1669 stage.num_stats = 1;
1670 stage.code = brw_compile_cs(compiler, pipeline->base.device, mem_ctx,
1671 &stage.key.cs, &stage.prog_data.cs,
1672 stage.nir, -1, stage.stats, NULL);
1673 if (stage.code == NULL) {
1674 ralloc_free(mem_ctx);
1675 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1676 }
1677
1678 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1679
1680 if (!stage.prog_data.cs.uses_num_work_groups) {
1681 assert(stage.bind_map.surface_to_descriptor[0].set ==
1682 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1683 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1684 }
1685
1686 const unsigned code_size = stage.prog_data.base.program_size;
1687 bin = anv_device_upload_kernel(pipeline->base.device, cache,
1688 MESA_SHADER_COMPUTE,
1689 &stage.cache_key, sizeof(stage.cache_key),
1690 stage.code, code_size,
1691 stage.nir->constant_data,
1692 stage.nir->constant_data_size,
1693 &stage.prog_data.base,
1694 sizeof(stage.prog_data.cs),
1695 stage.stats, stage.num_stats,
1696 NULL, &stage.bind_map);
1697 if (!bin) {
1698 ralloc_free(mem_ctx);
1699 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1700 }
1701
1702 stage.feedback.duration = os_time_get_nano() - stage_start;
1703 }
1704
1705 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1706
1707 ralloc_free(mem_ctx);
1708
1709 if (cache_hit) {
1710 stage.feedback.flags |=
1711 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1712 pipeline_feedback.flags |=
1713 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1714 }
1715 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1716
1717 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1718 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1719 if (create_feedback) {
1720 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1721
1722 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1723 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1724 }
1725
1726 pipeline->cs = bin;
1727
1728 return VK_SUCCESS;
1729 }
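/* Application-side sketch (not part of this driver) of how the creation
 * feedback consumed above is requested; the single stage-feedback entry
 * matches the pipelineStageCreationFeedbackCount == 1 assertion in
 * anv_pipeline_compile_cs(). Variable names are illustrative only.
 *
 *    VkPipelineCreationFeedbackEXT pipeline_fb, stage_fb;
 *    const VkPipelineCreationFeedbackCreateInfoEXT fb_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT,
 *       .pPipelineCreationFeedback = &pipeline_fb,
 *       .pipelineStageCreationFeedbackCount = 1,
 *       .pPipelineStageCreationFeedbacks = &stage_fb,
 *    };
 *
 * The application chains fb_info into VkComputePipelineCreateInfo::pNext and,
 * after vkCreateComputePipelines() returns, reads pipeline_fb.duration and
 * checks the VALID and APPLICATION_PIPELINE_CACHE_HIT flag bits.
 */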
1730
1731 uint32_t
1732 anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
1733 {
1734 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1735 return cs_prog_data->local_size[0] *
1736 cs_prog_data->local_size[1] *
1737 cs_prog_data->local_size[2];
1738 }
1739
1740 uint32_t
1741 anv_cs_threads(const struct anv_compute_pipeline *pipeline)
1742 {
1743 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1744 return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
1745 cs_prog_data->simd_size);
1746 }
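/* Illustrative arithmetic for the two helpers above (hypothetical numbers):
 * a compute shader with local_size 8x8x1 has a workgroup size of 64
 * invocations; if the back-end picked a SIMD16 dispatch, anv_cs_threads()
 * returns DIV_ROUND_UP(64, 16) = 4 hardware threads per workgroup.
 */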
1747
1748 /**
1749  * Copy pipeline state not marked as dynamic.
1750  * Dynamic state is pipeline state that is not provided at pipeline
1751  * creation time but is instead supplied afterwards through the various
1752  * vkCmdSet* functions.
1753  *
1754  * The set of state considered "non-dynamic" is determined by the pieces of
1755  * state that have their corresponding VkDynamicState enums omitted from
1756  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1757  *
1758  * @param[out] pipeline Destination of the non-dynamic state.
1759  * @param[in] pCreateInfo Source of the non-dynamic state to be copied.
1760  */
1761 static void
1762 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
1763 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1764 {
1765 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1766 struct anv_subpass *subpass = pipeline->subpass;
1767
1768 pipeline->dynamic_state = default_dynamic_state;
1769
1770 if (pCreateInfo->pDynamicState) {
1771 /* Remove all of the states that are marked as dynamic */
1772 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1773 for (uint32_t s = 0; s < count; s++) {
1774 states &= ~anv_cmd_dirty_bit_for_vk_dynamic_state(
1775 pCreateInfo->pDynamicState->pDynamicStates[s]);
1776 }
1777 }
1778
1779 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1780
1781 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1782 *
1783 * pViewportState is [...] NULL if the pipeline
1784 * has rasterization disabled.
1785 */
1786 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1787 assert(pCreateInfo->pViewportState);
1788
1789 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1790 if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
1791 typed_memcpy(dynamic->viewport.viewports,
1792 pCreateInfo->pViewportState->pViewports,
1793 pCreateInfo->pViewportState->viewportCount);
1794 }
1795
1796 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1797 if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
1798 typed_memcpy(dynamic->scissor.scissors,
1799 pCreateInfo->pViewportState->pScissors,
1800 pCreateInfo->pViewportState->scissorCount);
1801 }
1802 }
1803
1804 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
1805 assert(pCreateInfo->pRasterizationState);
1806 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1807 }
1808
1809 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
1810 assert(pCreateInfo->pRasterizationState);
1811 dynamic->depth_bias.bias =
1812 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1813 dynamic->depth_bias.clamp =
1814 pCreateInfo->pRasterizationState->depthBiasClamp;
1815 dynamic->depth_bias.slope =
1816 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1817 }
1818
1819 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1820 *
1821 * pColorBlendState is [...] NULL if the pipeline has rasterization
1822 * disabled or if the subpass of the render pass the pipeline is
1823 * created against does not use any color attachments.
1824 */
1825 bool uses_color_att = false;
1826 for (unsigned i = 0; i < subpass->color_count; ++i) {
1827 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1828 uses_color_att = true;
1829 break;
1830 }
1831 }
1832
1833 if (uses_color_att &&
1834 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1835 assert(pCreateInfo->pColorBlendState);
1836
1837 if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
1838 typed_memcpy(dynamic->blend_constants,
1839 pCreateInfo->pColorBlendState->blendConstants, 4);
1840 }
1841
1842 /* If there is no depth/stencil attachment, then don't read
1843  * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1844  * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1845  * no need to override the depth/stencil defaults in
1846  * anv_pipeline::dynamic_state when there is no depth/stencil attachment.
1847 *
1848 * Section 9.2 of the Vulkan 1.0.15 spec says:
1849 *
1850 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1851 * disabled or if the subpass of the render pass the pipeline is created
1852 * against does not use a depth/stencil attachment.
1853 */
1854 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1855 subpass->depth_stencil_attachment) {
1856 assert(pCreateInfo->pDepthStencilState);
1857
1858 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
1859 dynamic->depth_bounds.min =
1860 pCreateInfo->pDepthStencilState->minDepthBounds;
1861 dynamic->depth_bounds.max =
1862 pCreateInfo->pDepthStencilState->maxDepthBounds;
1863 }
1864
1865 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
1866 dynamic->stencil_compare_mask.front =
1867 pCreateInfo->pDepthStencilState->front.compareMask;
1868 dynamic->stencil_compare_mask.back =
1869 pCreateInfo->pDepthStencilState->back.compareMask;
1870 }
1871
1872 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
1873 dynamic->stencil_write_mask.front =
1874 pCreateInfo->pDepthStencilState->front.writeMask;
1875 dynamic->stencil_write_mask.back =
1876 pCreateInfo->pDepthStencilState->back.writeMask;
1877 }
1878
1879 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
1880 dynamic->stencil_reference.front =
1881 pCreateInfo->pDepthStencilState->front.reference;
1882 dynamic->stencil_reference.back =
1883 pCreateInfo->pDepthStencilState->back.reference;
1884 }
1885 }
1886
1887 const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
1888 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
1889 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
1890 if (line_state) {
1891 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
1892 dynamic->line_stipple.factor = line_state->lineStippleFactor;
1893 dynamic->line_stipple.pattern = line_state->lineStipplePattern;
1894 }
1895 }
1896
1897 pipeline->dynamic_state_mask = states;
1898 }
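/* Application-side sketch (not part of this driver) of marking state dynamic
 * so that copy_non_dynamic_state() above skips it:
 *
 *    const VkDynamicState dyn_states[] = { VK_DYNAMIC_STATE_VIEWPORT };
 *    const VkPipelineDynamicStateCreateInfo dyn_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 *       .dynamicStateCount = 1,
 *       .pDynamicStates = dyn_states,
 *    };
 *
 * With pCreateInfo->pDynamicState pointing at dyn_info, the
 * ANV_CMD_DIRTY_DYNAMIC_VIEWPORT bit is cleared from the mask, the pViewports
 * array is not copied here, and the application must instead call
 * vkCmdSetViewport() before drawing.
 */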
1899
1900 static void
1901 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1902 {
1903 #ifdef DEBUG
1904 struct anv_render_pass *renderpass = NULL;
1905 struct anv_subpass *subpass = NULL;
1906
1907 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1908 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1909 */
1910 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1911
1912 renderpass = anv_render_pass_from_handle(info->renderPass);
1913 assert(renderpass);
1914
1915 assert(info->subpass < renderpass->subpass_count);
1916 subpass = &renderpass->subpasses[info->subpass];
1917
1918 assert(info->stageCount >= 1);
1919 assert(info->pVertexInputState);
1920 assert(info->pInputAssemblyState);
1921 assert(info->pRasterizationState);
1922 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1923 assert(info->pViewportState);
1924 assert(info->pMultisampleState);
1925
1926 if (subpass && subpass->depth_stencil_attachment)
1927 assert(info->pDepthStencilState);
1928
1929 if (subpass && subpass->color_count > 0) {
1930 bool all_color_unused = true;
1931 for (int i = 0; i < subpass->color_count; i++) {
1932 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1933 all_color_unused = false;
1934 }
1935 /* pColorBlendState is ignored if the pipeline has rasterization
1936 * disabled or if the subpass of the render pass the pipeline is
1937 * created against does not use any color attachments.
1938 */
1939 assert(info->pColorBlendState || all_color_unused);
1940 }
1941 }
1942
1943 for (uint32_t i = 0; i < info->stageCount; ++i) {
1944 switch (info->pStages[i].stage) {
1945 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1946 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1947 assert(info->pTessellationState);
1948 break;
1949 default:
1950 break;
1951 }
1952 }
1953 #endif
1954 }
1955
1956 /**
1957  * Calculate the desired L3 partitioning based on the current state of the
1958  * pipeline. For now this simply returns the conservative defaults calculated
1959  * by get_default_l3_weights(), but we could probably do better by gathering
1960  * more statistics from the pipeline state (e.g. an estimate of expected URB
1961  * usage and bound surfaces), or by using feedback from performance counters.
1962  */
1963 void
1964 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1965 {
1966 const struct gen_device_info *devinfo = &pipeline->device->info;
1967
1968 const struct gen_l3_weights w =
1969 gen_get_default_l3_weights(devinfo, true, needs_slm);
1970
1971 pipeline->l3_config = gen_get_l3_config(devinfo, w);
1972 }
1973
1974 VkResult
1975 anv_pipeline_init(struct anv_graphics_pipeline *pipeline,
1976 struct anv_device *device,
1977 struct anv_pipeline_cache *cache,
1978 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1979 const VkAllocationCallbacks *alloc)
1980 {
1981 VkResult result;
1982
1983 anv_pipeline_validate_create_info(pCreateInfo);
1984
1985 if (alloc == NULL)
1986 alloc = &device->vk.alloc;
1987
1988 vk_object_base_init(&device->vk, &pipeline->base.base,
1989 VK_OBJECT_TYPE_PIPELINE);
1990 pipeline->base.device = device;
1991 pipeline->base.type = ANV_PIPELINE_GRAPHICS;
1992
1993 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1994 assert(pCreateInfo->subpass < render_pass->subpass_count);
1995 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1996
1997 result = anv_reloc_list_init(&pipeline->base.batch_relocs, alloc);
1998 if (result != VK_SUCCESS)
1999 return result;
2000
2001 pipeline->base.batch.alloc = alloc;
2002 pipeline->base.batch.next = pipeline->base.batch.start = pipeline->batch_data;
2003 pipeline->base.batch.end = pipeline->base.batch.start + sizeof(pipeline->batch_data);
2004 pipeline->base.batch.relocs = &pipeline->base.batch_relocs;
2005 pipeline->base.batch.status = VK_SUCCESS;
2006
2007 pipeline->base.mem_ctx = ralloc_context(NULL);
2008 pipeline->base.flags = pCreateInfo->flags;
2009
2010 assert(pCreateInfo->pRasterizationState);
2011
2012 copy_non_dynamic_state(pipeline, pCreateInfo);
2013 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2014
2015 /* Previously we enabled depth clipping when !depthClampEnable.
2016  * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
2017  * clipping info is available, use its enable value to determine clipping;
2018  * otherwise fall back to the previous !depthClampEnable logic.
2019  */
2020 const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2021 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2022 PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2023 pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2024
2025 pipeline->sample_shading_enable =
2026 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2027 pCreateInfo->pMultisampleState &&
2028 pCreateInfo->pMultisampleState->sampleShadingEnable;
2029
2030 /* When we free the pipeline, we detect stages based on the NULL status
2031 * of various prog_data pointers. Make them NULL by default.
2032 */
2033 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
2034
2035 util_dynarray_init(&pipeline->base.executables, pipeline->base.mem_ctx);
2036
2037 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2038 if (result != VK_SUCCESS) {
2039 ralloc_free(pipeline->base.mem_ctx);
2040 anv_reloc_list_finish(&pipeline->base.batch_relocs, alloc);
2041 return result;
2042 }
2043
2044 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
2045
2046 anv_pipeline_setup_l3_config(&pipeline->base, false);
2047
2048 const VkPipelineVertexInputStateCreateInfo *vi_info =
2049 pCreateInfo->pVertexInputState;
2050
2051 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2052
2053 pipeline->vb_used = 0;
2054 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2055 const VkVertexInputAttributeDescription *desc =
2056 &vi_info->pVertexAttributeDescriptions[i];
2057
2058 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2059 pipeline->vb_used |= 1 << desc->binding;
2060 }
2061
2062 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2063 const VkVertexInputBindingDescription *desc =
2064 &vi_info->pVertexBindingDescriptions[i];
2065
2066 pipeline->vb[desc->binding].stride = desc->stride;
2067
2068 /* Step rate is programmed per vertex element (attribute), not
2069 * binding. Set up a map of which bindings step per instance, for
2070 * reference by vertex element setup. */
2071 switch (desc->inputRate) {
2072 default:
2073 case VK_VERTEX_INPUT_RATE_VERTEX:
2074 pipeline->vb[desc->binding].instanced = false;
2075 break;
2076 case VK_VERTEX_INPUT_RATE_INSTANCE:
2077 pipeline->vb[desc->binding].instanced = true;
2078 break;
2079 }
2080
2081 pipeline->vb[desc->binding].instance_divisor = 1;
2082 }
2083
2084 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2085 vk_find_struct_const(vi_info->pNext,
2086 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2087 if (vi_div_state) {
2088 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2089 const VkVertexInputBindingDivisorDescriptionEXT *desc =
2090 &vi_div_state->pVertexBindingDivisors[i];
2091
2092 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2093 }
2094 }
2095
2096 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2097  * different views. If the client asks for instancing, we need to multiply
2098  * the instance divisor by the number of views to ensure that we repeat the
2099  * client's per-instance data once for each view (see the example below).
2100  */
2101 if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2102 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2103 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2104 if (pipeline->vb[vb].instanced)
2105 pipeline->vb[vb].instance_divisor *= view_count;
2106 }
2107 }
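/* For example (hypothetical numbers): with two views in view_mask and an
 * application-specified divisor of 3 on an instanced binding, the effective
 * divisor becomes 3 * 2 = 6. Since each client instance is drawn once per
 * view, the per-instance data still advances once every 3 client instances,
 * as the application asked for.
 */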
2108
2109 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2110 pCreateInfo->pInputAssemblyState;
2111 const VkPipelineTessellationStateCreateInfo *tess_info =
2112 pCreateInfo->pTessellationState;
2113 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
2114
2115 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2116 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2117 else
2118 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
2119
2120 return VK_SUCCESS;
2121 }
2122
2123 #define WRITE_STR(field, ...) ({ \
2124 memset(field, 0, sizeof(field)); \
2125 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
2126 assert(i > 0 && i < sizeof(field)); \
2127 })
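/* Usage note for the macro above: WRITE_STR(props->name, "SIMD%d %s", 16,
 * "fragment") zeroes the fixed-size field, snprintf()s into it, and asserts
 * that the formatted string was non-empty and not truncated, so callers never
 * have to worry about missing NUL termination.
 */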
2128
2129 VkResult anv_GetPipelineExecutablePropertiesKHR(
2130 VkDevice device,
2131 const VkPipelineInfoKHR* pPipelineInfo,
2132 uint32_t* pExecutableCount,
2133 VkPipelineExecutablePropertiesKHR* pProperties)
2134 {
2135 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
2136 VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
2137
2138 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
2139 vk_outarray_append(&out, props) {
2140 gl_shader_stage stage = exe->stage;
2141 props->stages = mesa_to_vk_shader_stage(stage);
2142
2143 unsigned simd_width = exe->stats.dispatch_width;
2144 if (stage == MESA_SHADER_FRAGMENT) {
2145 WRITE_STR(props->name, "%s%d %s",
2146 simd_width ? "SIMD" : "vec",
2147 simd_width ? simd_width : 4,
2148 _mesa_shader_stage_to_string(stage));
2149 } else {
2150 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
2151 }
2152 WRITE_STR(props->description, "%s%d %s shader",
2153 simd_width ? "SIMD" : "vec",
2154 simd_width ? simd_width : 4,
2155 _mesa_shader_stage_to_string(stage));
2156
2157 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
2158 * wants a subgroup size of 1.
2159 */
2160 props->subgroupSize = MAX2(simd_width, 1);
2161 }
2162 }
2163
2164 return vk_outarray_status(&out);
2165 }
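/* Application-side sketch (not part of this driver) of the standard Vulkan
 * two-call idiom served by VK_OUTARRAY above: query the count first, then
 * fetch the properties. Variable names are illustrative only; in a real
 * application the entry point is loaded with vkGetDeviceProcAddr() since it
 * comes from VK_KHR_pipeline_executable_properties.
 *
 *    uint32_t count = 0;
 *    VkPipelineInfoKHR pipe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
 *       .pipeline = pipeline,
 *    };
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &count, NULL);
 *    VkPipelineExecutablePropertiesKHR *props = calloc(count, sizeof(*props));
 *    for (uint32_t i = 0; i < count; i++)
 *       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &count, props);
 */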
2166
2167 static const struct anv_pipeline_executable *
2168 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
2169 {
2170 assert(index < util_dynarray_num_elements(&pipeline->executables,
2171 struct anv_pipeline_executable));
2172 return util_dynarray_element(
2173 &pipeline->executables, struct anv_pipeline_executable, index);
2174 }
2175
2176 VkResult anv_GetPipelineExecutableStatisticsKHR(
2177 VkDevice device,
2178 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2179 uint32_t* pStatisticCount,
2180 VkPipelineExecutableStatisticKHR* pStatistics)
2181 {
2182 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2183 VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
2184
2185 const struct anv_pipeline_executable *exe =
2186 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2187
2188 const struct brw_stage_prog_data *prog_data;
2189 switch (pipeline->type) {
2190 case ANV_PIPELINE_GRAPHICS: {
2191 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
2192 break;
2193 }
2194 case ANV_PIPELINE_COMPUTE: {
2195 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
2196 break;
2197 }
2198 default:
2199 unreachable("invalid pipeline type");
2200 }
2201
2202 vk_outarray_append(&out, stat) {
2203 WRITE_STR(stat->name, "Instruction Count");
2204 WRITE_STR(stat->description,
2205 "Number of GEN instructions in the final generated "
2206 "shader executable.");
2207 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2208 stat->value.u64 = exe->stats.instructions;
2209 }
2210
2211 vk_outarray_append(&out, stat) {
2212 WRITE_STR(stat->name, "SEND Count");
2213 WRITE_STR(stat->description,
2214 "Number of instructions in the final generated shader "
2215 "executable which access external units such as the "
2216 "constant cache or the sampler.");
2217 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2218 stat->value.u64 = exe->stats.sends;
2219 }
2220
2221 vk_outarray_append(&out, stat) {
2222 WRITE_STR(stat->name, "Loop Count");
2223 WRITE_STR(stat->description,
2224 "Number of loops (not unrolled) in the final generated "
2225 "shader executable.");
2226 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2227 stat->value.u64 = exe->stats.loops;
2228 }
2229
2230 vk_outarray_append(&out, stat) {
2231 WRITE_STR(stat->name, "Cycle Count");
2232 WRITE_STR(stat->description,
2233 "Estimate of the number of EU cycles required to execute "
2234 "the final generated executable. This is an estimate only "
2235 "and may vary greatly from actual run-time performance.");
2236 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2237 stat->value.u64 = exe->stats.cycles;
2238 }
2239
2240 vk_outarray_append(&out, stat) {
2241 WRITE_STR(stat->name, "Spill Count");
2242 WRITE_STR(stat->description,
2243 "Number of scratch spill operations. This gives a rough "
2244 "estimate of the cost incurred due to spilling temporary "
2245 "values to memory. If this is non-zero, you may want to "
2246 "adjust your shader to reduce register pressure.");
2247 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2248 stat->value.u64 = exe->stats.spills;
2249 }
2250
2251 vk_outarray_append(&out, stat) {
2252 WRITE_STR(stat->name, "Fill Count");
2253 WRITE_STR(stat->description,
2254 "Number of scratch fill operations. This gives a rough "
2255 "estimate of the cost incurred due to spilling temporary "
2256 "values to memory. If this is non-zero, you may want to "
2257 "adjust your shader to reduce register pressure.");
2258 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2259 stat->value.u64 = exe->stats.fills;
2260 }
2261
2262 vk_outarray_append(&out, stat) {
2263 WRITE_STR(stat->name, "Scratch Memory Size");
2264 WRITE_STR(stat->description,
2265 "Number of bytes of scratch memory required by the "
2266 "generated shader executable. If this is non-zero, you "
2267 "may want to adjust your shader to reduce register "
2268 "pressure.");
2269 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2270 stat->value.u64 = prog_data->total_scratch;
2271 }
2272
2273 if (exe->stage == MESA_SHADER_COMPUTE) {
2274 vk_outarray_append(&out, stat) {
2275 WRITE_STR(stat->name, "Workgroup Memory Size");
2276 WRITE_STR(stat->description,
2277 "Number of bytes of workgroup shared memory used by this "
2278 "compute shader including any padding.");
2279 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2280 stat->value.u64 = brw_cs_prog_data_const(prog_data)->slm_size;
2281 }
2282 }
2283
2284 return vk_outarray_status(&out);
2285 }
2286
2287 static bool
2288 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2289 const char *data)
2290 {
2291 ir->isText = VK_TRUE;
2292
2293 size_t data_len = strlen(data) + 1;
2294
2295 if (ir->pData == NULL) {
2296 ir->dataSize = data_len;
2297 return true;
2298 }
2299
2300 strncpy(ir->pData, data, ir->dataSize);
2301 if (ir->dataSize < data_len)
2302 return false;
2303
2304 ir->dataSize = data_len;
2305 return true;
2306 }
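/* write_ir_text() above implements the per-representation half of the usual
 * Vulkan size-query pattern: when the caller passes pData == NULL we only
 * report the required dataSize (string length plus NUL) and succeed; when a
 * buffer is provided we copy at most dataSize bytes and report failure if it
 * was too small, which the caller below turns into VK_INCOMPLETE.
 */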
2307
2308 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2309 VkDevice device,
2310 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2311 uint32_t* pInternalRepresentationCount,
2312 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2313 {
2314 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2315 VK_OUTARRAY_MAKE(out, pInternalRepresentations,
2316 pInternalRepresentationCount);
2317 bool incomplete_text = false;
2318
2319 const struct anv_pipeline_executable *exe =
2320 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2321
2322 if (exe->nir) {
2323 vk_outarray_append(&out, ir) {
2324 WRITE_STR(ir->name, "Final NIR");
2325 WRITE_STR(ir->description,
2326 "Final NIR before going into the back-end compiler");
2327
2328 if (!write_ir_text(ir, exe->nir))
2329 incomplete_text = true;
2330 }
2331 }
2332
2333 if (exe->disasm) {
2334 vk_outarray_append(&out, ir) {
2335 WRITE_STR(ir->name, "GEN Assembly");
2336 WRITE_STR(ir->description,
2337 "Final GEN assembly for the generated shader binary");
2338
2339 if (!write_ir_text(ir, exe->disasm))
2340 incomplete_text = true;
2341 }
2342 }
2343
2344 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2345 }